423 Commits

Author SHA1 Message Date
3f66c4f53f Initial plan 2025-11-22 07:59:21 +00:00
081ee5cb4c move requests.py to passing tests 2025-11-22 13:19:55 +05:30
a91c3158ad sort fields in debug info by offset order 2025-11-22 12:35:47 +05:30
2b3635fe20 format chore 2025-11-22 01:48:44 +05:30
6f25c554a9 fix CO-RE read for cast structs 2025-11-22 01:47:25 +05:30
84507b8b98 add btf probe read kernel helper 2025-11-22 00:57:12 +05:30
a42a75179d format chore 2025-11-22 00:37:39 +05:30
377fa4041d add regular struct field access handling in vmlinux_registry.py 2025-11-22 00:36:59 +05:30
99321c7669 add a failing C test 2025-11-21 23:01:08 +05:30
11850d16d3 field check in allocation pass 2025-11-21 21:47:58 +05:30
9ee821c7f6 make pointer allocation feasible but subverting LLC 2025-11-21 21:47:55 +05:30
25394059a6 allow casting 2025-11-21 21:47:10 +05:30
fde8eab775 allow allocation pass on vmlinux cast 2025-11-21 21:47:07 +05:30
42b8865a56 Merge branch 'master' into request-struct 2025-11-21 02:10:52 +05:30
144d9b0ab4 change c-file test structure 2025-11-20 17:24:02 +05:30
902a52a07d remove debug print statements 2025-11-20 14:39:13 +05:30
306570953b format chore 2025-11-20 14:18:45 +05:30
740eed45e1 add placeholder debug info to shut llvmlite up about NoneType 2025-11-20 14:17:57 +05:30
c8801f4c3e nonetype not parsed 2025-11-19 23:35:10 +05:30
e5b3b001ce Minor fix for PTR_TO_MAP_VALUE_OR_NULL target 2025-11-19 04:29:35 +05:30
19b42b9a19 Allocate hashmap lookup return vars based on the value type of said hashmap 2025-11-19 04:09:51 +05:30
9f5ec62383 Add get_uint8_type to DebugInfoGenerator 2025-11-19 03:24:40 +05:30
7af54df7c0 Add passing test hash_map_struct.py for using structs as hashmap key/val types 2025-11-19 00:17:01 +05:30
573bbb350e Allow structs to be key/val type for hashmaps 2025-11-19 00:08:15 +05:30
64679f8072 Add skeleton _get_key_val_dbg_type in maps_debug_info.py 2025-11-18 05:00:00 +05:30
5667facf23 Pass down structs_sym_tab to maps_debug_info, allow vmlinux enums to be used in an indexed format for map declaration 2025-11-18 04:34:51 +05:30
4f8af16a17 Pass structs_sym_tab to maps_proc 2025-11-18 04:34:42 +05:30
b84884162d Merge pull request #69 from pythonbpf/symex
Add support for userspace+kernelspace stack trace example using blazesym
2025-11-17 01:47:35 +05:30
e9bb90cb70 Add docstring for bpf_get_stack_emitter 2025-11-17 01:46:57 +05:30
49740598ea format chore 2025-11-13 09:31:10 +05:30
73bbf00e7c add tests 2025-11-13 09:29:53 +05:30
9d76502d5a Fix get_flags_val usage 2025-11-13 02:24:35 +05:30
a10da4a277 Implement bpf_get_stack handler 2025-11-13 00:59:50 +05:30
29e90601b7 Init bpf_get_stack emitter 2025-11-13 00:51:48 +05:30
56df05a93c Janitorial formatting 2025-11-12 14:38:35 +05:30
a55efc6469 Implement output helper for RingBuf maps, add a match-case based dispatch for output helper handlers for multiple map types 2025-11-12 14:06:09 +05:30
64cd2d2fc2 Set minimum supported Python version to 3.10 2025-11-12 14:06:00 +05:30
cbddc0aa96 Introduce MapSymbol to propagate map type info in map_sym_tab 2025-11-12 13:16:23 +05:30
209df33c8f Add RingBuf submit and reserve helpers 2025-11-12 03:53:16 +05:30
7a56e5d0cd Initialize required helpers for ringbuffer 2025-11-12 01:59:07 +05:30
1d7a436c9f Add linting function for RingBuf.discard 2025-11-12 01:30:15 +05:30
5eaeb3e921 Add max_entries constraints for RingBuffer 2025-11-12 01:27:41 +05:30
cd52d0d91b Rename RingBuf map to RingBuffer 2025-11-12 01:07:12 +05:30
df981be095 Janitorial format 2025-11-11 21:08:06 +05:30
316c21c428 Fix char_array to pointer/int detection fallback in helper_utils 2025-11-11 21:00:42 +05:30
c883d95655 Minor fix - check expr type before sending to char_array handler in printk_formatter 2025-11-11 17:43:20 +05:30
f7dee329cb fix nested pointers issue in array generation and also fix zero length array IR generation 2025-11-10 20:29:28 +05:30
5031f90377 fix stacked vmlinux struct parsing issue 2025-11-10 20:06:04 +05:30
95a624044a fix type error 2025-11-08 20:28:56 +05:30
c5bef26b88 add multi imports to single import line. 2025-11-08 18:08:04 +05:30
5a8b64f1d9 Merge pull request #64 from pythonbpf/all_helpers
Add support for all eBPF helpers
2025-11-07 19:26:55 +05:30
cf99b3bb9a Fix call to get_or_create_ptr_from_arg for probe_read_str 2025-11-07 19:16:48 +05:30
6c85b248ce Init sz in get_or_create_ptr_from_arg 2025-11-07 19:03:21 +05:30
b5a3494cc6 Fix typo in get_or_create_ptr_from_arg 2025-11-07 19:01:40 +05:30
be62972974 Fix ScratchPoolManager::counter 2025-11-07 19:00:57 +05:30
2f4a7d2f90 Remove get_struct_char_array_ptr in favour of get_char_array_ptr_and_size, wrap it in get_or_crate_ptr_from_arg to use in bpf_helper_handler 2025-11-07 18:54:59 +05:30
3ccd3f767e Add expected types for pointer creation of args in probe_read handler 2025-11-06 19:59:04 +05:30
2e37726922 Add signature relection for all helper handlers except print 2025-11-06 19:47:57 +05:30
5b36726b7d Make bpf_skb_store_bytes work 2025-11-05 20:02:39 +05:30
faad3555dc Merge pull request #67 from pythonbpf/32int_support
add i32 support and special support for xdp_md with zext
2025-11-05 19:42:05 +05:30
3e6cea2b67 Move get_struct_char_array_ptr from helper/printk_formatter to helper/helper_utils, enable array to ptr conversion in skb_store_bytes 2025-11-05 19:10:58 +05:30
5ad33b011e move a test to passing 2025-11-05 18:02:28 +05:30
2f4785b796 add int type conversion for all vmlinux struct field int types. 2025-11-05 18:01:41 +05:30
c5fdd3bce2 move some tests to passing 2025-11-05 17:48:26 +05:30
b0d35693b9 format chore 2025-11-05 17:44:45 +05:30
44c6ceda27 fix context debug info repetition circular reference error 2025-11-05 17:44:29 +05:30
2685d0a0ee add i32 support special case and find ctx repetition in multiple functions error. 2025-11-05 17:38:38 +05:30
338d4994d8 Fix count_temps_in_call to only look for Pointer args of a helper_sig 2025-11-05 17:36:37 +05:30
3078d4224d Add typed scratch space support to the bpf_skb_store_bytes helper 2025-11-04 16:09:11 +05:30
7d29790f00 Make use of new get_next_temp in helpers 2025-11-04 16:02:56 +05:30
963e2a8171 Change ScratchPoolManager to use typed scratch space 2025-11-04 14:16:44 +05:30
123a92af1d Change allocation pass to generate typed temp variables 2025-11-04 06:20:39 +05:30
752f564d3f Change count_temps_in_call to return hashmap of types 2025-11-04 05:40:22 +05:30
d8cddb9799 Add signature extraction to HelperHandlerRegistry 2025-11-04 05:19:22 +05:30
33e18f6d6d Introduce HelperSignature in HelperHandlerRegistry 2025-11-03 21:21:13 +05:30
5e371787eb Fix the number of args for skb_store_bytes by making the first arg implicit 2025-11-03 21:11:16 +05:30
67c9d9b932 Fix imports for bpf_skb_store_bytes 2025-11-02 04:33:45 +05:30
f757a32a63 Implement bpf_skb_store_bytes_emitter 2025-11-02 04:32:05 +05:30
c5de92b9d0 Add BPF_SKB_STORE_BYTES to HelperIDs 2025-11-02 04:17:15 +05:30
4efd3223cd Add passing uid_gid helper test 2025-11-02 03:47:26 +05:30
4884ed7577 Fix imports for bpf_get_current_uid_gid 2025-11-02 03:35:41 +05:30
5b7769dd38 Implement bpf_get_current_uid_gid_emitter 2025-11-02 03:34:04 +05:30
b7c1e92f05 Add BPF_GET_CURRENT_UID_GID to HelperIDs 2025-11-02 03:29:02 +05:30
8b28a927c3 Add helpful TODO to PID_TGID emitter 2025-11-02 03:27:27 +05:30
3489f45b63 Add failing XDP vmlinux tests 2025-11-01 18:57:07 +05:30
204ec26154 add i32 support and make it extensible 2025-11-01 14:44:39 +05:30
f9ee43e7ef Add passing test smp_processor_id.py for helpers 2025-11-01 14:13:52 +05:30
dabb8bf0df Fix imports for BPF_GET_SMP_PROCESSOR_ID 2025-11-01 14:07:47 +05:30
19dedede53 Implement BPF_GET_SMP_PROCESSOR_ID helper 2025-11-01 14:05:50 +05:30
82cac8f8ef Add BPF_GET_SMP_PROCESSOR_ID to HelperIDs 2025-11-01 14:02:07 +05:30
70a04f54d1 Add passing test for bpf_probe_read helper 2025-11-01 13:51:08 +05:30
ec2ea835e5 Fix imports and type issues for bpf_probe_read 2025-11-01 13:50:23 +05:30
2257c175ed Implement BPF_PROBE_READ helper 2025-11-01 13:14:50 +05:30
5bf60d69b8 Add BPF_PROBE_READ to HelperIDs 2025-11-01 12:52:15 +05:30
a9d82d40d3 Merge pull request #60 from pythonbpf/vmlinux-handler
vmlinux handler with struct support for only int64 and unsigned uint64 type struct fields.
2025-11-01 08:15:14 +05:30
85a62d6cd8 add example and support unsigned i64 2025-11-01 08:13:22 +05:30
c3fc790c71 remove fixed TODOs 2025-11-01 07:05:42 +05:30
22e30f04b4 Merge pull request #66 from pythonbpf/dependabot/github_actions/actions-3249c11fdc
Bump the actions group with 2 updates
2025-10-27 17:21:49 +05:30
620b8cb1e7 Bump the actions group with 2 updates
Bumps the actions group with 2 updates: [actions/upload-artifact](https://github.com/actions/upload-artifact) and [actions/download-artifact](https://github.com/actions/download-artifact).


Updates `actions/upload-artifact` from 4 to 5
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v4...v5)

Updates `actions/download-artifact` from 5 to 6
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
- dependency-name: actions/download-artifact
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-27 11:49:59 +00:00
1207fe9f92 Update .gitattributes to include new directories 2025-10-27 03:43:38 +05:30
b138405931 Merge pull request #65 from pythonbpf/varun-r-mallya-patch-1
Mark Jupyter Notebook files as vendored
2025-10-27 03:41:59 +05:30
262f00f635 Mark Jupyter Notebook files as vendored 2025-10-27 03:41:15 +05:30
07580dabf2 revert struct reference pointer sizes to i8 to ensure that compiler does not optimize 2025-10-27 03:29:15 +05:30
ac74b03b14 Add TODO to specify flags and DISubprogram. 2025-10-27 03:01:56 +05:30
3bf85e733e add DI subprogram to make CO-RE work fully. 2025-10-27 03:00:13 +05:30
73f7c80eca add scope field separately to subroutine type remove circular dependency 2025-10-27 02:48:06 +05:30
238697469a create debug info to subroutine type 2025-10-27 02:19:08 +05:30
0006e26b08 Add passing test for bpf_get_prandom_u32 implementation 2025-10-27 01:09:27 +05:30
5cbd9a531e Add bpf_get_prandom_u32 helper 2025-10-27 01:08:56 +05:30
8bd210cede add debug info storage on assignment_info.py dataclass 2025-10-26 15:46:42 +05:30
7bf6f9c48c add function_debug_info.py and format 2025-10-26 15:12:36 +05:30
a1fe2ed4bc change to 64 bit pointers. May be an issue. revert this commit if issues arise 2025-10-26 15:00:53 +05:30
93285dbdd8 geenrate gep IR 2025-10-26 02:12:33 +05:30
1ea44dd8e1 Use pointer arithmetic to resolve vmlinux struct fields 2025-10-25 05:40:45 +05:30
96216d4411 Consistently use Dataclass syntac for AssignmentInfo and related classes 2025-10-25 05:10:47 +05:30
028d9c2c08 generate IR partly 2025-10-25 04:41:13 +05:30
c6b5ecb47e find global variable ir and field data from metadata 2025-10-24 03:34:27 +05:30
30bcfcbbd0 remove compile error on normal c_void_p in arg and separate localsymbol to avoid circular dep 2025-10-24 03:08:39 +05:30
f18a4399ea format chore 2025-10-24 02:40:07 +05:30
4e01df735f complete part of expr passing for attribute of i64 type 2025-10-24 02:38:39 +05:30
64674cf646 add alloc for only i64 2025-10-24 02:06:39 +05:30
5c1e7103a6 Add Python notebook examples for current BCC examples 2025-10-23 00:28:45 +05:30
576fa2f106 Add interactive Python notebook for hello_world BCC Example 2025-10-22 21:58:32 +05:30
76a873cb0d Update clone-matplotlib example 2025-10-22 21:47:16 +05:30
e86c6082c9 Add BCC examples and change dir structure in setup.sh 2025-10-22 21:46:45 +05:30
cb1ad15f43 Fix examples/clone_plot to use new syntax and pylibbpf API 2025-10-22 20:54:59 +05:30
b24b3ed250 Remove TypedDict from assignment_info in favour of dataclasses 2025-10-22 20:48:56 +05:30
beaad996db Fix map access syntax in examples/xdp_pass 2025-10-22 20:07:38 +05:30
99b92e44e3 Fix exapmles/kprobes to use latest pylibbpf 2025-10-22 20:04:02 +05:30
ce7adaadb6 Fix examples/hello_world to use latest pylibbpf 2025-10-22 19:57:47 +05:30
5ac316a1ac Fix examples/binops_demo.py syntax 2025-10-22 19:52:23 +05:30
36a1a0903e Merge branch 'master' into vmlinux-handler 2025-10-22 12:02:51 +05:30
f2bc7f1434 pass context to memory allocation 2025-10-22 12:01:52 +05:30
b3921c424d parse context from first function argument to local symbol table 2025-10-22 11:40:49 +05:30
7a99d21b24 Fix typo in BCC Examples README 2025-10-22 04:49:22 +05:30
cf05e4959d Add README for BCC Examples 2025-10-22 04:45:09 +05:30
a7394ccafa bump version 2025-10-22 04:18:10 +05:30
63f378c34b Merge pull request #51 from pythonbpf/bcc_examples
Port BCC tutorial examples to PythonBPF
2025-10-22 04:14:09 +05:30
37af7d2e20 Janitorial fix format 2025-10-22 04:12:42 +05:30
77c0d131be Add permission error handling in trace_pipe 2025-10-22 04:10:31 +05:30
84fdf52658 Merge branch 'master' into bcc_examples 2025-10-22 04:09:07 +05:30
f4d903d4b5 Fix create_targets_and_rvals early returns 2025-10-22 04:06:22 +05:30
f9494c870b Fix logical fallacy in get_char_array_ptr_and_size 2025-10-22 04:01:45 +05:30
0d4ebf72b6 lint readme
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-22 03:59:53 +05:30
adf32560a0 bpf passthrough gen in codegen
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-22 03:45:54 +05:30
c65900b733 Merge pull request #62 from pythonbpf/script
make script curlable
2025-10-22 03:25:31 +05:30
711e34cae1 Add script running instruction to README.md 2025-10-22 03:25:00 +05:30
cf3f4a0002 make script curlable 2025-10-22 03:21:05 +05:30
d50157fa09 Merge pull request #61 from pythonbpf/script
Add setup script
2025-10-22 02:47:03 +05:30
ba860b5039 add setup script 2025-10-22 02:44:13 +05:30
21cea97d78 add return None statements 2025-10-21 07:02:34 +05:30
d8729342dc add bpf_passthrough generation 2025-10-21 07:01:37 +05:30
4179fbfc88 move around examples 2025-10-21 06:03:16 +05:30
ba397036b4 add failing examples to work on 2025-10-21 05:49:44 +05:30
798f07986a Add enhanced vfsreadlat BCC example with live plotly and dash graphs on browser 2025-10-21 05:36:59 +05:30
caecb8c9b0 Merge pull request #59 from pythonbpf/vmlinux-handler
LGTM
2025-10-21 05:01:14 +05:30
1a0e21eaa8 support vmlinux enum in map arguments 2025-10-21 04:59:46 +05:30
e98d5684ea Add enhanced live vfsreadlat.py monitor BCC example with rich library 2025-10-21 04:31:23 +05:30
190baf2674 support vmlinux enum in printk handler 2025-10-21 04:10:25 +05:30
c07707a9ad Add vfsreadlat.py BCC example 2025-10-21 03:56:04 +05:30
c3f3d1e564 format chore 2025-10-21 03:43:22 +05:30
e7734629a5 support binary ops with vmlinux enums 2025-10-21 03:41:21 +05:30
5955db88cf add vmlinux expressions to eval expr 2025-10-21 03:24:27 +05:30
66caa3cf1d Merge pull request #58 from pythonbpf/vmlinux-handler
finish table construction for vmlinux symbol info transfer
2025-10-20 22:19:27 +05:30
e499c29d42 float vmlinux_symtab till process_func_body 2025-10-20 22:13:38 +05:30
76d0dbfbf4 change globvar string to real global variable 2025-10-20 21:36:54 +05:30
56a2fbaf5b change globvar string to real global variable 2025-10-20 21:36:46 +05:30
3b323132f0 change equality condition 2025-10-20 21:29:04 +05:30
c9363e62a9 move field name generation to assignments dict 2025-10-20 21:27:42 +05:30
a20643f3a7 move assignemnt tablegen to ir_generation.py 2025-10-20 18:41:59 +05:30
d0fecbc03c Add sync_perf_output BCC example 2025-10-20 15:43:57 +05:30
174095973b Fix userspace calling in sys_sync 2025-10-20 15:10:12 +05:30
3273620447 Fix userspace calling in sync_timing 2025-10-20 15:08:27 +05:30
610cbe82a8 Fix userspace calling in sync_count 2025-10-20 15:04:40 +05:30
54c97e648b Fix userspace calling in hello_fields 2025-10-20 15:02:31 +05:30
dd9411b7b9 Fix userspace calling in hello_world 2025-10-20 14:59:13 +05:30
aa85d0e0ef Remove unnecessary attached var in hello_perf_output 2025-10-20 14:58:50 +05:30
eee212795f add assignment dict handling to class_handler.py 2025-10-20 04:41:00 +05:30
8da50b7068 float assignments to class_handler.py 2025-10-20 04:31:23 +05:30
e636fcaea7 add assignment info class family and change how assignments are handled 2025-10-20 04:23:52 +05:30
5bba8dce12 Complete hello_perf_output BCC example 2025-10-20 04:02:34 +05:30
8c976e46ae Fix loglevel and pylibbpf import in codegen 2025-10-20 04:00:30 +05:30
5512bf52e4 add todo on struct name generator 2025-10-18 23:29:31 +05:30
079ceaa0d6 Merge pull request #57 from pythonbpf/vmlinux-debug-info
Add debug info handling to vmlinux
* Does not add support for recursive ctypes pointer based resolution
* Still does not support unions and function pointers.
* Has the mechanism to build for function pointers added.
2025-10-18 23:10:57 +05:30
328b792e4e add function pointer detection warning as well as identify ctypes non recursion error 2025-10-18 23:09:29 +05:30
5dafa5bd0d add function pointer detection warning as well as identify ctypes non recursion error 2025-10-18 22:59:01 +05:30
33aa794718 identify error in pointer to ctypes subclass dependency fixing 2025-10-18 22:48:34 +05:30
d855e9ef2e correct mistake in null pointer. Also identify error in pointer to char debug info generation 2025-10-18 22:32:03 +05:30
de19c8fc90 rename passing test 2025-10-18 22:15:58 +05:30
dc1b243e82 correct error size calculation for arrays 2025-10-18 22:13:59 +05:30
1b4272b408 members generated with wrong size calc for arrays 2025-10-18 22:02:10 +05:30
101183c315 members generated for simple ctypes 2025-10-18 21:45:26 +05:30
3a3116253f generate members with dummy types 2025-10-18 03:53:10 +05:30
9b7aa6d8be add dependency debug info list 2025-10-18 03:27:26 +05:30
60737d9894 Improve error handling in compile, pass structs_sym_tab and maps_sym_tab to BpfProgram 2025-10-17 03:25:15 +05:30
fc55b7ecaa Add passing ptr_to_char_array test for strings 2025-10-16 23:42:03 +05:30
c143739a04 Add passing test struct_field_to_var_str for strings 2025-10-16 23:21:55 +05:30
51a1be0b0b add classifiers 2025-10-16 19:09:19 +05:30
7ae629e8f7 bump version to v0.1.5 2025-10-16 19:04:04 +05:30
dd734ea2aa Merge pull request #56 from pythonbpf/vmlinux-ir-gen
Adds IR and debug info generation capabilities for vmlinux imported structs
2025-10-16 18:59:32 +05:30
71d005b6b1 complete vmlinux struct name generation in IR.
* Breaks when it finds unions.
* Still does not support function pointers.
2025-10-16 18:58:28 +05:30
5d9a29ee8e format chore 2025-10-16 18:22:25 +05:30
041e538b53 fix errors. Does not support union name resolution yet. 2025-10-16 18:21:14 +05:30
5413cc793b something fixed itself. 2025-10-16 18:06:36 +05:30
f21837aefe support most bitfields 2025-10-16 04:13:04 +05:30
0f5c1fa752 format chore 2025-10-16 04:10:24 +05:30
de02731ea1 add support with ctypes getattr offset. Also supports bitfields.
* breaks when struct_ring_buffer_per_cpu
2025-10-16 04:08:06 +05:30
c22d85ceb8 add array field generation support 2025-10-15 23:56:04 +05:30
009b11aca6 Implement bpf_probe_read_kernel_str helper, Allow i8* to i8 ArrayType conversion 2025-10-15 23:52:15 +05:30
2b3c81affa TODO added for llvmlite attribute issue
*Refer: https://github.com/numba/llvmlite/issues/1331

Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-15 21:35:28 +05:30
8372111616 add basic IR gen strategy 2025-10-15 21:25:53 +05:30
9fc3c85b75 Add struct-field to named-var assignment of char arrays 2025-10-15 20:02:18 +05:30
eb4ee64ee5 Revert "float vmlinux_assignments_symtab"
This reverts commit ce7b170fea.
2025-10-15 19:11:53 +05:30
ce7b170fea float vmlinux_assignments_symtab 2025-10-15 18:19:51 +05:30
fd630293f7 Remove duplicate alignment logic from allocation_pass 2025-10-15 18:14:13 +05:30
81f72a7698 Support var-to-var and var-to-struct-fld allocations 2025-10-15 18:10:04 +05:30
9a60dd87e3 Merge pull request #55 from pythonbpf/vmlinux-ir-gen
remove bitfield support and add assignment support
2025-10-15 18:07:27 +05:30
c499fe7421 solve static typing issues 2025-10-15 18:05:57 +05:30
8239097fbb format chore 2025-10-15 17:49:38 +05:30
a4cfc2b7aa add assignments table and offset handler 2025-10-15 17:49:20 +05:30
fb480639a5 Make char array struct fields work 2025-10-15 17:27:43 +05:30
13a804f7ac Implement bpf_get_current_comm_emitter 2025-10-15 14:03:09 +05:30
a0d954b20b Register bpf_get_current_comm_emitter for comm 2025-10-15 12:47:30 +05:30
b105c70b38 Add hello_perf_output BCC-Example skeleton 2025-10-15 12:35:59 +05:30
69b73003ca setup skeleton for offset calculation 2025-10-15 04:42:38 +05:30
11e8e72188 add base for ir gen 2025-10-15 02:00:23 +05:30
0a1557e318 Add sync_count BCC example to use tuple-like assignment 2025-10-14 23:33:54 +05:30
c56928bc8a Add create_targets_and_rvals, use it in handle_assign to enable tuple assignment 2025-10-14 23:30:59 +05:30
dd3fc74d09 Add support for tuple style multiiassignment in allocation_pass 2025-10-14 23:06:43 +05:30
4a79f9b9b2 Add sync_count BCC-Example 2025-10-14 18:50:46 +05:30
b676a5ebb4 Fix return in BCC-Examples 2025-10-14 17:05:05 +05:30
d7329ad3d7 Add BCC sync_timing example 2025-10-14 16:07:55 +05:30
903654daff Add hello_fields BCC Example 2025-10-14 04:42:12 +05:30
263402d137 Add trace_fields 2025-10-14 04:22:17 +05:30
37d1e1b143 Add sys_sync BCC example 2025-10-14 03:54:02 +05:30
edc33733d9 Add trace_pipe utility 2025-10-14 03:51:43 +05:30
d3f0e3b2ef remove tbaa_gen and make IR generator module 2025-10-14 03:09:18 +05:30
09ba749b46 Merge pull request #52 from pythonbpf/vmlinux-ir-gen
Dependency tree functionality to semantic analyser
2025-10-14 02:37:43 +05:30
a03d3e5d4c format chore 2025-10-14 02:36:04 +05:30
e1f9ac6ba0 add dependency tree functionality 2025-10-14 02:35:49 +05:30
18d62d605a Add hello_world BCC example 2025-10-13 20:11:27 +05:30
27ab3aaf1e Cleanup codegen.py 2025-10-13 19:17:58 +05:30
b34f7dd68a format chore 2025-10-13 19:11:59 +05:30
69d8669e44 Merge pull request #28 from pythonbpf/vmlinux-working
Add compilation mechanism from vmlinux
Still  does not compile to IR. only does semantic analysis.
Another PR will be opened soon for IR generation.
2025-10-13 19:08:41 +05:30
d4f5a9c36e fix static typing errors 2025-10-13 19:07:06 +05:30
b2a57edf11 Simplify maps_pass 2025-10-13 19:01:01 +05:30
20ec307288 format chore 2025-10-13 19:00:59 +05:30
0b4c6264a8 complete dependency tree readiness resolution 2025-10-13 19:00:28 +05:30
6345fcdeff Remove unused is_helper_call from allocation_pass 2025-10-13 18:38:07 +05:30
6b41f1fb84 Move print logic to helper/printk_emitter.py 2025-10-13 18:32:51 +05:30
74d8014ade Move HelperHandlerRegistry to helper_registry.py 2025-10-13 18:21:50 +05:30
5d0a888542 Remove deadcode and seperate modules for pythonbpf.functions 2025-10-13 04:41:46 +05:30
0042280ff1 Rename public API and remove deadcode in return_utils 2025-10-13 04:23:58 +05:30
7a67041ea3 Move CallHandlerRegistry to expr/call_registry.py, annotate eval_expr 2025-10-13 04:16:22 +05:30
45e6ce5e5c Move deref_to_depth to expr/ir_ops.py 2025-10-13 04:01:27 +05:30
c5f0a2806f Make printk_emiiter return True to prevent bogus logger warnings in eval_expr 2025-10-13 02:40:34 +05:30
b0ea93a786 Merge pull request #50 from pythonbpf/dep_inv
Use dependency inversion to remove handler delayed import in eval_expr
2025-10-13 02:32:18 +05:30
fc058c4341 Use dependency inversion to remove handler delayed import in eval_expr 2025-10-13 02:28:00 +05:30
158cc42e1e Move binop handling logic to expr_pass, remove delayed imports of get_operand_value 2025-10-13 00:36:42 +05:30
2a1eabc10d Fix regression in struct_perf_output 2025-10-13 00:00:43 +05:30
e5741562f6 add full confidence import parsing 2025-10-12 23:56:38 +05:30
93634a4769 format chore 2025-10-12 23:47:46 +05:30
9b8462f1ed add bitfield size resolution 2025-10-12 23:44:50 +05:30
785182787c make semantics work other than field diffs 2025-10-12 23:16:00 +05:30
80396c78a6 recursive parsing fix without ctypes in recursed type 2025-10-12 20:59:18 +05:30
31645f0316 Merge pull request #40 from pythonbpf/refactor_assign
Refactor assignment statement handling and the typing mechanism around it
2025-10-12 12:19:51 +05:30
e0ad1bfb0f Move bulk of allocation logic to allocation_pass 2025-10-12 12:14:46 +05:30
69bee5fee9 Seperate LocalSymbol from functions 2025-10-12 12:10:09 +05:30
2f1aaa4834 Fix typos 2025-10-12 11:41:01 +05:30
0f6971bcc2 Refactor allocate_mem 2025-10-12 11:34:40 +05:30
08c0ccf0ac Pass map_sym_tab to handle_struct_field_assign 2025-10-12 10:37:20 +05:30
64e44d0d58 Use handle_struct_field_assignment in handle_assign 2025-10-12 10:30:46 +05:30
3ad1b73c5a Add handle_struct_field_assignment to assign_pass 2025-10-12 10:19:52 +05:30
105c5a7bd0 Cleanup handle_assign 2025-10-12 10:12:45 +05:30
933d2a5c77 Fix comprehensive assignment test 2025-10-12 09:47:57 +05:30
b93f704eb8 Tweak the comprehensive assignment test 2025-10-12 09:46:16 +05:30
fa82dc7ebd Add comprehensive passing test for assignment 2025-10-12 09:39:33 +05:30
e8026a13bf Allow helpers to be called within themselves 2025-10-12 09:30:37 +05:30
a3b4d09652 Fix errorstring in _handle_unary_op 2025-10-12 09:13:04 +05:30
4e33fd4a32 Add negation UnaryOp 2025-10-12 09:11:56 +05:30
2cf68f6473 Allow map-based helpers to be used as helper args / within binops which are helper args 2025-10-12 07:57:55 +05:30
d66e6a6aff Allow struct members as helper args 2025-10-12 06:00:50 +05:30
cd74e896cf Allow binops as args to helpers accepting int* 2025-10-12 04:20:46 +05:30
207f714027 Use scratch space to store consts passed to helpers 2025-10-12 04:17:37 +05:30
8774277000 try to separate out ast node from vmlinux type 2025-10-12 01:59:14 +05:30
5dcf670f49 Add ScratchPoolManager and it's singleton 2025-10-12 01:47:11 +05:30
8743ea17f3 one recursion issue solved 2025-10-12 01:33:23 +05:30
6bce29b90f Allocate scratch space for temp vars at the end of allocate_mem 2025-10-12 00:37:57 +05:30
321415fa28 Add update_max_temps_for_stmt in allocate_mem 2025-10-12 00:33:07 +05:30
8776d7607f Add count_temps_in_call to call scratch space needed in a helper call 2025-10-12 00:17:10 +05:30
f8844104a6 add support for single depth pointer resolution 2025-10-11 23:18:51 +05:30
3343bedd11 add extra fields to Field datatype
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 22:28:23 +05:30
8b7b1c08a5 Add struct_and_helper_binops passing test for assignments 2025-10-11 22:03:32 +05:30
75d3ad4fe2 format chore 2025-10-11 22:00:25 +05:30
abbf17748d format chore 2025-10-11 21:34:28 +05:30
7c559840f0 add ctype subclass identifier
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 20:02:20 +05:30
06773c895f add error message 2025-10-11 19:05:21 +05:30
1e3d775865 handle non-complex types along with recursion 2025-10-11 19:04:11 +05:30
168e26268e add recursive addition algorithm with mixing of ast node type and type node which is not right.
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 18:55:56 +05:30
2cf7b28793 extract fields from the class 2025-10-11 18:26:13 +05:30
d24d59c2ba fix structure for IR generation separation. 2025-10-11 18:11:46 +05:30
f190a33e21 init IR generation file and clarify purpose 2025-10-11 17:52:22 +05:30
eb636ef731 add dependency handler class 2025-10-11 17:49:40 +05:30
2ae3aade60 static type checks 2025-10-11 17:13:22 +05:30
f227fe9310 add dependency_node format and also cache results of symbol loader. 2025-10-11 17:13:22 +05:30
7940d02bc7 add symbol resolution to import detection 2025-10-11 17:13:21 +05:30
2483ef2840 separate vmlinux class handler 2025-10-11 17:13:21 +05:30
68e9693f9a add import parser
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 17:13:21 +05:30
c9bbe1ffd8 Call eval_expr properly within get_operand_value 2025-10-11 03:21:09 +05:30
91a3fe140d Remove unnecessary return artifacts from get_operand_value 2025-10-11 03:06:24 +05:30
c2c17741e5 Remove store_through_chain 2025-10-11 03:04:26 +05:30
cac88d1560 Allow different int widths in binops 2025-10-11 02:44:08 +05:30
317575644f Interpret bools as ints in binops 2025-10-11 00:18:11 +05:30
a756f5e4b7 Add passing helper test for assignment 2025-10-10 23:55:12 +05:30
e4575a6b1e Merge branch 'master' into vmlinux-working 2025-10-10 22:55:51 +05:30
3ec3ab30fe add vmlinux processor to codegen pipeline
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:14 +05:30
7fb3ecff48 initialise tbaa generation and vmlinux recursive importer modules
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:14 +05:30
ec59dad025 Refactor hist() calls to use dot notation 2025-10-10 22:54:13 +05:30
28b7b1620c remove todos and move to projects on github. 2025-10-10 22:54:13 +05:30
9f8e240a38 add patch for Kernel 6.14 BTF
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:12 +05:30
e6c05ab494 Update TODO.md 2025-10-10 22:54:12 +05:30
8aa9cf7119 Add failing oneline IfExpr conditional test 2025-10-10 22:54:11 +05:30
9683e3799f format chore 2025-10-10 22:54:11 +05:30
200d293750 add global symbol table populate function 2025-10-10 22:54:10 +05:30
ed196caebf add global symbol table populate function 2025-10-10 22:54:10 +05:30
a049796b81 add failing test 2025-10-10 22:54:10 +05:30
384fc9dd40 changer order of passes 2025-10-10 22:54:09 +05:30
5f2df57e64 update globals test and todos. 2025-10-10 22:54:09 +05:30
130d8a9edc format chore 2025-10-10 22:54:08 +05:30
40ae3d825a fix broken IR generation logic for globals 2025-10-10 22:54:08 +05:30
484624104e fix broken IR generation logic for globals 2025-10-10 22:54:07 +05:30
e7c4bdb150 add global support with broken generation function 2025-10-10 22:54:07 +05:30
7210366e7d add global failing test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:06 +05:30
435bf27176 Add compile to tests/failing_tests/conditionals/helper_cond.py 2025-10-10 22:54:06 +05:30
1ba27ac7cf Remove completed short term goal from TODO.md 2025-10-10 22:54:05 +05:30
e4ddec3a02 Fix expr imports 2025-10-10 22:54:05 +05:30
bc7b5c97d1 Move handle_comparator to type_normalization 2025-10-10 22:54:05 +05:30
fa720f8e6b Move convert_to_bool to type_normalization 2025-10-10 22:54:04 +05:30
eff0f66d95 Seperate type_normalization from expr_pass 2025-10-10 22:54:04 +05:30
b43c252224 Fix type_deducer import in expr 2025-10-10 22:54:03 +05:30
aae7aa981d Fix expr imports 2025-10-10 22:54:03 +05:30
6f9a7301af Rework dir structure for expr 2025-10-10 22:54:02 +05:30
48923d03d4 cleanup handle_cond in functions_pass 2025-10-10 22:54:02 +05:30
019a83cf11 Add passing and.py test for conditionals 2025-10-10 22:54:01 +05:30
140d9e6e35 Add passing or.py test for conditionals 2025-10-10 22:54:01 +05:30
a351b0f1b5 Add _handle_or_or in expr_pass 2025-10-10 22:54:01 +05:30
3cb73ff0c3 Add _handle_and_op in expr_pass 2025-10-10 22:54:00 +05:30
3b08c2bede Add handle_and and handle_or handling stub in eval_expr 2025-10-10 22:54:00 +05:30
86378d6cc4 Add BoolOp handling stub in eval_expr 2025-10-10 22:53:59 +05:30
00d1c583af Add support for is and is not keywords 2025-10-10 22:53:59 +05:30
cfc246c80d Add explanation notes to failing conditionals tests 2025-10-10 22:53:58 +05:30
f3c80f9e5f Add helper_cond failing test for conditionals 2025-10-10 22:53:58 +05:30
0d3a5748dd Move map_comp test to passing 2025-10-10 22:53:57 +05:30
079431754c Add null checks for pointer derefs to avoid map_value_or_null verifier errors 2025-10-10 22:53:57 +05:30
46f5eca33d Add _deref_to_depth in expr_pass 2025-10-10 22:53:56 +05:30
7081e939fb Move _get_base_type to _get_base_type_and_depth 2025-10-10 22:53:56 +05:30
1e29460d6f Add _get_base_type to expr_pass 2025-10-10 22:53:56 +05:30
e180a89644 Add _normalize_types to handle mismatched ints, move type_mismatch test to passing 2025-10-10 22:53:55 +05:30
34a267e982 Add type_mismatch failing test for conditionals 2025-10-10 22:53:55 +05:30
c81aad7c67 Add failing struct_ptr test for conditionals 2025-10-10 22:53:54 +05:30
2e677c2c7b Fix struct_access in eval_expr, move struct_access conditional test to passing 2025-10-10 22:53:54 +05:30
4ea7b22b44 Add 'and' and 'or' BoolOps as future deliverables 2025-10-10 22:53:53 +05:30
b8b937bfca Add failing test map_comp for conditionals 2025-10-10 22:53:53 +05:30
6cc29c4fa1 Add var_comp test for conditionals 2025-10-10 22:53:52 +05:30
5451ba646d Add support for unary op 'not' in eval_expr, move not test to passing 2025-10-10 22:53:52 +05:30
7720437ca5 Add failing tests struct and not for conditionals 2025-10-10 22:53:52 +05:30
eb0a7a917d Add map test to conditionals 2025-10-10 22:53:51 +05:30
6f65903552 Add var_binop test for conditionals 2025-10-10 22:53:51 +05:30
97e74d09be Add var test for conditionals 2025-10-10 22:53:50 +05:30
9c7560ed2e Add const_binop test for conditionals 2025-10-10 22:53:50 +05:30
2979ceedcf Add const_int test for conditionals 2025-10-10 22:53:49 +05:30
745f59278f Move conditional logic to eval_expr, add _conver_to_bool, add passing bool test 2025-10-10 22:53:49 +05:30
49c59b32ca Add Boolean return support 2025-10-10 22:53:48 +05:30
ff78140a7d Eval LHS and RHS in _handle_compare 2025-10-10 22:53:48 +05:30
82ff71b753 Add _handle_cond to expr_pass 2025-10-10 22:53:48 +05:30
f46e7cd846 Reduce a condition from handle_cond 2025-10-10 22:53:47 +05:30
9d73eb67c4 Add TODO for fixing struct_kioctx issue 2025-10-10 22:53:47 +05:30
21ce041353 Refactor hist() calls to use dot notation 2025-10-10 20:45:07 +05:30
7529820c0b Allow int** pointers to store binops of type int** op int 2025-10-10 20:36:37 +05:30
9febadffd3 Add pointer handling to helper_utils, finish pointer assignment 2025-10-10 15:01:15 +05:30
99aacca94b WIP: allow pointer assignments to var 2025-10-10 13:48:40 +05:30
1d517d4e09 Add double_alloc in alloc_mem 2025-10-10 12:28:45 +05:30
047f361ea9 Allocate twice for map lookups 2025-10-10 06:09:46 +05:30
489244a015 Add store_through_chain 2025-10-10 02:56:11 +05:30
8bab07ed72 Remove recursive_dereferencer 2025-10-10 00:13:35 +05:30
1253f51ff3 Use deref_to_val instead of recursive_dereferencer in get_operand value 2025-10-09 23:11:06 +05:30
23afb0bd33 Add deref_to_val to deref into final value and return the chain as well in binops 2025-10-09 21:47:28 +05:30
c596213b2a Add cst_var_binop.py as passing assign test 2025-10-09 03:42:25 +05:30
054a834464 Add failing assign test retype.py, with explanation 2025-10-09 03:28:07 +05:30
d7bfe86524 Add handle_variable_assignment to assign_pass 2025-10-09 03:09:10 +05:30
84ed27f222 Add handle_variable_assignment stub and boilerplate in handle_assign 2025-10-08 22:55:03 +05:30
6008d9841f Change loglevel of multi-assignment warning in handle_assign 2025-10-08 22:45:09 +05:30
6402cf7be5 remove todos and move to projects on github. 2025-10-08 22:27:51 +05:30
9a96e1247b Merge pull request #29 from pythonbpf/smol_pp
add patch for Kernel 6.14 BTF in transpiler
2025-10-08 21:47:49 +05:30
989134f4be add patch for Kernel 6.14 BTF
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 21:47:02 +05:30
120aec08da Update TODO.md 2025-10-08 21:40:14 +05:30
e66ae7cc89 Add failing oneline IfExpr conditional test 2025-10-08 21:36:08 +05:30
b95fbd0ed0 rollback example 2025-10-08 14:53:51 +05:30
32dc8e6636 Merge pull request #21 from pythonbpf/globals
Adds support for globals
SO......
*I'm not merging this because it's complete, but because I don't want it to diverge from master too much.
*Stuff I still need to complete:
-> Structs and eval expressions in these globals.
-> handling the global keyword.
-> assigning back to the global and reading from inside a function.
-> Basically, `global` keyword in Python is used to write only and reading can be done directly without declaring as global as a direct assign without global declaration is going to diverge from Python.
-> The above logic is going to be supported by `global_sym_tab` generated using the new order of passes that we are doing.
-> This needs to be fixed and done ASAP to avoid conflicts. so yes, im  gonna do it soon.
2025-10-08 14:48:37 +05:30
8e3942d38c format chore 2025-10-08 14:31:37 +05:30
d84ce0c6fa update helpers and change examples. 2025-10-08 13:57:09 +05:30
8d07a4cd05 add xdp struct to args
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 11:40:12 +05:30
8485460374 Merge pull request #26 from pythonbpf/refactor_conds
Refactor conds
2025-10-08 07:28:08 +05:30
0c977514af Add TODO for fixing struct_kioctx issue 2025-10-08 05:34:25 +05:30
1207730ce3 update vmlinux.py 2025-10-08 05:27:56 +05:30
0d9dcd122c Merge pull request #27 from pythonbpf/vmlinux
Add vmlinux transpiler from experiments
2025-10-08 05:19:44 +05:30
8a69e05ee2 fix duplicate variable in example
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 05:18:49 +05:30
976af290af Revert "format chore"
This reverts commit a3443ab1d5.
2025-10-08 05:17:59 +05:30
a3443ab1d5 format chore
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 05:16:36 +05:30
a27360482b complete vmlinux transpiler.
TODO: struct_kioctx for x86_64 vmlinux.h has anonymous structs that refused to transpile well, so an extra rule has been written to make only the structs of that external. Fix this in the future.
2025-10-08 05:15:29 +05:30
c423cc647d add vmlinux.py transpiler from experiment repository
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 00:45:30 +05:30
8554688230 Merge pull request #25 from pythonbpf/dependabot/github_actions/actions-6a14be197d
Bump the actions group with 2 updates
2025-10-06 19:32:01 +05:30
3e873f378e Bump the actions group with 2 updates
Bumps the actions group with 2 updates: [actions/checkout](https://github.com/actions/checkout) and [actions/setup-python](https://github.com/actions/setup-python).


Updates `actions/checkout` from 4 to 5
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v4...v5)

Updates `actions/setup-python` from 5 to 6
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-06 11:26:23 +00:00
3abe07c5b2 add global symbol table populate function 2025-10-05 14:05:10 +05:30
01bd7604ed add global symbol table populate function 2025-10-05 14:04:25 +05:30
7ae84a0d5a add failing test 2025-10-05 00:55:38 +05:30
df3f00261a changer order of passes 2025-10-04 08:17:16 +05:30
ab610147a5 update globals test and todos. 2025-10-04 06:36:51 +05:30
7720fe9f9f format chore 2025-10-04 06:33:09 +05:30
7aeac86bd3 fix broken IR generation logic for globals 2025-10-04 06:32:25 +05:30
ab1c4223d5 fix broken IR generation logic for globals 2025-10-03 22:55:40 +05:30
c3a512d5cf add global support with broken generation function 2025-10-03 22:20:04 +05:30
4a60c42cd0 add global failing test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-03 21:25:58 +05:30
122 changed files with 9831 additions and 326513 deletions

2
.gitattributes vendored
View File

@ -1 +1,3 @@
tests/c-form/vmlinux.h linguist-vendored
examples/ linguist-vendored
BCC-Examples/ linguist-vendored

View File

@ -12,8 +12,8 @@ jobs:
name: Format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
with:
python-version: "3.x"
- uses: pre-commit/action@v3.0.1

View File

@ -33,7 +33,7 @@ jobs:
python -m build
- name: Upload distributions
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: release-dists
path: dist/
@ -59,7 +59,7 @@ jobs:
steps:
- name: Retrieve release distributions
uses: actions/download-artifact@v5
uses: actions/download-artifact@v6
with:
name: release-dists
path: dist/

3
.gitignore vendored
View File

@ -7,3 +7,6 @@ __pycache__/
*.ll
*.o
.ipynb_checkpoints/
vmlinux.py
~*
vmlinux.h

View File

@ -12,7 +12,7 @@
#
# See https://github.com/pre-commit/pre-commit
exclude: 'vmlinux.*\.py$'
exclude: 'vmlinux.py'
ci:
autoupdate_commit_msg: "chore: update pre-commit hooks"
@ -41,7 +41,7 @@ repos:
- id: ruff
args: ["--fix", "--show-fixes"]
- id: ruff-format
exclude: ^(docs)|^(tests)|^(examples)
# exclude: ^(docs)|^(tests)|^(examples)
# Checking static types
- repo: https://github.com/pre-commit/mirrors-mypy

20
BCC-Examples/README.md Normal file
View File

@ -0,0 +1,20 @@
## BCC examples ported to PythonBPF
This folder contains examples of BCC tutorial examples that have been ported to use **PythonBPF**.
## Requirements
- install `pythonbpf` and `pylibbpf` using pip.
- You will also need `matplotlib` for vfsreadlat.py example.
- You will also need `rich` for vfsreadlat_rich.py example.
- You will also need `plotly` and `dash` for vfsreadlat_plotly.py example.
## Usage
- You'll need root privileges to run these examples. If you are using a virtualenv, use the following command to run the scripts:
```bash
sudo <path_to_virtualenv>/bin/python3 <script_name>.py
```
- For vfsreadlat_plotly.py, run the following command to start the Dash server:
```bash
sudo <path_to_virtualenv>/bin/python3 vfsreadlat_plotly/bpf_program.py
```
Then open your web browser and navigate to the given URL.

View File

@ -0,0 +1,83 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "28cf2e27-41e2-461c-a39c-147417141a4e",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "133190e5-5a99-4585-b6e1-91224ed973c2",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"Hello, World!\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3934efb-4043-4545-ae4c-c50ec40a24fd",
"metadata": {},
"outputs": [],
"source": [
"# header\n",
"print(f\"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}\")\n",
"\n",
"# format output\n",
"while True:\n",
" try:\n",
" (task, pid, cpu, flags, ts, msg) = trace_fields()\n",
" except ValueError:\n",
" continue\n",
" except KeyboardInterrupt:\n",
" exit()\n",
" print(f\"{ts:<18} {task:<16} {pid:<6} {msg}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,34 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
# header
print(f"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}")
# format output
while True:
try:
(task, pid, cpu, flags, ts, msg) = trace_fields()
except ValueError:
continue
except KeyboardInterrupt:
exit()
print(f"{ts:<18} {task:<16} {pid:<6} {msg}")

View File

@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "79b74928-f4b4-4320-96e3-d973997de2f4",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, struct, section, bpfglobal, BPF\n",
"from pythonbpf.helper import ktime, pid, comm\n",
"from pythonbpf.maps import PerfEventArray\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bdb0329-ae2d-45e8-808e-5ed5b1374204",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@struct\n",
"class data_t:\n",
" pid: c_int64\n",
" ts: c_int64\n",
" comm: str(16)\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def events() -> PerfEventArray:\n",
" return PerfEventArray(key_size=c_int64, value_size=c_int64)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello(ctx: c_void_p) -> c_int64:\n",
" dataobj = data_t()\n",
" dataobj.pid, dataobj.ts = pid(), ktime()\n",
" comm(dataobj.comm)\n",
" events.output(dataobj)\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4bcc7d57-6cc4-48a3-bbd2-42ad6263afdf",
"metadata": {},
"outputs": [],
"source": [
"start = 0\n",
"\n",
"\n",
"def callback(cpu, event):\n",
" global start\n",
" if start == 0:\n",
" start = event.ts\n",
" ts = (event.ts - start) / 1e9\n",
" print(f\"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}\")\n",
"\n",
"\n",
"perf = b[\"events\"].open_perf_buffer(callback, struct_name=\"data_t\")\n",
"print(\"Starting to poll... (Ctrl+C to stop)\")\n",
"print(\"Try running: fork() or clone() system calls to trigger events\")\n",
"\n",
"try:\n",
" while True:\n",
" b[\"events\"].poll(1000)\n",
"except KeyboardInterrupt:\n",
" print(\"Stopping...\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,61 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid, comm
from pythonbpf.maps import PerfEventArray
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
pid: c_int64
ts: c_int64
comm: str(16) # type: ignore [valid-type]
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_int64, value_size=c_int64)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
dataobj = data_t()
dataobj.pid, dataobj.ts = pid(), ktime()
comm(dataobj.comm)
events.output(dataobj)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
start = 0
def callback(cpu, event):
global start
if start == 0:
start = event.ts
ts = (event.ts - start) / 1e9
print(f"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}")
perf = b["events"].open_perf_buffer(callback, struct_name="data_t")
print("Starting to poll... (Ctrl+C to stop)")
print("Try running: fork() or clone() system calls to trigger events")
try:
while True:
b["events"].poll(1000)
except KeyboardInterrupt:
print("Stopping...")

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "7d5d3cfb-39ba-4516-9856-b3bed47a0cef",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cf1c87aa-e173-4156-8f2d-762225bc6d19",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"Hello, World!\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"b = BPF()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd81383d-f75a-4269-8451-3d985d85b124",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Cache2 I/O-4716 [003] ...21 8218.000492: bpf_trace_printk: count: 11 with 4716\n",
"\n",
" Cache2 I/O-4716 [003] ...21 8218.000499: bpf_trace_printk: Hello, World!\n",
"\n",
" WebExtensions-5168 [002] ...21 8219.320392: bpf_trace_printk: count: 13 with 5168\n",
"\n",
" WebExtensions-5168 [002] ...21 8219.320399: bpf_trace_printk: Hello, World!\n",
"\n",
" python-21155 [001] ...21 8220.933716: bpf_trace_printk: count: 5 with 21155\n",
"\n",
" python-21155 [001] ...21 8220.933721: bpf_trace_printk: Hello, World!\n",
"\n",
" python-21155 [002] ...21 8221.341290: bpf_trace_printk: count: 6 with 21155\n",
"\n",
" python-21155 [002] ...21 8221.341295: bpf_trace_printk: Hello, World!\n",
"\n",
" Isolated Web Co-5462 [000] ...21 8223.095033: bpf_trace_printk: count: 7 with 5462\n",
"\n",
" Isolated Web Co-5462 [000] ...21 8223.095043: bpf_trace_printk: Hello, World!\n",
"\n",
" firefox-4542 [000] ...21 8227.760067: bpf_trace_printk: count: 8 with 4542\n",
"\n",
" firefox-4542 [000] ...21 8227.760080: bpf_trace_printk: Hello, World!\n",
"\n",
" Isolated Web Co-12404 [003] ...21 8227.917086: bpf_trace_printk: count: 7 with 12404\n",
"\n",
" Isolated Web Co-12404 [003] ...21 8227.917095: bpf_trace_printk: Hello, World!\n",
"\n"
]
}
],
"source": [
"b.load()\n",
"b.attach_all()\n",
"trace_pipe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01e1f25b-decc-425b-a1aa-a5e701082574",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,23 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
trace_pipe()

View File

@ -0,0 +1,107 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "dcab010c-f5e9-446f-9f9f-056cc794ad14",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap\n",
"\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "720797e8-9c81-4af6-a385-80f1ec4c0f15",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=2)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" ts_key, cnt_key = 0, 1\n",
" tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key)\n",
" if not cntp:\n",
" last.update(cnt_key, 0)\n",
" cntp = last.lookup(cnt_key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" time_ms = delta // 1000000\n",
" print(f\"{time_ms} {cntp}\")\n",
" last.delete(ts_key)\n",
" else:\n",
" last.update(ts_key, ktime())\n",
" last.update(cnt_key, cntp + 1)\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78a8b82c-7c5f-43c1-9de1-cd982a0f345b",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"while True:\n",
" try:\n",
" task, pid, cpu, flags, ts, msg = trace_fields()\n",
" if start == 0:\n",
" start = ts\n",
" ts -= start\n",
" ms, cnt = msg.split()\n",
" print(f\"At time {ts} s: Multiple syncs detected, last {ms} ms ago. Count {cnt}\")\n",
" except KeyboardInterrupt:\n",
" exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,58 @@
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=2)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
ts_key, cnt_key = 0, 1
tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key)
if not cntp:
last.update(cnt_key, 0)
cntp = last.lookup(cnt_key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"{time_ms} {cntp}")
last.delete(ts_key)
else:
last.update(ts_key, ktime())
last.update(cnt_key, cntp + 1)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
while True:
try:
task, pid, cpu, flags, ts, msg = trace_fields()
if start == 0:
start = ts
ts -= start
ms, cnt = msg.split()
print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago. Count {cnt}")
except KeyboardInterrupt:
exit()

View File

@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b0d1ab05-0c1f-4578-9c1b-568202b95a5c",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, struct, section, bpfglobal, BPF\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap, PerfEventArray\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85e50d0a-f9d8-468f-8e03-f5f7128f05d8",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@struct\n",
"class data_t:\n",
" ts: c_int64\n",
" ms: c_int64\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def events() -> PerfEventArray:\n",
" return PerfEventArray(key_size=c_int64, value_size=c_int64)\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=1)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" dat, dat.ts, key = data_t(), ktime(), 0\n",
" tsp = last.lookup(key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" dat.ms = delta // 1000000\n",
" events.output(dat)\n",
" last.delete(key)\n",
" else:\n",
" last.update(key, ktime())\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40bb1107-369f-4be7-9f10-37201900c16b",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"\n",
"\n",
"def callback(cpu, event):\n",
" global start\n",
" if start == 0:\n",
" start = event.ts\n",
" event.ts -= start\n",
" print(\n",
" f\"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago\"\n",
" )\n",
"\n",
"\n",
"perf = b[\"events\"].open_perf_buffer(callback, struct_name=\"data_t\")\n",
"print(\"Starting to poll... (Ctrl+C to stop)\")\n",
"print(\"Try running: fork() or clone() system calls to trigger events\")\n",
"\n",
"try:\n",
" while True:\n",
" b[\"events\"].poll(1000)\n",
"except KeyboardInterrupt:\n",
" print(\"Stopping...\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94a588d9-3a40-437c-a35b-fc40410f3eb7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,75 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
ts: c_int64
ms: c_int64
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_int64, value_size=c_int64)
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
dat, dat.ts, key = data_t(), ktime(), 0
tsp = last.lookup(key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
dat.ms = delta // 1000000
events.output(dat)
last.delete(key)
else:
last.update(key, ktime())
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
def callback(cpu, event):
global start
if start == 0:
start = event.ts
event.ts -= start
print(
f"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago"
)
perf = b["events"].open_perf_buffer(callback, struct_name="data_t")
print("Starting to poll... (Ctrl+C to stop)")
try:
while True:
b["events"].poll(1000)
except KeyboardInterrupt:
print("Stopping...")

View File

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bfe01ceb-2f27-41b3-b3ba-50ec65cfddda",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap\n",
"\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddb115f4-20a7-43bc-bb5b-ccbfd6031fc2",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=1)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" key = 0\n",
" tsp = last.lookup(key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" time_ms = delta // 1000000\n",
" print(f\"{time_ms}\")\n",
" last.delete(key)\n",
" else:\n",
" last.update(key, ktime())\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4f46574-9fd8-46e7-9c7b-27a36d07f200",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"while True:\n",
" try:\n",
" task, pid, cpu, flags, ts, ms = trace_fields()\n",
" if start == 0:\n",
" start = ts\n",
" ts -= start\n",
" print(f\"At time {ts} s: Multiple syncs detected, last {ms} ms ago\")\n",
" except KeyboardInterrupt:\n",
" exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,53 @@
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last.lookup(key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"{time_ms}")
last.delete(key)
else:
last.update(key, ktime())
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
while True:
try:
task, pid, cpu, flags, ts, ms = trace_fields()
if start == 0:
start = ts
ts -= start
print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago")
except KeyboardInterrupt:
exit()

View File

@ -0,0 +1,73 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bb49598f-b9cc-4ea8-8391-923cad513711",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5da237b0-1c7d-4ec5-8c24-696b1c1d97fa",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"sys_sync() called\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4c218ac-fe47-4fd1-a27b-c07e02f3cd05",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing sys_sync()... Ctrl-C to end.\")\n",
"trace_pipe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

23
BCC-Examples/sys_sync.py Normal file
View File

@ -0,0 +1,23 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def hello_world(ctx: c_void_p) -> c_int64:
print("sys_sync() called")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing sys_sync()... Ctrl-C to end.")
trace_pipe()

File diff suppressed because one or more lines are too long

127
BCC-Examples/vfsreadlat.py Normal file
View File

@ -0,0 +1,127 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
import matplotlib.pyplot as plt
import numpy as np
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64 # Latency in microseconds
@bpf
@map
def start() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
# Only track if latency > 1 microsecond
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Load BPF
print("Loading BPF program...")
b = BPF()
b.load()
b.attach_all()
# Collect latencies
latencies = []
def callback(cpu, event):
latencies.append(event.delta_us)
b["events"].open_perf_buffer(callback, struct_name="latency_event")
print("Tracing vfs_read latency... Hit Ctrl-C to end.")
try:
while True:
b["events"].poll(1000)
if len(latencies) > 0 and len(latencies) % 1000 == 0:
print(f"Collected {len(latencies)} samples...")
except KeyboardInterrupt:
print(f"Collected {len(latencies)} samples. Generating histogram...")
# Create histogram with matplotlib
if latencies:
# Use log scale for better visualization
log_latencies = np.log2(latencies)
plt.figure(figsize=(12, 6))
# Plot 1: Linear histogram
plt.subplot(1, 2, 1)
plt.hist(latencies, bins=50, edgecolor="black", alpha=0.7)
plt.xlabel("Latency (microseconds)")
plt.ylabel("Count")
plt.title("VFS Read Latency Distribution (Linear)")
plt.grid(True, alpha=0.3)
# Plot 2: Log2 histogram (like BCC)
plt.subplot(1, 2, 2)
plt.hist(log_latencies, bins=50, edgecolor="black", alpha=0.7, color="orange")
plt.xlabel("log2(Latency in µs)")
plt.ylabel("Count")
plt.title("VFS Read Latency Distribution (Log2)")
plt.grid(True, alpha=0.3)
# Add statistics
print("Statistics:")
print(f" Count: {len(latencies)}")
print(f" Min: {min(latencies)} µs")
print(f" Max: {max(latencies)} µs")
print(f" Mean: {np.mean(latencies):.2f} µs")
print(f" Median: {np.median(latencies):.2f} µs")
print(f" P95: {np.percentile(latencies, 95):.2f} µs")
print(f" P99: {np.percentile(latencies, 99):.2f} µs")
plt.tight_layout()
plt.savefig("vfs_read_latency.png", dpi=150)
print("Histogram saved to vfs_read_latency.png")
plt.show()
else:
print("No samples collected!")

View File

@ -0,0 +1,101 @@
"""BPF program for tracing VFS read latency."""
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
import argparse
from data_collector import LatencyCollector
from dashboard import LatencyDashboard
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64
@bpf
@map
def start() -> HashMap:
"""Map to store start timestamps by PID."""
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
"""Perf event array for sending latency events to userspace."""
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
"""Record start time when vfs_read is called."""
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
"""Calculate and record latency when vfs_read returns."""
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
# Only track latencies > 1 microsecond
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Monitor VFS read latency with live dashboard"
)
parser.add_argument(
"--host", default="0.0.0.0", help="Dashboard host (default: 0.0.0.0)"
)
parser.add_argument(
"--port", type=int, default=8050, help="Dashboard port (default: 8050)"
)
parser.add_argument(
"--buffer", type=int, default=10000, help="Recent data buffer size"
)
return parser.parse_args()
args = parse_args()
# Load BPF program
print("Loading BPF program...")
b = BPF()
b.load()
b.attach_all()
print("✅ BPF program loaded and attached")
# Setup data collector
collector = LatencyCollector(b, buffer_size=args.buffer)
collector.start()
# Create and run dashboard
dashboard = LatencyDashboard(collector)
dashboard.run(host=args.host, port=args.port)

View File

@ -0,0 +1,282 @@
"""Plotly Dash dashboard for visualizing latency data."""
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
class LatencyDashboard:
"""Interactive dashboard for latency visualization."""
def __init__(self, collector, title: str = "VFS Read Latency Monitor"):
self.collector = collector
self.app = dash.Dash(__name__)
self.app.title = title
self._setup_layout()
self._setup_callbacks()
def _setup_layout(self):
"""Create dashboard layout."""
self.app.layout = html.Div(
[
html.H1(
"🔥 VFS Read Latency Dashboard",
style={
"textAlign": "center",
"color": "#2c3e50",
"marginBottom": 20,
},
),
# Stats cards
html.Div(
[
self._create_stat_card(
"total-samples", "📊 Total Samples", "#3498db"
),
self._create_stat_card(
"mean-latency", "⚡ Mean Latency", "#e74c3c"
),
self._create_stat_card(
"p99-latency", "🔥 P99 Latency", "#f39c12"
),
],
style={
"display": "flex",
"justifyContent": "space-around",
"marginBottom": 30,
},
),
# Graphs - ✅ Make sure these IDs match the callback outputs
dcc.Graph(id="dual-histogram", style={"height": "450px"}),
dcc.Graph(id="log2-buckets", style={"height": "350px"}),
dcc.Graph(id="timeseries-graph", style={"height": "300px"}),
# Auto-update
dcc.Interval(id="interval-component", interval=1000, n_intervals=0),
],
style={"padding": 20, "fontFamily": "Arial, sans-serif"},
)
def _create_stat_card(self, id_name: str, title: str, color: str):
"""Create a statistics card."""
return html.Div(
[
html.H3(title, style={"color": color}),
html.H2(id=id_name, style={"fontSize": 48, "color": "#2c3e50"}),
],
className="stat-box",
style={
"background": "white",
"padding": 20,
"borderRadius": 10,
"boxShadow": "0 4px 6px rgba(0,0,0,0.1)",
"textAlign": "center",
"flex": 1,
"margin": "0 10px",
},
)
def _setup_callbacks(self):
"""Setup dashboard callbacks."""
@self.app.callback(
[
Output("total-samples", "children"),
Output("mean-latency", "children"),
Output("p99-latency", "children"),
Output("dual-histogram", "figure"), # ✅ Match layout IDs
Output("log2-buckets", "figure"), # ✅ Match layout IDs
Output("timeseries-graph", "figure"), # ✅ Match layout IDs
],
[Input("interval-component", "n_intervals")],
)
def update_dashboard(n):
stats = self.collector.get_stats()
if stats.total == 0:
return self._empty_state()
return (
f"{stats.total:,}",
f"{stats.mean:.1f} µs",
f"{stats.p99:.1f} µs",
self._create_dual_histogram(),
self._create_log2_buckets(),
self._create_timeseries(),
)
def _empty_state(self):
"""Return empty state for dashboard."""
empty_fig = go.Figure()
empty_fig.update_layout(
title="Waiting for data... Generate some disk I/O!", template="plotly_white"
)
# ✅ Return 6 values (3 stats + 3 figures)
return "0", "0 µs", "0 µs", empty_fig, empty_fig, empty_fig
def _create_dual_histogram(self) -> go.Figure:
"""Create side-by-side linear and log2 histograms."""
latencies = self.collector.get_all_latencies()
# Create subplots
fig = make_subplots(
rows=1,
cols=2,
subplot_titles=("Linear Scale", "Log2 Scale"),
horizontal_spacing=0.12,
)
# Linear histogram
fig.add_trace(
go.Histogram(
x=latencies,
nbinsx=50,
marker_color="rgb(55, 83, 109)",
opacity=0.75,
name="Linear",
),
row=1,
col=1,
)
# Log2 histogram
log2_latencies = np.log2(latencies + 1) # +1 to avoid log2(0)
fig.add_trace(
go.Histogram(
x=log2_latencies,
nbinsx=30,
marker_color="rgb(243, 156, 18)",
opacity=0.75,
name="Log2",
),
row=1,
col=2,
)
# Update axes
fig.update_xaxes(title_text="Latency (µs)", row=1, col=1)
fig.update_xaxes(title_text="log2(Latency in µs)", row=1, col=2)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=2)
fig.update_layout(
title_text="📊 Latency Distribution (Linear vs Log2)",
template="plotly_white",
showlegend=False,
height=450,
)
return fig
def _create_log2_buckets(self) -> go.Figure:
"""Create bar chart of log2 buckets (like BCC histogram)."""
buckets = self.collector.get_histogram_buckets()
if not buckets:
fig = go.Figure()
fig.update_layout(
title="🔥 Log2 Histogram - Waiting for data...", template="plotly_white"
)
return fig
# Sort buckets
sorted_buckets = sorted(buckets.keys())
counts = [buckets[b] for b in sorted_buckets]
# Create labels (e.g., "8-16µs", "16-32µs")
labels = []
hover_text = []
for bucket in sorted_buckets:
lower = 2**bucket
upper = 2 ** (bucket + 1)
labels.append(f"{lower}-{upper}")
# Calculate percentage
total = sum(counts)
pct = (buckets[bucket] / total) * 100 if total > 0 else 0
hover_text.append(
f"Range: {lower}-{upper} µs<br>"
f"Count: {buckets[bucket]:,}<br>"
f"Percentage: {pct:.2f}%"
)
# Create bar chart
fig = go.Figure()
fig.add_trace(
go.Bar(
x=labels,
y=counts,
marker=dict(
color=counts,
colorscale="YlOrRd",
showscale=True,
colorbar=dict(title="Count"),
),
text=counts,
textposition="outside",
hovertext=hover_text,
hoverinfo="text",
)
)
fig.update_layout(
title="🔥 Log2 Histogram (BCC-style buckets)",
xaxis_title="Latency Range (µs)",
yaxis_title="Count",
template="plotly_white",
height=350,
xaxis=dict(tickangle=-45),
)
return fig
def _create_timeseries(self) -> go.Figure:
"""Create time series figure."""
recent = self.collector.get_recent_latencies()
if not recent:
fig = go.Figure()
fig.update_layout(
title="⏱️ Real-time Latency - Waiting for data...",
template="plotly_white",
)
return fig
times = [d["time"] for d in recent]
lats = [d["latency"] for d in recent]
fig = go.Figure()
fig.add_trace(
go.Scatter(
x=times,
y=lats,
mode="lines",
line=dict(color="rgb(231, 76, 60)", width=2),
fill="tozeroy",
fillcolor="rgba(231, 76, 60, 0.2)",
)
)
fig.update_layout(
title="⏱️ Real-time Latency (Last 10,000 samples)",
xaxis_title="Time (seconds)",
yaxis_title="Latency (µs)",
template="plotly_white",
height=300,
)
return fig
def run(self, host: str = "0.0.0.0", port: int = 8050, debug: bool = False):
"""Run the dashboard server."""
print(f"\n{'=' * 60}")
print(f"🚀 Dashboard running at: http://{host}:{port}")
print(" Access from your browser to see live graphs")
print(
" Generate disk I/O to see data: dd if=/dev/zero of=/tmp/test bs=1M count=100"
)
print(f"{'=' * 60}\n")
self.app.run(debug=debug, host=host, port=port)

View File

@ -0,0 +1,96 @@
"""Data collection and management."""
import threading
import time
import numpy as np
from collections import deque
from dataclasses import dataclass
from typing import List, Dict
@dataclass
class LatencyStats:
"""Statistics computed from latency data."""
total: int = 0
mean: float = 0.0
median: float = 0.0
min: float = 0.0
max: float = 0.0
p95: float = 0.0
p99: float = 0.0
@classmethod
def from_array(cls, data: np.ndarray) -> "LatencyStats":
"""Compute stats from numpy array."""
if len(data) == 0:
return cls()
return cls(
total=len(data),
mean=float(np.mean(data)),
median=float(np.median(data)),
min=float(np.min(data)),
max=float(np.max(data)),
p95=float(np.percentile(data, 95)),
p99=float(np.percentile(data, 99)),
)
class LatencyCollector:
"""Collects and manages latency data from BPF."""
def __init__(self, bpf_object, buffer_size: int = 10000):
self.bpf = bpf_object
self.all_latencies: List[float] = []
self.recent_latencies = deque(maxlen=buffer_size) # type: ignore [var-annotated]
self.start_time = time.time()
self._lock = threading.Lock()
self._poll_thread = None
def callback(self, cpu: int, event):
"""Callback for BPF events."""
with self._lock:
self.all_latencies.append(event.delta_us)
self.recent_latencies.append(
{"time": time.time() - self.start_time, "latency": event.delta_us}
)
def start(self):
"""Start collecting data."""
self.bpf["events"].open_perf_buffer(self.callback, struct_name="latency_event")
def poll_loop():
while True:
self.bpf["events"].poll(100)
self._poll_thread = threading.Thread(target=poll_loop, daemon=True)
self._poll_thread.start()
print("✅ Data collection started")
def get_all_latencies(self) -> np.ndarray:
"""Get all latencies as numpy array."""
with self._lock:
return np.array(self.all_latencies) if self.all_latencies else np.array([])
def get_recent_latencies(self) -> List[Dict]:
"""Get recent latencies with timestamps."""
with self._lock:
return list(self.recent_latencies)
def get_stats(self) -> LatencyStats:
"""Compute current statistics."""
return LatencyStats.from_array(self.get_all_latencies())
def get_histogram_buckets(self) -> Dict[int, int]:
"""Get log2 histogram buckets."""
latencies = self.get_all_latencies()
if len(latencies) == 0:
return {}
log_buckets = np.floor(np.log2(latencies + 1)).astype(int)
buckets = {} # type: ignore [var-annotated]
for bucket in log_buckets:
buckets[bucket] = buckets.get(bucket, 0) + 1
return buckets

View File

@ -0,0 +1,178 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
from rich.console import Console
from rich.live import Live
from rich.table import Table
from rich.panel import Panel
from rich.layout import Layout
import numpy as np
import threading
import time
from collections import Counter
# ==================== BPF Setup ====================
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64
@bpf
@map
def start() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
console = Console()
console.print("[bold green]Loading BPF program...[/]")
b = BPF()
b.load()
b.attach_all()
# ==================== Data Collection ====================
all_latencies = []
histogram_buckets = Counter() # type: ignore [var-annotated]
def callback(cpu, event):
all_latencies.append(event.delta_us)
# Create log2 bucket
bucket = int(np.floor(np.log2(event.delta_us + 1)))
histogram_buckets[bucket] += 1
b["events"].open_perf_buffer(callback, struct_name="latency_event")
def poll_events():
while True:
b["events"].poll(100)
poll_thread = threading.Thread(target=poll_events, daemon=True)
poll_thread.start()
# ==================== Live Display ====================
def generate_display():
layout = Layout()
layout.split_column(
Layout(name="header", size=3),
Layout(name="stats", size=8),
Layout(name="histogram", size=20),
)
# Header
layout["header"].update(
Panel("[bold cyan]🔥 VFS Read Latency Monitor[/]", style="bold white on blue")
)
# Stats
if len(all_latencies) > 0:
lats = np.array(all_latencies)
stats_table = Table(show_header=False, box=None, padding=(0, 2))
stats_table.add_column(style="bold cyan")
stats_table.add_column(style="bold yellow")
stats_table.add_row("📊 Total Samples:", f"{len(lats):,}")
stats_table.add_row("⚡ Mean Latency:", f"{np.mean(lats):.2f} µs")
stats_table.add_row("📉 Min Latency:", f"{np.min(lats):.2f} µs")
stats_table.add_row("📈 Max Latency:", f"{np.max(lats):.2f} µs")
stats_table.add_row("🎯 P95 Latency:", f"{np.percentile(lats, 95):.2f} µs")
stats_table.add_row("🔥 P99 Latency:", f"{np.percentile(lats, 99):.2f} µs")
layout["stats"].update(
Panel(stats_table, title="Statistics", border_style="green")
)
else:
layout["stats"].update(
Panel("[yellow]Waiting for data...[/]", border_style="yellow")
)
# Histogram
if histogram_buckets:
hist_table = Table(title="Latency Distribution", box=None)
hist_table.add_column("Range", style="cyan", no_wrap=True)
hist_table.add_column("Count", justify="right", style="yellow")
hist_table.add_column("Distribution", style="green")
max_count = max(histogram_buckets.values())
for bucket in sorted(histogram_buckets.keys()):
count = histogram_buckets[bucket]
lower = 2**bucket
upper = 2 ** (bucket + 1)
# Create bar
bar_width = int((count / max_count) * 40)
bar = "" * bar_width
hist_table.add_row(
f"{lower:5d}-{upper:5d} µs",
f"{count:6d}",
f"[green]{bar}[/] {count / len(all_latencies) * 100:.1f}%",
)
layout["histogram"].update(Panel(hist_table, border_style="green"))
return layout
try:
with Live(generate_display(), refresh_per_second=2, console=console) as live:
while True:
time.sleep(0.5)
live.update(generate_display())
except KeyboardInterrupt:
console.print("\n[bold red]Stopping...[/]")
if all_latencies:
console.print(f"\n[bold green]✅ Collected {len(all_latencies):,} samples[/]")

View File

@ -40,6 +40,12 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses
---
## Try It Out!
Run
```bash
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
```
## Installation
Dependencies:
@ -83,14 +89,14 @@ def hist() -> HashMap:
def hello(ctx: c_void_p) -> c_int64:
process_id = pid()
one = 1
prev = hist().lookup(process_id)
prev = hist.lookup(process_id)
if prev:
previous_value = prev + 1
print(f"count: {previous_value} with {process_id}")
hist().update(process_id, previous_value)
hist.update(process_id, previous_value)
return c_int64(0)
else:
hist().update(process_id, one)
hist.update(process_id, one)
return c_int64(0)

12
TODO.md
View File

@ -1,12 +0,0 @@
## Short term
- Implement enough functionality to port the BCC tutorial examples in PythonBPF
- Add all maps
- XDP support in pylibbpf
- ringbuf support
- recursive expression resolution
## Long term
- Refactor the codebase to be better than a hackathon project
- Port to C++ and use actual LLVM?

View File

@ -21,17 +21,17 @@ def last() -> HashMap:
@section("tracepoint/syscalls/sys_enter_execve")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last().lookup(key)
tsp = last.lookup(key)
if tsp:
kt = ktime()
delta = kt - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"Execve syscall entered within last second, last {time_ms} ms ago")
last().delete(key)
last.delete(key)
else:
kt = ktime()
last().update(key, kt)
last.update(key, kt)
return c_int64(0)

File diff suppressed because one or more lines are too long

View File

@ -3,7 +3,6 @@ import time
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.helper import pid
from pythonbpf.maps import HashMap
from pylibbpf import BpfMap
from ctypes import c_void_p, c_int64, c_uint64, c_int32
import matplotlib.pyplot as plt
@ -26,14 +25,14 @@ def hist() -> HashMap:
def hello(ctx: c_void_p) -> c_int64:
process_id = pid()
one = 1
prev = hist().lookup(process_id)
prev = hist.lookup(process_id)
if prev:
previous_value = prev + 1
print(f"count: {previous_value} with {process_id}")
hist().update(process_id, previous_value)
hist.update(process_id, previous_value)
return c_int64(0)
else:
hist().update(process_id, one)
hist.update(process_id, one)
return c_int64(0)
@ -44,12 +43,12 @@ def LICENSE() -> str:
b = BPF()
b.load_and_attach()
hist = BpfMap(b, hist)
b.load()
b.attach_all()
print("Recording")
time.sleep(10)
counts = list(hist.values())
counts = list(b["hist"].values())
plt.hist(counts, bins=20)
plt.xlabel("Clone calls per PID")

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
# Instructions to how to run this program
@ -21,10 +21,6 @@ def LICENSE() -> str:
b = BPF()
b.load_and_attach()
if b.is_loaded() and b.is_attached():
print("Successfully loaded and attached")
else:
print("Could not load successfully")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall.
b.load()
b.attach_all()
trace_pipe()

29
examples/kprobes.py Normal file
View File

@ -0,0 +1,29 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("kretprobe/do_unlinkat")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@section("kprobe/do_unlinkat")
def hello_world2(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
print("running")
trace_pipe()

View File

@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32:
dataobj.pid = pid()
dataobj.ts = ktime()
# dataobj.comm = strobj
print(f"clone called at {dataobj.ts} by pid" f"{dataobj.pid}, comm {strobj}")
print(f"clone called at {dataobj.ts} by pid{dataobj.pid}, comm {strobj}")
events.output(dataobj)
return c_int32(0)

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_int64, c_void_p
from ctypes import c_void_p, c_int64
# Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf
@ -23,14 +23,14 @@ def count() -> HashMap:
def hello_world(ctx: c_void_p) -> c_int64:
key = 0
one = 1
prev = count().lookup(key)
prev = count.lookup(key)
if prev:
prevval = prev + 1
print(f"count: {prevval}")
count().update(key, prevval)
count.update(key, prevval)
return XDP_PASS
else:
count().update(key, one)
count.update(key, one)
return XDP_PASS
@ -41,4 +41,5 @@ def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
compile()

View File

@ -4,15 +4,29 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.4"
version = "0.1.6"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
{ name = "varun-r-mallya", email="varunrmallya@gmail.com" }
]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: System :: Operating System Kernels :: Linux",
]
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.8"
requires-python = ">=3.10"
dependencies = [
"llvmlite",

View File

@ -1,5 +1,6 @@
from .decorators import bpf, map, section, bpfglobal, struct
from .codegen import compile_to_ir, compile, BPF
from .utils import trace_pipe, trace_fields
__all__ = [
"bpf",
@ -10,4 +11,6 @@ __all__ = [
"compile_to_ir",
"compile",
"BPF",
"trace_pipe",
"trace_fields",
]

View File

@ -0,0 +1,449 @@
import ast
import logging
import ctypes
from llvmlite import ir
from .local_symbol import LocalSymbol
from pythonbpf.helper import HelperHandlerRegistry
from pythonbpf.vmlinux_parser.dependency_node import Field
from .expr import VmlinuxHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.maps import BPFMapType
logger = logging.getLogger(__name__)
def create_targets_and_rvals(stmt):
"""Create lists of targets and right-hand values from an assignment statement."""
if isinstance(stmt.targets[0], ast.Tuple):
if not isinstance(stmt.value, ast.Tuple):
logger.warning("Mismatched multi-target assignment, skipping allocation")
return [], []
targets, rvals = stmt.targets[0].elts, stmt.value.elts
if len(targets) != len(rvals):
logger.warning("length of LHS != length of RHS, skipping allocation")
return [], []
return targets, rvals
return stmt.targets, [stmt.value]
def handle_assign_allocation(
builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle memory allocation for assignment statements."""
logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}")
# NOTE: Support multi-target assignments (e.g.: a, b = 1, 2)
targets, rvals = create_targets_and_rvals(stmt)
for target, rval in zip(targets, rvals):
# Skip non-name targets (e.g., struct field assignments)
if isinstance(target, ast.Attribute):
logger.debug(
f"Struct field assignment to {target.attr}, no allocation needed"
)
continue
if not isinstance(target, ast.Name):
logger.warning(
f"Unsupported assignment target type: {type(target).__name__}"
)
continue
var_name = target.id
# Skip if already allocated
if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping")
continue
# Determine type and allocate based on rval
if isinstance(rval, ast.Call):
_allocate_for_call(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(rval, ast.Constant):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.BinOp):
_allocate_for_binop(builder, var_name, local_sym_tab)
elif isinstance(rval, ast.Name):
# Variable-to-variable assignment (b = a)
_allocate_for_name(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.Attribute):
# Struct field-to-variable assignment (a = dat.fld)
_allocate_for_attribute(
builder, var_name, rval, local_sym_tab, structs_sym_tab
)
else:
logger.warning(
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
)
def _allocate_for_call(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Allocate memory for variable assigned from a call."""
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
# C type constructors
if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64", "c_void_p"):
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as {call_type}")
# Helper functions
elif HelperHandlerRegistry.has_handler(call_type):
ir_type = ir.IntType(64) # Assume i64 return type
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for helper {call_type}")
# Deref function
elif call_type == "deref":
ir_type = ir.IntType(64) # Assume i64 return type
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for deref")
# Struct constructors
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
var = builder.alloca(struct_info.ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
logger.info(f"Pre-allocated {var_name} for struct {call_type}")
elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type):
# When calling struct_name(pointer), we're doing a cast, not construction
# So we allocate as a pointer (i64) not as the actual struct
var = builder.alloca(ir.IntType(64), name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(
var, ir.IntType(64), VmlinuxHandlerRegistry.get_struct_type(call_type)
)
logger.info(
f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}"
)
else:
logger.warning(f"Unknown call type for allocation: {call_type}")
elif isinstance(rval.func, ast.Attribute):
# Map method calls - need double allocation for ptr handling
_allocate_for_map_method(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
else:
logger.warning(f"Unsupported call function type for {var_name}")
def _allocate_for_map_method(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Allocate memory for variable assigned from map method (double alloc)."""
map_name = rval.func.value.id
method_name = rval.func.attr
# NOTE: We will have to special case HashMap.lookup which returns a pointer to value type
# The value type can be a struct as well, so we need to handle that properly
# This special casing is not ideal, as over time other map methods may need similar handling
# But for now, we will just handle lookup specifically
if map_name not in map_sym_tab:
logger.error(f"Map '{map_name}' not found for allocation")
return
if method_name != "lookup":
# Fallback allocation for other map methods
_allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
return
map_params = map_sym_tab[map_name].params
if map_params["type"] != BPFMapType.HASH:
logger.warning(
"Map method lookup used on non-hash map, using fallback allocation"
)
_allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
return
value_type = map_params["value"]
# Determine IR type for value
if isinstance(value_type, str) and value_type in structs_sym_tab:
struct_info = structs_sym_tab[value_type]
value_ir_type = struct_info.ir_type
else:
value_ir_type = ctypes_to_ir(value_type)
if value_ir_type is None:
logger.warning(
f"Could not determine IR type for map value '{value_type}', using fallback allocation"
)
_allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
return
# Main variable (pointer to pointer)
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
# Temporary variable for computed values
tmp_ir_type = value_ir_type
var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type)
logger.info(
f"Pre-allocated {var_name} and {var_name}_tmp for map method lookup of type {value_ir_type}"
)
def _allocate_for_map_method_fallback(builder, var_name, local_sym_tab):
"""Fallback allocation for map method variable (i64* and i64**)."""
# Main variable (pointer to pointer)
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
# Temporary variable for computed values
tmp_ir_type = ir.IntType(64)
var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type)
logger.info(
f"Pre-allocated {var_name} and {var_name}_tmp for map method (fallback)"
)
def _allocate_for_constant(builder, var_name, rval, local_sym_tab):
"""Allocate memory for variable assigned from a constant."""
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as bool")
elif isinstance(rval.value, int):
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as i64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as string")
else:
logger.warning(
f"Unsupported constant type for {var_name}: {type(rval.value).__name__}"
)
def _allocate_for_binop(builder, var_name, local_sym_tab):
"""Allocate memory for variable assigned from a binary operation."""
ir_type = ir.IntType(64) # Assume i64 result
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for binop result")
def _get_type_name(ir_type):
"""Get a string representation of an IR type."""
if isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{_get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
def allocate_temp_pool(builder, max_temps, local_sym_tab):
"""Allocate the temporary scratch space pool for helper arguments."""
if not max_temps:
logger.info("No temp pool allocation needed")
return
for tmp_type, cnt in max_temps.items():
type_name = _get_type_name(tmp_type)
logger.info(f"Allocating temp pool of {cnt} variables of type {type_name}")
for i in range(cnt):
temp_name = f"__helper_temp_{type_name}_{i}"
temp_var = builder.alloca(tmp_type, name=temp_name)
temp_var.align = _get_alignment(tmp_type)
local_sym_tab[temp_name] = LocalSymbol(temp_var, tmp_type)
logger.debug(f"Allocated temp variable: {temp_name}")
def _allocate_for_name(builder, var_name, rval, local_sym_tab):
"""Allocate memory for variable-to-variable assignment (b = a)."""
source_var = rval.id
if source_var not in local_sym_tab:
logger.error(f"Source variable '{source_var}' not found in symbol table")
return
# Get type and metadata from source variable
source_symbol = local_sym_tab[source_var]
# Allocate with same type and alignment
var = _allocate_with_type(builder, var_name, source_symbol.ir_type)
local_sym_tab[var_name] = LocalSymbol(
var, source_symbol.ir_type, source_symbol.metadata
)
logger.info(
f"Pre-allocated {var_name} from {source_var} with type {source_symbol.ir_type}"
)
def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab):
"""Allocate memory for struct field-to-variable assignment (a = dat.fld)."""
if not isinstance(rval.value, ast.Name):
logger.warning(f"Complex attribute access not supported for {var_name}")
return
struct_var = rval.value.id
field_name = rval.attr
# Validate struct and field
if struct_var not in local_sym_tab:
logger.error(f"Struct variable '{struct_var}' not found")
return
struct_type: type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab:
if VmlinuxHandlerRegistry.is_vmlinux_struct(struct_type.__name__):
# Handle vmlinux struct field access
vmlinux_struct_name = struct_type.__name__
if not VmlinuxHandlerRegistry.has_field(vmlinux_struct_name, field_name):
logger.error(
f"Field '{field_name}' not found in vmlinux struct '{vmlinux_struct_name}'"
)
return
field_type: tuple[ir.GlobalVariable, Field] = (
VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
)
field_ir, field = field_type
# Determine the actual IR type based on the field's type
actual_ir_type = None
# Check if it's a ctypes primitive
if field.type.__module__ == ctypes.__name__:
try:
field_size_bytes = ctypes.sizeof(field.type)
field_size_bits = field_size_bytes * 8
if field_size_bits in [8, 16, 32, 64]:
# Special case: struct_xdp_md i32 fields should allocate as i64
# because load_ctx_field will zero-extend them to i64
if (
vmlinux_struct_name == "struct_xdp_md"
and field_size_bits == 32
):
actual_ir_type = ir.IntType(64)
logger.info(
f"Allocating {var_name} as i64 for i32 field from struct_xdp_md.{field_name} "
"(will be zero-extended during load)"
)
else:
actual_ir_type = ir.IntType(field_size_bits)
else:
logger.warning(
f"Unusual field size {field_size_bits} bits for {field_name}"
)
actual_ir_type = ir.IntType(64)
except Exception as e:
logger.warning(
f"Could not determine size for ctypes field {field_name}: {e}"
)
actual_ir_type = ir.IntType(64)
# Check if it's a nested vmlinux struct or complex type
elif field.type.__module__ == "vmlinux":
# For pointers to structs, use pointer type (64-bit)
if field.ctype_complex_type is not None and issubclass(
field.ctype_complex_type, ctypes._Pointer
):
actual_ir_type = ir.IntType(64) # Pointer is always 64-bit
# For embedded structs, this is more complex - might need different handling
else:
logger.warning(
f"Field {field_name} is a nested vmlinux struct, using i64 for now"
)
actual_ir_type = ir.IntType(64)
else:
logger.warning(
f"Unknown field type module {field.type.__module__} for {field_name}"
)
actual_ir_type = ir.IntType(64)
# Allocate with the actual IR type
var = _allocate_with_type(builder, var_name, actual_ir_type)
local_sym_tab[var_name] = LocalSymbol(
var, actual_ir_type, field
) # <-- Store Field metadata
logger.info(
f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}"
)
return
else:
logger.error(f"Struct type '{struct_type}' not found")
return
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
return
# Get field type
field_type = struct_info.field_type(field_name)
# Special case: char array -> allocate as i8* pointer instead
if (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
alloc_type = ir.PointerType(ir.IntType(8))
logger.info(f"Allocating {var_name} as i8* (pointer to char array)")
else:
alloc_type = field_type
var = _allocate_with_type(builder, var_name, alloc_type)
local_sym_tab[var_name] = LocalSymbol(var, alloc_type)
logger.info(
f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}"
)
def _allocate_with_type(builder, var_name, ir_type):
"""Allocate variable with appropriate alignment for type."""
var = builder.alloca(ir_type, name=var_name)
var.align = _get_alignment(ir_type)
return var
def _get_alignment(ir_type):
"""Get appropriate alignment for IR type."""
if isinstance(ir_type, ir.IntType):
return ir_type.width // 8
elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType):
return ir_type.element.width // 8
else:
return 8 # Default: pointer size

276
pythonbpf/assign_pass.py Normal file
View File

@ -0,0 +1,276 @@
import ast
import logging
from inspect import isclass
from llvmlite import ir
from pythonbpf.expr import eval_expr
from pythonbpf.helper import emit_probe_read_kernel_str_call
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.vmlinux_parser.dependency_node import Field
logger = logging.getLogger(__name__)
def handle_struct_field_assignment(
func, module, builder, target, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle struct field assignment (obj.field = value)."""
var_name = target.value.id
field_name = target.attr
if var_name not in local_sym_tab:
logger.error(f"Variable '{var_name}' not found in symbol table")
return
struct_type = local_sym_tab[var_name].metadata
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
return
# Get field pointer and evaluate value
field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
field_type = struct_info.field_type(field_name)
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
return
val, val_type = val_result
# Special case: i8* string to [N x i8] char array
if _is_char_array(field_type) and _is_i8_ptr(val_type):
_copy_string_to_char_array(
func,
module,
builder,
val,
field_ptr,
field_type,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.info(f"Copied string to char array {var_name}.{field_name}")
return
# Regular assignment
builder.store(val, field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
def _copy_string_to_char_array(
func,
module,
builder,
src_ptr,
dst_ptr,
array_type,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
):
"""Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str"""
array_size = array_type.count
# Get pointer to first element: [N x i8]* -> i8*
dst_i8_ptr = builder.gep(
dst_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)
# Use the shared emitter function
emit_probe_read_kernel_str_call(builder, dst_i8_ptr, array_size, src_ptr)
def _is_char_array(ir_type):
"""Check if type is [N x i8]."""
return (
isinstance(ir_type, ir.ArrayType)
and isinstance(ir_type.element, ir.IntType)
and ir_type.element.width == 8
)
def _is_i8_ptr(ir_type):
"""Check if type is i8*."""
return (
isinstance(ir_type, ir.PointerType)
and isinstance(ir_type.pointee, ir.IntType)
and ir_type.pointee.width == 8
)
def handle_variable_assignment(
func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle single named variable assignment."""
if var_name not in local_sym_tab:
logger.error(f"Variable {var_name} not declared.")
return False
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
# NOTE: Special case for struct initialization
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
struct_name = rval.func.id
if struct_name in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[struct_name]
ir_struct = struct_info.ir_type
builder.store(ir.Constant(ir_struct, None), var_ptr)
logger.info(f"Initialized struct {struct_name} for variable {var_name}")
return True
# Special case: struct field char array -> pointer
# Handle this before eval_expr to get the pointer, not the value
if isinstance(rval, ast.Attribute) and isinstance(rval.value, ast.Name):
converted_val = _try_convert_char_array_to_ptr(
rval, var_type, builder, local_sym_tab, structs_sym_tab
)
if converted_val is not None:
builder.store(converted_val, var_ptr)
logger.info(f"Assigned char array pointer to {var_name}")
return True
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}")
return False
val, val_type = val_result
logger.info(
f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}"
)
if val_type != var_type:
# Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers
if isclass(val_type) and (val_type.__module__ == "vmlinux"):
logger.info("Handling vmlinux struct pointer assignment")
# vmlinux struct pointers: val is a pointer, need to convert to i64
if isinstance(var_type, ir.IntType) and var_type.width == 64:
# Convert pointer to i64 using ptrtoint
if isinstance(val.type, ir.PointerType):
val = builder.ptrtoint(val, ir.IntType(64))
logger.info(
"Converted vmlinux struct pointer to i64 using ptrtoint"
)
builder.store(val, var_ptr)
logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)")
return True
else:
logger.error(
f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}"
)
return False
if isinstance(val_type, Field):
logger.info("Handling assignment to struct field")
# Special handling for struct_xdp_md i32 fields that are zero-extended to i64
# The load_ctx_field already extended them, so val is i64 but val_type.type shows c_uint
if (
hasattr(val_type, "type")
and val_type.type.__name__ == "c_uint"
and isinstance(var_type, ir.IntType)
and var_type.width == 64
):
# This is the struct_xdp_md case - value is already i64
builder.store(val, var_ptr)
logger.info(
f"Assigned zero-extended struct_xdp_md i32 field to {var_name} (i64)"
)
return True
# TODO: handling only ctype struct fields for now. Handle other stuff too later.
elif var_type == ctypes_to_ir(val_type.type.__name__):
builder.store(val, var_ptr)
logger.info(f"Assigned ctype struct field to {var_name}")
return True
else:
logger.error(
f"Failed to assign ctype struct field to {var_name}: {val_type} != {var_type}"
)
return False
elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType):
# Allow implicit int widening
if val_type.width < var_type.width:
val = builder.sext(val, var_type)
logger.info(f"Implicitly widened int for variable {var_name}")
elif val_type.width > var_type.width:
val = builder.trunc(val, var_type)
logger.info(f"Implicitly truncated int for variable {var_name}")
elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.PointerType):
# NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL
logger.info(
f"Creating temporary variable for pointer assignment to {var_name}"
)
var_ptr_tmp = local_sym_tab[f"{var_name}_tmp"].var
builder.store(val, var_ptr_tmp)
val = var_ptr_tmp
else:
logger.error(
f"Type mismatch for variable {var_name}: {val_type} vs {var_type}"
)
return False
builder.store(val, var_ptr)
logger.info(f"Assigned value to variable {var_name}")
return True
def _try_convert_char_array_to_ptr(
rval, var_type, builder, local_sym_tab, structs_sym_tab
):
"""Try to convert char array field to i8* pointer"""
# Only convert if target is i8*
if not (
isinstance(var_type, ir.PointerType)
and isinstance(var_type.pointee, ir.IntType)
and var_type.pointee.width == 8
):
return None
struct_var = rval.value.id
field_name = rval.attr
# Validate struct
if struct_var not in local_sym_tab:
return None
struct_type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab:
return None
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
return None
field_type = struct_info.field_type(field_name)
# Check if it's a char array
if not (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
return None
# Get pointer to struct field
struct_ptr = local_sym_tab[struct_var].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
# GEP to first element: [N x i8]* -> i8*
return builder.gep(
field_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)

View File

@ -1,72 +0,0 @@
import ast
from llvmlite import ir
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def recursive_dereferencer(var, builder):
"""dereference until primitive type comes out"""
# TODO: Not worrying about stack overflow for now
logger.info(f"Dereferencing {var}, type is {var.type}")
if isinstance(var.type, ir.PointerType):
a = builder.load(var)
return recursive_dereferencer(a, builder)
elif isinstance(var.type, ir.IntType):
return var
else:
raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def get_operand_value(operand, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants."""
if isinstance(operand, ast.Name):
if operand.id in local_sym_tab:
return recursive_dereferencer(local_sym_tab[operand.id].var, builder)
raise ValueError(f"Undefined variable: {operand.id}")
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
return ir.Constant(ir.IntType(64), operand.value)
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
return handle_binary_op_impl(operand, builder, local_sym_tab)
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl(rval, builder, local_sym_tab):
op = rval.op
left = get_operand_value(rval.left, builder, local_sym_tab)
right = get_operand_value(rval.right, builder, local_sym_tab)
logger.info(f"left is {left}, right is {right}, op is {op}")
# Map AST operation nodes to LLVM IR builder methods
op_map = {
ast.Add: builder.add,
ast.Sub: builder.sub,
ast.Mult: builder.mul,
ast.Div: builder.sdiv,
ast.Mod: builder.srem,
ast.LShift: builder.shl,
ast.RShift: builder.lshr,
ast.BitOr: builder.or_,
ast.BitXor: builder.xor,
ast.BitAnd: builder.and_,
ast.FloorDiv: builder.udiv,
}
if type(op) in op_map:
result = op_map[type(op)](left, right)
return result
else:
raise SyntaxError("Unsupported binary operation")
def handle_binary_op(rval, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, builder, local_sym_tab)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type

View File

@ -4,20 +4,55 @@ from .license_pass import license_processing
from .functions import func_proc
from .maps import maps_proc
from .structs import structs_proc
from .globals_pass import globals_processing
from .vmlinux_parser import vmlinux_proc
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
from .expr import VmlinuxHandlerRegistry
from .globals_pass import (
globals_list_creation,
globals_processing,
populate_global_symbol_table,
)
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os
import subprocess
import inspect
from pathlib import Path
from pylibbpf import BpfProgram
from pylibbpf import BpfObject
import tempfile
from logging import Logger
import logging
import re
logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.4"
VERSION = "v0.1.6"
def finalize_module(original_str):
"""After all IR generation is complete, we monkey patch btf_ama attribute"""
# Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses.
pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
replacement = r'\1 "btf_ama"'
return re.sub(pattern, replacement, original_str)
def bpf_passthrough_gen(module):
i32_ty = ir.IntType(32)
ptr_ty = ir.PointerType(ir.IntType(8))
fnty = ir.FunctionType(ptr_ty, [i32_ty, ptr_ty])
# Declare the intrinsic
passthrough = ir.Function(module, fnty, "llvm.bpf.passthrough.p0.p0")
# Set function attributes
# TODO: the ones commented are supposed to be there but cannot be added due to llvmlite limitations at the moment
# passthrough.attributes.add("nofree")
# passthrough.attributes.add("nosync")
passthrough.attributes.add("nounwind")
# passthrough.attributes.add("memory(none)")
return passthrough
def find_bpf_chunks(tree):
@ -40,12 +75,22 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}")
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
bpf_passthrough_gen(module)
vmlinux_symtab = vmlinux_proc(tree, module)
if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab)
VmlinuxHandlerRegistry.set_handler(handler)
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks, structs_sym_tab)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module)
return structs_sym_tab, map_sym_tab
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
@ -71,7 +116,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
True,
)
processor(source, filename, module)
structs_sym_tab, maps_sym_tab = processor(source, filename, module)
wchar_size = module.add_metadata(
[
@ -112,16 +157,45 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
module_string: str = finalize_module(str(module))
logger.info(f"IR written to {output}")
with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n')
f.write(str(module))
f.write(module_string)
f.write("\n")
return output
return output, structs_sym_tab, maps_sym_tab
def compile(loglevel=logging.INFO) -> bool:
def _run_llc(ll_file, obj_file):
"""Compile LLVM IR to BPF object file using llc."""
logger.info(f"Compiling IR to object: {ll_file} -> {obj_file}")
result = subprocess.run(
[
"llc",
"-march=bpf",
"-filetype=obj",
"-O2",
str(ll_file),
"-o",
str(obj_file),
],
check=True,
capture_output=True,
text=True,
)
if result.returncode == 0:
logger.info(f"Object file written to {obj_file}")
return True
else:
logger.error(f"llc compilation failed: {result.stderr}")
return False
def compile(loglevel=logging.WARNING) -> bool:
# Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -129,56 +203,32 @@ def compile(loglevel=logging.INFO) -> bool:
ll_file = Path("/tmp") / caller_file.with_suffix(".ll").name
o_file = caller_file.with_suffix(".o")
success = True
success = (
compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success
_, structs_sym_tab, maps_sym_tab = compile_to_ir(
str(caller_file), str(ll_file), loglevel=loglevel
)
success = bool(
subprocess.run(
[
"llc",
"-march=bpf",
"-filetype=obj",
"-O2",
str(ll_file),
"-o",
str(o_file),
],
check=True,
)
and success
)
if not _run_llc(ll_file, o_file):
logger.error("Compilation to object file failed.")
return False
logger.info(f"Object written to {o_file}")
return success
return True
def BPF(loglevel=logging.INFO) -> BpfProgram:
def BPF(loglevel=logging.WARNING) -> BpfObject:
caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile(
mode="w+", delete=True, suffix=".py"
) as f, tempfile.NamedTemporaryFile(
mode="w+", delete=True, suffix=".ll"
) as inter, tempfile.NamedTemporaryFile(
mode="w+", delete=False, suffix=".o"
) as obj_file:
with (
tempfile.NamedTemporaryFile(mode="w+", delete=True, suffix=".py") as f,
tempfile.NamedTemporaryFile(mode="w+", delete=True, suffix=".ll") as inter,
tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".o") as obj_file,
):
f.write(src)
f.flush()
source = f.name
compile_to_ir(source, str(inter.name), loglevel=loglevel)
subprocess.run(
[
"llc",
"-march=bpf",
"-filetype=obj",
"-O2",
str(inter.name),
"-o",
str(obj_file.name),
],
check=True,
_, structs_sym_tab, maps_sym_tab = compile_to_ir(
source, str(inter.name), loglevel=loglevel
)
_run_llc(str(inter.name), str(obj_file.name))
return BpfProgram(str(obj_file.name))
return BpfObject(str(obj_file.name), structs=structs_sym_tab)

View File

@ -49,6 +49,10 @@ class DebugInfoGenerator:
)
return self._type_cache[key]
def get_uint8_type(self) -> Any:
"""Get debug info for signed 8-bit integer"""
return self.get_basic_type("char", 8, dc.DW_ATE_unsigned)
def get_int32_type(self) -> Any:
"""Get debug info for signed 32-bit integer"""
return self.get_basic_type("int", 32, dc.DW_ATE_signed)
@ -81,6 +85,20 @@ class DebugInfoGenerator:
},
)
def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any:
"""Create an array type of the given base type with specified count"""
base_type, type_sizing = type_info
subrange = self.module.add_debug_info("DISubrange", {"count": count})
return self.module.add_debug_info(
"DICompositeType",
{
"tag": dc.DW_TAG_array_type,
"baseType": base_type,
"size": type_sizing,
"elements": [subrange],
},
)
@staticmethod
def _compute_array_size(base_type: Any, count: int) -> int:
# Extract size from base_type if possible
@ -101,6 +119,23 @@ class DebugInfoGenerator:
},
)
def create_struct_member_vmlinux(
self, name: str, base_type_with_size: Any, offset: int
) -> Any:
"""Create a struct member with the given name, type, and offset"""
base_type, type_size = base_type_with_size
return self.module.add_debug_info(
"DIDerivedType",
{
"tag": dc.DW_TAG_member,
"name": name,
"file": self.module._file_metadata,
"baseType": base_type,
"size": type_size,
"offset": offset,
},
)
def create_struct_type(
self, members: List[Any], size: int, is_distinct: bool
) -> Any:
@ -116,6 +151,22 @@ class DebugInfoGenerator:
is_distinct=is_distinct,
)
def create_struct_type_with_name(
self, name: str, members: List[Any], size: int, is_distinct: bool
) -> Any:
"""Create a struct type with the given members and size"""
return self.module.add_debug_info(
"DICompositeType",
{
"name": name,
"tag": dc.DW_TAG_structure_type,
"file": self.module._file_metadata,
"size": size,
"elements": members,
},
is_distinct=is_distinct,
)
def create_global_var_debug_info(
self, name: str, var_type: Any, is_local: bool = False
) -> Any:
@ -137,3 +188,83 @@ class DebugInfoGenerator:
"DIGlobalVariableExpression",
{"var": global_var, "expr": self.module.add_debug_info("DIExpression", {})},
)
def get_int64_type(self):
return self.get_basic_type("long", 64, dc.DW_ATE_signed)
def create_subroutine_type(self, return_type, param_types):
"""
Create a DISubroutineType given return type and list of parameter types.
Equivalent to: !DISubroutineType(types: !{ret, args...})
"""
type_array = [return_type]
if isinstance(param_types, (list, tuple)):
type_array.extend(param_types)
else:
type_array.append(param_types)
return self.module.add_debug_info("DISubroutineType", {"types": type_array})
def create_local_variable_debug_info(
self, name: str, arg: int, var_type: Any
) -> Any:
"""
Create debug info for a local variable (DILocalVariable) without scope.
Example:
!DILocalVariable(name: "ctx", arg: 1, file: !3, line: 20, type: !7)
"""
return self.module.add_debug_info(
"DILocalVariable",
{
"name": name,
"arg": arg,
"file": self.module._file_metadata,
"type": var_type,
},
)
def add_scope_to_local_variable(self, local_variable_debug_info, scope_value):
"""
Add scope information to an existing local variable debug info object.
"""
# TODO: this is a workaround a flaw in the debug info generation. Fix this if possible in the future.
# We should not be touching llvmlite's internals like this.
if hasattr(local_variable_debug_info, "operands"):
# LLVM metadata operands is a tuple, so we need to rebuild it
existing_operands = local_variable_debug_info.operands
# Convert tuple to list, add scope, convert back to tuple
operands_list = list(existing_operands)
operands_list.append(("scope", scope_value))
# Reassign the new tuple
local_variable_debug_info.operands = tuple(operands_list)
def create_subprogram(
self, name: str, subroutine_type: Any, retained_nodes: List[Any]
) -> Any:
"""
Create a DISubprogram for a function.
Args:
name: Function name
subroutine_type: DISubroutineType for the function signature
retained_nodes: List of DILocalVariable nodes for function parameters/variables
Returns:
DISubprogram metadata
"""
return self.module.add_debug_info(
"DISubprogram",
{
"name": name,
"scope": self.module._file_metadata,
"file": self.module._file_metadata,
"type": subroutine_type,
# TODO: the following flags do not exist at the moment in our dwarf constants file. We need to add them.
# "flags": dc.DW_FLAG_Prototyped | dc.DW_FLAG_AllCallsDescribed,
# "spFlags": dc.DW_SPFLAG_Definition | dc.DW_SPFLAG_Optimized,
"unit": self.module._debug_compile_unit,
"retainedNodes": retained_nodes,
},
is_distinct=True,
)

View File

@ -1,4 +1,16 @@
from .expr_pass import eval_expr, handle_expr
from .type_normalization import convert_to_bool
from .expr_pass import eval_expr, handle_expr, get_operand_value
from .type_normalization import convert_to_bool, get_base_type_and_depth
from .ir_ops import deref_to_depth
from .call_registry import CallHandlerRegistry
from .vmlinux_registry import VmlinuxHandlerRegistry
__all__ = ["eval_expr", "handle_expr", "convert_to_bool"]
__all__ = [
"eval_expr",
"handle_expr",
"convert_to_bool",
"get_base_type_and_depth",
"deref_to_depth",
"get_operand_value",
"CallHandlerRegistry",
"VmlinuxHandlerRegistry",
]

View File

@ -0,0 +1,20 @@
class CallHandlerRegistry:
"""Registry for handling different types of calls (helpers, etc.)"""
_handler = None
@classmethod
def set_handler(cls, handler):
"""Set the handler for unknown calls"""
cls._handler = handler
@classmethod
def handle_call(
cls, call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle a call using the registered handler"""
if cls._handler is None:
return None
return cls._handler(
call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
)

View File

@ -5,10 +5,22 @@ import logging
from typing import Dict
from pythonbpf.type_deducer import ctypes_to_ir, is_ctypes
from .type_normalization import convert_to_bool, handle_comparator
from .call_registry import CallHandlerRegistry
from .type_normalization import (
convert_to_bool,
handle_comparator,
get_base_type_and_depth,
deref_to_depth,
)
from .vmlinux_registry import VmlinuxHandlerRegistry
from ..vmlinux_parser.dependency_node import Field
logger: Logger = logging.getLogger(__name__)
# ============================================================================
# Leaf Handlers (No Recursive eval_expr calls)
# ============================================================================
def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle ast.Name expressions."""
@ -17,16 +29,34 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type
else:
logger.info(f"Undefined variable {expr.id}")
return None
# Check if it's a vmlinux enum/constant
vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id)
if vmlinux_result is not None:
return vmlinux_result
raise SyntaxError(f"Undefined variable {expr.id}")
def _handle_constant_expr(expr: ast.Constant):
def _handle_constant_expr(module, builder, expr: ast.Constant):
"""Handle ast.Constant expressions."""
if isinstance(expr.value, int) or isinstance(expr.value, bool):
return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64)
elif isinstance(expr.value, str):
str_name = f".str.{id(expr)}"
str_bytes = expr.value.encode("utf-8") + b"\x00"
str_type = ir.ArrayType(ir.IntType(8), len(str_bytes))
str_constant = ir.Constant(str_type, bytearray(str_bytes))
# Create global variable
global_str = ir.GlobalVariable(module, str_type, name=str_name)
global_str.linkage = "internal"
global_str.global_constant = True
global_str.initializer = str_constant
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
return str_ptr, ir.PointerType(ir.IntType(8))
else:
logger.error("Unsupported constant type")
logger.error(f"Unsupported constant type {ast.dump(expr)}")
return None
@ -43,9 +73,32 @@ def _handle_attribute_expr(
if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields:
logger.info(
f"Variable type: {var_type}, Variable ptr: {var_ptr}, Variable Metadata: {var_metadata}"
)
if (
hasattr(var_metadata, "__module__")
and var_metadata.__module__ == "vmlinux"
):
# Try vmlinux handler when var_metadata is not a string, but has a module attribute.
# This has been done to keep everything separate in vmlinux struct handling.
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
else:
raise RuntimeError("Vmlinux struct did not process successfully")
elif isinstance(var_metadata, Field):
logger.error(
f"Cannot access field '{attr_name}' on already-loaded field value '{var_name}'"
)
return None
# Regular user-defined struct
metadata = structs_sym_tab.get(var_metadata)
if metadata and attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
@ -88,6 +141,123 @@ def _handle_deref_call(expr: ast.Call, local_sym_tab: Dict, builder: ir.IRBuilde
return val, local_sym_tab[arg.id].ir_type
# ============================================================================
# Binary Operations
# ============================================================================
def get_operand_value(
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
"""Extract the value from an operand, handling variables and constants."""
logger.info(f"Getting operand value for: {ast.dump(operand)}")
if isinstance(operand, ast.Name):
if operand.id in local_sym_tab:
var = local_sym_tab[operand.id].var
var_type = var.type
base_type, depth = get_base_type_and_depth(var_type)
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
val = deref_to_depth(func, builder, var, depth)
return val
else:
# Check if it's a vmlinux enum/constant
vmlinux_result = VmlinuxHandlerRegistry.handle_name(operand.id)
if vmlinux_result is not None:
val, _ = vmlinux_result
return val
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
cst = ir.Constant(ir.IntType(64), int(operand.value))
return cst
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
res = _handle_binary_op_impl(
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
return res
else:
res = eval_expr(
func, module, builder, operand, local_sym_tab, map_sym_tab, structs_sym_tab
)
if res is None:
raise ValueError(f"Failed to evaluate call expression: {operand}")
val, _ = res
logger.info(f"Evaluated expr to {val} of type {val.type}")
base_type, depth = get_base_type_and_depth(val.type)
if depth > 0:
val = deref_to_depth(func, builder, val, depth)
return val
raise TypeError(f"Unsupported operand type: {type(operand)}")
def _handle_binary_op_impl(
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
op = rval.op
left = get_operand_value(
func, module, rval.left, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
right = get_operand_value(
func, module, rval.right, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
logger.info(f"left is {left}, right is {right}, op is {op}")
# NOTE: Before doing the operation, if the operands are integers
# we always extend them to i64. The assignment to LHS will take
# care of truncation if needed.
if isinstance(left.type, ir.IntType) and left.type.width < 64:
left = builder.sext(left, ir.IntType(64))
if isinstance(right.type, ir.IntType) and right.type.width < 64:
right = builder.sext(right, ir.IntType(64))
# Map AST operation nodes to LLVM IR builder methods
op_map = {
ast.Add: builder.add,
ast.Sub: builder.sub,
ast.Mult: builder.mul,
ast.Div: builder.sdiv,
ast.Mod: builder.srem,
ast.LShift: builder.shl,
ast.RShift: builder.lshr,
ast.BitOr: builder.or_,
ast.BitXor: builder.xor,
ast.BitAnd: builder.and_,
ast.FloorDiv: builder.udiv,
}
if type(op) in op_map:
result = op_map[type(op)](left, right)
return result
else:
raise SyntaxError("Unsupported binary operation")
def _handle_binary_op(
func,
module,
rval,
builder,
var_name,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
result = _handle_binary_op_impl(
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type
# ============================================================================
# Comparison and Unary Operations
# ============================================================================
def _handle_ctypes_call(
func,
module,
@ -118,16 +288,45 @@ def _handle_ctypes_call(
call_type = expr.func.id
expected_type = ctypes_to_ir(call_type)
if val[1] != expected_type:
# Extract the actual IR value and type
# val could be (value, ir_type) or (value, Field)
value, val_type = val
# If val_type is a Field object (from vmlinux struct), get the actual IR type of the value
if isinstance(val_type, Field):
# The value is already the correct IR value (potentially zero-extended)
# Get the IR type from the value itself
actual_ir_type = value.type
logger.info(
f"Converting vmlinux field {val_type.name} (IR type: {actual_ir_type}) to {call_type}"
)
else:
actual_ir_type = val_type
if actual_ir_type != expected_type:
# NOTE: We are only considering casting to and from int types for now
if isinstance(val[1], ir.IntType) and isinstance(expected_type, ir.IntType):
if val[1].width < expected_type.width:
val = (builder.sext(val[0], expected_type), expected_type)
if isinstance(actual_ir_type, ir.IntType) and isinstance(
expected_type, ir.IntType
):
if actual_ir_type.width < expected_type.width:
value = builder.sext(value, expected_type)
logger.info(
f"Sign-extended from i{actual_ir_type.width} to i{expected_type.width}"
)
elif actual_ir_type.width > expected_type.width:
value = builder.trunc(value, expected_type)
logger.info(
f"Truncated from i{actual_ir_type.width} to i{expected_type.width}"
)
else:
val = (builder.trunc(val[0], expected_type), expected_type)
# Same width, just use as-is (e.g., both i64)
pass
else:
raise ValueError(f"Type mismatch: expected {expected_type}, got {val[1]}")
return val
raise ValueError(
f"Type mismatch: expected {expected_type}, got {actual_ir_type} (original type: {val_type})"
)
return value, expected_type
def _handle_compare(
@ -176,21 +375,32 @@ def _handle_unary_op(
structs_sym_tab=None,
):
"""Handle ast.UnaryOp expressions."""
if not isinstance(expr.op, ast.Not):
logger.error("Only 'not' unary operator is supported")
if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub):
logger.error("Only 'not' and '-' unary operators are supported")
return None
operand = eval_expr(
func, module, builder, expr.operand, local_sym_tab, map_sym_tab, structs_sym_tab
operand = get_operand_value(
func, module, expr.operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand is None:
logger.error("Failed to evaluate operand for unary operation")
return None
operand_val, operand_type = operand
true_const = ir.Constant(ir.IntType(1), 1)
result = builder.xor(convert_to_bool(builder, operand_val), true_const)
return result, ir.IntType(1)
if isinstance(expr.op, ast.Not):
true_const = ir.Constant(ir.IntType(1), 1)
result = builder.xor(convert_to_bool(builder, operand), true_const)
return result, ir.IntType(1)
elif isinstance(expr.op, ast.USub):
# Multiply by -1
neg_one = ir.Constant(ir.IntType(64), -1)
result = builder.mul(operand, neg_one)
return result, ir.IntType(64)
return None
# ============================================================================
# Boolean Operations
# ============================================================================
def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
@ -323,6 +533,71 @@ def _handle_boolean_op(
return None
# ============================================================================
# VMLinux casting
# ============================================================================
def _handle_vmlinux_cast(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
# handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux
# struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64
# which needs to be cast to a pointer. This is also a field of another vmlinux struct
"""Handle vmlinux struct cast expressions like struct_request(ctx.di)."""
if len(expr.args) != 1:
logger.info("vmlinux struct cast takes exactly one argument")
return None
# Get the struct name
struct_name = expr.func.id
# Evaluate the argument (e.g., ctx.di which is a c_uint64)
arg_result = eval_expr(
func,
module,
builder,
expr.args[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if arg_result is None:
logger.info("Failed to evaluate argument to vmlinux struct cast")
return None
arg_val, arg_type = arg_result
# Get the vmlinux struct type
vmlinux_struct_type = VmlinuxHandlerRegistry.get_struct_type(struct_name)
if vmlinux_struct_type is None:
logger.error(f"Failed to get vmlinux struct type for {struct_name}")
return None
# Cast the integer/value to a pointer to the struct
# If arg_val is an integer type, we need to inttoptr it
ptr_type = ir.PointerType()
# TODO: add a integer check here later
if ctypes_to_ir(arg_type.type.__name__):
# Cast integer to pointer
casted_ptr = builder.inttoptr(arg_val, ptr_type)
else:
logger.error(f"Unsupported type for vmlinux cast: {arg_type}")
return None
return casted_ptr, vmlinux_struct_type
# ============================================================================
# Expression Dispatcher
# ============================================================================
def eval_expr(
func,
module,
@ -336,8 +611,20 @@ def eval_expr(
if isinstance(expr, ast.Name):
return _handle_name_expr(expr, local_sym_tab, builder)
elif isinstance(expr, ast.Constant):
return _handle_constant_expr(expr)
return _handle_constant_expr(module, builder, expr)
elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
expr.func.id
):
return _handle_vmlinux_cast(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder)
@ -352,57 +639,27 @@ def eval_expr(
structs_sym_tab,
)
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
result = CallHandlerRegistry.handle_call(
expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
)
if result is not None:
return result
if isinstance(expr.func, ast.Name) and HelperHandlerRegistry.has_handler(
expr.func.id
):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func, ast.Attribute):
logger.info(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
method_name = expr.func.attr
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.warning(f"Unknown call: {ast.dump(expr)}")
return None
elif isinstance(expr, ast.Attribute):
return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
elif isinstance(expr, ast.BinOp):
from pythonbpf.binary_ops import handle_binary_op
return handle_binary_op(expr, builder, None, local_sym_tab)
return _handle_binary_op(
func,
module,
expr,
builder,
None,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Compare):
return _handle_compare(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab

50
pythonbpf/expr/ir_ops.py Normal file
View File

@ -0,0 +1,50 @@
import logging
from llvmlite import ir
logger = logging.getLogger(__name__)
def deref_to_depth(func, builder, val, target_depth):
"""Dereference a pointer to a certain depth."""
cur_val = val
cur_type = val.type
for depth in range(target_depth):
if not isinstance(val.type, ir.PointerType):
logger.error("Cannot dereference further, non-pointer type")
return None
# dereference with null check
pointee_type = cur_type.pointee
null_check_block = builder.block
not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
null_ptr = ir.Constant(cur_type, None)
is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
logger.debug(f"Inserted null check for pointer at depth {depth}")
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
dereferenced_val = builder.load(cur_val)
logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
zero_value = (
ir.Constant(pointee_type, 0)
if isinstance(pointee_type, ir.IntType)
else ir.Constant(pointee_type, None)
)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(dereferenced_val, not_null_block)
# Continue with phi result
cur_val = phi
cur_type = pointee_type
return cur_val

View File

@ -1,6 +1,7 @@
from llvmlite import ir
import logging
import ast
from llvmlite import ir
from .ir_ops import deref_to_depth
logger = logging.getLogger(__name__)
@ -16,7 +17,7 @@ COMPARISON_OPS = {
}
def _get_base_type_and_depth(ir_type):
def get_base_type_and_depth(ir_type):
"""Get the base type for pointer types."""
cur_type = ir_type
depth = 0
@ -26,52 +27,6 @@ def _get_base_type_and_depth(ir_type):
return cur_type, depth
def _deref_to_depth(func, builder, val, target_depth):
"""Dereference a pointer to a certain depth."""
cur_val = val
cur_type = val.type
for depth in range(target_depth):
if not isinstance(val.type, ir.PointerType):
logger.error("Cannot dereference further, non-pointer type")
return None
# dereference with null check
pointee_type = cur_type.pointee
null_check_block = builder.block
not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
null_ptr = ir.Constant(cur_type, None)
is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
logger.debug(f"Inserted null check for pointer at depth {depth}")
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
dereferenced_val = builder.load(cur_val)
logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
zero_value = (
ir.Constant(pointee_type, 0)
if isinstance(pointee_type, ir.IntType)
else ir.Constant(pointee_type, None)
)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(dereferenced_val, not_null_block)
# Continue with phi result
cur_val = phi
cur_type = pointee_type
return cur_val
def _normalize_types(func, builder, lhs, rhs):
"""Normalize types for comparison."""
@ -88,13 +43,13 @@ def _normalize_types(func, builder, lhs, rhs):
logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}")
return None, None
else:
lhs_base, lhs_depth = _get_base_type_and_depth(lhs.type)
rhs_base, rhs_depth = _get_base_type_and_depth(rhs.type)
lhs_base, lhs_depth = get_base_type_and_depth(lhs.type)
rhs_base, rhs_depth = get_base_type_and_depth(rhs.type)
if lhs_base == rhs_base:
if lhs_depth < rhs_depth:
rhs = _deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth)
rhs = deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth)
elif rhs_depth < lhs_depth:
lhs = _deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth)
lhs = deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth)
return _normalize_types(func, builder, lhs, rhs)

View File

@ -0,0 +1,75 @@
import ast
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
class VmlinuxHandlerRegistry:
"""Registry for vmlinux handler operations"""
_handler = None
@classmethod
def set_handler(cls, handler: VmlinuxHandler):
"""Set the vmlinux handler"""
cls._handler = handler
@classmethod
def get_handler(cls):
"""Get the vmlinux handler"""
return cls._handler
@classmethod
def handle_name(cls, name):
"""Try to handle a name as vmlinux enum/constant"""
if cls._handler is None:
return None
return cls._handler.handle_vmlinux_enum(name)
@classmethod
def handle_attribute(cls, expr, local_sym_tab, module, builder):
"""Try to handle an attribute access as vmlinux struct field"""
if cls._handler is None:
return None
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
field_name = expr.attr
return cls._handler.handle_vmlinux_struct_field(
var_name, field_name, module, builder, local_sym_tab
)
return None
@classmethod
def get_struct_debug_info(cls, name):
if cls._handler is None:
return False
return cls._handler.get_struct_debug_info(name)
@classmethod
def is_vmlinux_struct(cls, name):
"""Check if a name refers to a vmlinux struct"""
if cls._handler is None:
return False
return cls._handler.is_vmlinux_struct(name)
@classmethod
def get_struct_type(cls, name):
"""Try to handle a struct name as vmlinux struct"""
if cls._handler is None:
return None
return cls._handler.get_vmlinux_struct_type(name)
@classmethod
def has_field(cls, vmlinux_struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if cls._handler is None:
return False
return cls._handler.has_field(vmlinux_struct_name, field_name)
@classmethod
def get_field_type(cls, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if cls._handler is None:
return None
assert isinstance(cls._handler, VmlinuxHandler)
return cls._handler.get_field_type(vmlinux_struct_name, field_name)

View File

@ -1,22 +0,0 @@
from typing import Dict
class StatementHandlerRegistry:
"""Registry for statement handlers."""
_handlers: Dict = {}
@classmethod
def register(cls, stmt_type):
"""Register a handler for a specific statement type."""
def decorator(handler):
cls._handlers[stmt_type] = handler
return handler
return decorator
@classmethod
def __getitem__(cls, stmt_type):
"""Get the handler for a specific statement type."""
return cls._handlers.get(stmt_type, None)

View File

@ -0,0 +1,82 @@
import ast
import llvmlite.ir as ir
import logging
from pythonbpf.debuginfo import DebugInfoGenerator
from pythonbpf.expr import VmlinuxHandlerRegistry
import ctypes
logger = logging.getLogger(__name__)
def generate_function_debug_info(
func_node: ast.FunctionDef, module: ir.Module, func: ir.Function
):
generator = DebugInfoGenerator(module)
leading_argument = func_node.args.args[0]
leading_argument_name = leading_argument.arg
annotation = leading_argument.annotation
if func_node.returns is None:
# TODO: should check if this logic is consistent with function return type handling elsewhere
return_type = ctypes.c_int64()
elif hasattr(func_node.returns, "id"):
return_type = func_node.returns.id
if return_type == "c_int32":
return_type = generator.get_int32_type()
elif return_type == "c_int64":
return_type = generator.get_int64_type()
elif return_type == "c_uint32":
return_type = generator.get_uint32_type()
elif return_type == "c_uint64":
return_type = generator.get_uint64_type()
else:
logger.warning(
"Return type should be int32, int64, uint32 or uint64 only. Falling back to int64"
)
return_type = generator.get_int64_type()
else:
return_type = ctypes.c_int64()
# context processing
if annotation is None:
logger.warning("Type of context of function not found.")
return
if hasattr(annotation, "id"):
ctype_name = annotation.id
if ctype_name == "c_void_p":
return
elif ctype_name.startswith("ctypes"):
raise SyntaxError(
"The first argument should always be a pointer to a struct or a void pointer"
)
context_debug_info = VmlinuxHandlerRegistry.get_struct_debug_info(annotation.id)
# Create pointer to context this must be created fresh for each function
# to avoid circular reference issues when the same struct is used in multiple functions
pointer_to_context_debug_info = generator.create_pointer_type(
context_debug_info, 64
)
# Create subroutine type - also fresh for each function
subroutine_type = generator.create_subroutine_type(
return_type, pointer_to_context_debug_info
)
# Create local variable - fresh for each function with unique name
context_local_variable = generator.create_local_variable_debug_info(
leading_argument_name, 1, pointer_to_context_debug_info
)
retained_nodes = [context_local_variable]
logger.info(f"Generating debug info for function {func_node.name}")
# Create subprogram with is_distinct=True to ensure each function gets unique debug info
subprogram_debug_info = generator.create_subprogram(
func_node.name, subroutine_type, retained_nodes
)
generator.add_scope_to_local_variable(
context_local_variable, subprogram_debug_info
)
func.set_metadata("dbg", subprogram_debug_info)
else:
logger.error(f"Invalid annotation type for argument '{leading_argument_name}'")

View File

@ -0,0 +1,88 @@
import ast
def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node"""
# TODO: right now we have the whole string in the section decorator
# But later we can implement typed tuples for tracepoints and kprobes
# For helper functions, we return "helper"
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "bpfglobal":
return None
if isinstance(decorator, ast.Call) and isinstance(decorator.func, ast.Name):
if decorator.func.id == "section" and len(decorator.args) == 1:
arg = decorator.args[0]
if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
return arg.value
return "helper"
def is_global_function(func_node):
"""Check if the function is a global"""
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id in (
"map",
"bpfglobal",
"struct",
):
return True
return False
def infer_return_type(func_node: ast.FunctionDef):
if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
raise TypeError("Expected ast.FunctionDef")
if func_node.returns is not None:
try:
return ast.unparse(func_node.returns)
except Exception:
node = func_node.returns
if isinstance(node, ast.Name):
return node.id
if isinstance(node, ast.Attribute):
return getattr(node, "attr", type(node).__name__)
try:
return str(node)
except Exception:
return type(node).__name__
found_type = None
def _expr_type(e):
if e is None:
return "None"
if isinstance(e, ast.Constant):
return type(e.value).__name__
if isinstance(e, ast.Name):
return e.id
if isinstance(e, ast.Call):
f = e.func
if isinstance(f, ast.Name):
return f.id
if isinstance(f, ast.Attribute):
try:
return ast.unparse(f)
except Exception:
return getattr(f, "attr", type(f).__name__)
try:
return ast.unparse(f)
except Exception:
return type(f).__name__
if isinstance(e, ast.Attribute):
try:
return ast.unparse(e)
except Exception:
return getattr(e, "attr", type(e).__name__)
try:
return ast.unparse(e)
except Exception:
return type(e).__name__
for walked_node in ast.walk(func_node):
if isinstance(walked_node, ast.Return):
t = _expr_type(walked_node.value)
if found_type is None:
found_type = t
elif found_type != t:
raise ValueError(f"Conflicting return types: {found_type} vs {t}")
return found_type or "None"

View File

@ -1,243 +1,208 @@
from llvmlite import ir
import ast
import logging
from typing import Any
from dataclasses import dataclass
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
from pythonbpf.helper import (
HelperHandlerRegistry,
reset_scratch_pool,
)
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.binary_ops import handle_binary_op
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool
from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name
from pythonbpf.expr import (
eval_expr,
handle_expr,
convert_to_bool,
VmlinuxHandlerRegistry,
)
from pythonbpf.assign_pass import (
handle_variable_assignment,
handle_struct_field_assignment,
)
from pythonbpf.allocation_pass import (
handle_assign_allocation,
allocate_temp_pool,
create_targets_and_rvals,
LocalSymbol,
)
from .function_debug_info import generate_function_debug_info
from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name
from .function_metadata import get_probe_string, is_global_function, infer_return_type
logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata
# ============================================================================
# SECTION 1: Memory Allocation
# ============================================================================
def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node."""
# TODO: right now we have the whole string in the section decorator
# But later we can implement typed tuples for tracepoints and kprobes
# For helper functions, we return "helper"
def count_temps_in_call(call_node, local_sym_tab):
"""Count the number of temporary variables needed for a function call."""
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "bpfglobal":
return None
if isinstance(decorator, ast.Call) and isinstance(decorator.func, ast.Name):
if decorator.func.id == "section" and len(decorator.args) == 1:
arg = decorator.args[0]
if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
return arg.value
return "helper"
count = {}
is_helper = False
# NOTE: We exclude print calls for now
if isinstance(call_node.func, ast.Name):
if (
HelperHandlerRegistry.has_handler(call_node.func.id)
and call_node.func.id != "print"
):
is_helper = True
func_name = call_node.func.id
elif isinstance(call_node.func, ast.Attribute):
if HelperHandlerRegistry.has_handler(call_node.func.attr):
is_helper = True
func_name = call_node.func.attr
if not is_helper:
return {} # No temps needed
for arg_idx in range(len(call_node.args)):
# NOTE: Count all non-name arguments
# For struct fields, if it is being passed as an argument,
# The struct object should already exist in the local_sym_tab
arg = call_node.args[arg_idx]
if isinstance(arg, ast.Name) or (
isinstance(arg, ast.Attribute) and arg.value.id in local_sym_tab
):
continue
param_type = HelperHandlerRegistry.get_param_type(func_name, arg_idx)
if isinstance(param_type, ir.PointerType):
pointee_type = param_type.pointee
count[pointee_type] = count.get(pointee_type, 0) + 1
return count
def handle_if_allocation(
module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
"""Recursively handle allocations in if/else branches."""
if stmt.body:
allocate_mem(
module,
builder,
stmt.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
if stmt.orelse:
allocate_mem(
module,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
max_temps_needed = {}
def merge_type_counts(count_dict):
nonlocal max_temps_needed
for typ, cnt in count_dict.items():
max_temps_needed[typ] = max(max_temps_needed.get(typ, 0), cnt)
def update_max_temps_for_stmt(stmt):
nonlocal max_temps_needed
if isinstance(stmt, ast.If):
for s in stmt.body:
update_max_temps_for_stmt(s)
for s in stmt.orelse:
update_max_temps_for_stmt(s)
return
stmt_temps = {}
for node in ast.walk(stmt):
if isinstance(node, ast.Call):
call_temps = count_temps_in_call(node, local_sym_tab)
for typ, cnt in call_temps.items():
stmt_temps[typ] = stmt_temps.get(typ, 0) + cnt
merge_type_counts(stmt_temps)
for stmt in body:
update_max_temps_for_stmt(stmt)
# Handle allocations
if isinstance(stmt, ast.If):
handle_if_allocation(
module,
builder,
stmt,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
handle_assign_allocation(
builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
)
allocate_temp_pool(builder, max_temps_needed, local_sym_tab)
return local_sym_tab
# ============================================================================
# SECTION 2: Statement Handlers
# ============================================================================
def handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
):
"""Handle assignment statements in the function body."""
if len(stmt.targets) != 1:
logger.info("Unsupported multiassignment")
return
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
# NOTE: Support multi-target assignments (e.g.: a, b = 1, 2)
targets, rvals = create_targets_and_rvals(stmt)
target = stmt.targets[0]
logger.info(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
logger.info("Unsupported assignment target")
return
var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value
if isinstance(target, ast.Attribute):
# struct field assignment
field_name = target.attr
if var_name in local_sym_tab:
struct_type = local_sym_tab[var_name].metadata
struct_info = structs_sym_tab[struct_type]
if field_name in struct_info.fields:
field_ptr = struct_info.gep(
builder, local_sym_tab[var_name].var, field_name
)
val = eval_expr(
func,
module,
builder,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[
1
] == ir.PointerType(ir.IntType(8)):
# TODO: Figure it out, not a priority rn
# Special case for string assignment to char array
# str_len = struct_info["field_types"][field_idx].count
# assign_string_to_array(builder, field_ptr, val[0], str_len)
# print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
logger.info("Failed to evaluate struct field assignment")
return
logger.info(field_ptr)
builder.store(val[0], field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
if rval.value:
builder.store(
ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name].var
)
else:
builder.store(
ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name].var
)
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int):
# Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(
ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name].var
for target, rval in zip(targets, rvals):
if isinstance(target, ast.Name):
# NOTE: Simple variable assignment case: x = 5
var_name = target.id
result = handle_variable_assignment(
func,
module,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str):
str_val = rval.value.encode("utf-8") + b"\x00"
str_const = ir.Constant(
ir.ArrayType(ir.IntType(8), len(str_val)), bytearray(str_val)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
continue
if isinstance(target, ast.Attribute):
# NOTE: Struct field assignment case: pkt.field = value
handle_struct_field_assignment(
func,
module,
builder,
target,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
global_str = ir.GlobalVariable(
module, str_const.type, name=f"{var_name}_str"
)
global_str.linkage = "internal"
global_str.global_constant = True
global_str.initializer = str_const
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name].var)
logger.info(f"Assigned string constant '{rval.value}' to {var_name}")
else:
logger.info("Unsupported constant type")
elif isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
logger.info(f"Assignment call type: {call_type}")
if (
call_type in num_types
and len(rval.args) == 1
and isinstance(rval.args[0], ast.Constant)
and isinstance(rval.args[0].value, int)
):
ir_type = ctypes_to_ir(call_type)
# var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
builder.store(
ir.Constant(ir_type, rval.args[0].value),
local_sym_tab[var_name].var,
)
logger.info(
f"Assigned {call_type} constant {rval.args[0].value} to {var_name}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
logger.info(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
logger.info(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr(
func,
module,
builder,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
logger.info("Failed to evaluate deref argument")
return
logger.info(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name].var)
logger.info(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
# var = builder.alloca(ir_type, name=var_name)
# Null init
builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name].var)
logger.info(f"Assigned struct {call_type} to {var_name}")
else:
logger.info(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute):
logger.info(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name):
if rval.func.value.id in map_sym_tab:
map_name = rval.func.value.id
method_name = rval.func.attr
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
else:
# TODO: probably a struct access
logger.info(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance(
rval.func.value.func, ast.Name
):
map_name = rval.func.value.func.id
method_name = rval.func.attr
if map_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(val[0], local_sym_tab[var_name].var)
else:
logger.info("Unsupported assignment call structure")
else:
logger.info("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, builder, var_name, local_sym_tab)
else:
logger.info("Unsupported assignment value type")
continue
# Unsupported target type
logger.error(f"Unsupported assignment target: {ast.dump(target)}")
def handle_cond(
@ -300,9 +265,9 @@ def handle_if(
def handle_return(builder, stmt, local_sym_tab, ret_type):
logger.info(f"Handling return statement: {ast.dump(stmt)}")
if stmt.value is None:
return _handle_none_return(builder)
elif isinstance(stmt.value, ast.Name) and _is_xdp_name(stmt.value.id):
return _handle_xdp_return(stmt, builder, ret_type)
return handle_none_return(builder)
elif isinstance(stmt.value, ast.Name) and is_xdp_name(stmt.value.id):
return handle_xdp_return(stmt, builder, ret_type)
else:
val = eval_expr(
func=None,
@ -330,6 +295,7 @@ def process_stmt(
ret_type=ir.IntType(64),
):
logger.info(f"Processing statement: {ast.dump(stmt)}")
reset_scratch_pool()
if isinstance(stmt, ast.Expr):
handle_expr(
func,
@ -360,124 +326,19 @@ def process_stmt(
return did_return
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
for stmt in body:
has_metadata = False
if isinstance(stmt, ast.If):
if stmt.body:
local_sym_tab = allocate_mem(
module,
builder,
stmt.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
if stmt.orelse:
local_sym_tab = allocate_mem(
module,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
if len(stmt.targets) != 1:
logger.info("Unsupported multiassignment")
continue
target = stmt.targets[0]
if not isinstance(target, ast.Name):
logger.info("Unsupported assignment target")
continue
var_name = target.id
rval = stmt.value
if var_name in local_sym_tab:
logger.info(f"Variable {var_name} already allocated")
continue
if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"):
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(
f"Pre-allocated variable {var_name} of type {call_type}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
var = builder.alloca(ir_type, name=var_name)
has_metadata = True
logger.info(
f"Pre-allocated variable {var_name} for struct {call_type}"
)
elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for map")
else:
logger.info("Unsupported assignment call function type")
continue
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
logger.info(f"Pre-allocated variable {var_name} of type c_bool")
elif isinstance(rval.value, int):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
logger.info(f"Pre-allocated variable {var_name} of type string")
else:
logger.info("Unsupported constant type")
continue
elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
else:
logger.info("Unsupported assignment value type")
continue
if has_metadata:
local_sym_tab[var_name] = LocalSymbol(var, ir_type, call_type)
else:
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
return local_sym_tab
# ============================================================================
# SECTION 3: Function Body Processing
# ============================================================================
def process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
@ -485,6 +346,28 @@ def process_func_body(
local_sym_tab = {}
# Add the context parameter (first function argument) to the local symbol table
if func_node.args.args and len(func_node.args.args) > 0:
context_arg = func_node.args.args[0]
context_name = context_arg.arg
if hasattr(context_arg, "annotation") and context_arg.annotation:
if isinstance(context_arg.annotation, ast.Name):
context_type_name = context_arg.annotation.id
elif isinstance(context_arg.annotation, ast.Attribute):
context_type_name = context_arg.annotation.attr
else:
raise TypeError(
f"Unsupported annotation type: {ast.dump(context_arg.annotation)}"
)
if VmlinuxHandlerRegistry.is_vmlinux_struct(context_type_name):
resolved_type = VmlinuxHandlerRegistry.get_struct_type(
context_type_name
)
context_type = LocalSymbol(None, None, resolved_type)
local_sym_tab[context_name] = context_type
logger.info(f"Added argument '{context_name}' to local symbol table")
# pre-allocate dynamic variables
local_sym_tab = allocate_mem(
module,
@ -535,7 +418,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
func.linkage = "dso_local"
func.attributes.add("nounwind")
func.attributes.add("noinline")
func.attributes.add("optnone")
# func.attributes.add("optnone")
if func_node.args.args:
# Only look at the first argument for now
@ -550,28 +433,30 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
builder = ir.IRBuilder(block)
process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
)
return func
# ============================================================================
# SECTION 4: Top-Level Function Processor
# ============================================================================
def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
for func_node in chunks:
is_global = False
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id in (
"map",
"bpfglobal",
"struct",
):
is_global = True
break
if is_global:
if is_global_function(func_node):
continue
func_type = get_probe_string(func_node)
logger.info(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk(
func = process_bpf_chunk(
func_node,
module,
ctypes_to_ir(infer_return_type(func_node)),
@ -579,68 +464,11 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
structs_sym_tab,
)
def infer_return_type(func_node: ast.FunctionDef):
if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
raise TypeError("Expected ast.FunctionDef")
if func_node.returns is not None:
try:
return ast.unparse(func_node.returns)
except Exception:
node = func_node.returns
if isinstance(node, ast.Name):
return node.id
if isinstance(node, ast.Attribute):
return getattr(node, "attr", type(node).__name__)
try:
return str(node)
except Exception:
return type(node).__name__
found_type = None
def _expr_type(e):
if e is None:
return "None"
if isinstance(e, ast.Constant):
return type(e.value).__name__
if isinstance(e, ast.Name):
return e.id
if isinstance(e, ast.Call):
f = e.func
if isinstance(f, ast.Name):
return f.id
if isinstance(f, ast.Attribute):
try:
return ast.unparse(f)
except Exception:
return getattr(f, "attr", type(f).__name__)
try:
return ast.unparse(f)
except Exception:
return type(f).__name__
if isinstance(e, ast.Attribute):
try:
return ast.unparse(e)
except Exception:
return getattr(e, "attr", type(e).__name__)
try:
return ast.unparse(e)
except Exception:
return type(e).__name__
for walked_node in ast.walk(func_node):
if isinstance(walked_node, ast.Return):
t = _expr_type(walked_node.value)
if found_type is None:
found_type = t
elif found_type != t:
raise ValueError(f"Conflicting return types: {found_type} vs {t}")
return found_type or "None"
# For string assignment to fixed-size arrays
logger.info(f"Generating Debug Info for Function {func_node.name}")
generate_function_debug_info(func_node, module, func)
# TODO: WIP, for string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
"""
Copy a string (i8*) to a fixed-size array ([N x i8]*)

View File

@ -14,19 +14,19 @@ XDP_ACTIONS = {
}
def _handle_none_return(builder) -> bool:
def handle_none_return(builder) -> bool:
"""Handle return or return None -> returns 0."""
builder.ret(ir.Constant(ir.IntType(64), 0))
logger.debug("Generated default return: 0")
return True
def _is_xdp_name(name: str) -> bool:
def is_xdp_name(name: str) -> bool:
"""Check if a name is an XDP action"""
return name in XDP_ACTIONS
def _handle_xdp_return(stmt: ast.Return, builder, ret_type) -> bool:
def handle_xdp_return(stmt: ast.Return, builder, ret_type) -> bool:
"""Handle XDP returns"""
if not isinstance(stmt.value, ast.Name):
return False
@ -37,7 +37,6 @@ def _handle_xdp_return(stmt: ast.Return, builder, ret_type) -> bool:
raise ValueError(
f"Unknown XDP action: {action_name}. Available: {XDP_ACTIONS.keys()}"
)
return False
value = XDP_ACTIONS[action_name]
builder.ret(ir.Constant(ret_type, value))

View File

@ -1,8 +1,121 @@
from llvmlite import ir
import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
def emit_globals(module: ir.Module, names: list[str]):
logger: Logger = logging.getLogger(__name__)
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
"""
Emit the @llvm.compiler.used global given a list of function/global names.
"""
@ -24,7 +137,7 @@ def emit_globals(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata"
def globals_processing(tree, module: ir.Module):
def globals_list_creation(tree, module: ir.Module):
collected = ["LICENSE"]
for node in tree.body:
@ -40,10 +153,11 @@ def globals_processing(tree, module: ir.Module):
):
collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
collected.append(node.name)
# NOTE: all globals other than
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name)
emit_globals(module, collected)
emit_llvm_compiler_used(module, collected)

View File

@ -1,13 +1,95 @@
from .helper_utils import HelperHandlerRegistry
from .bpf_helper_handler import handle_helper_call
from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS
from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import (
handle_helper_call,
emit_probe_read_kernel_str_call,
emit_probe_read_kernel_call,
)
from .helpers import (
ktime,
pid,
deref,
comm,
probe_read_str,
random,
probe_read,
smp_processor_id,
uid,
skb_store_bytes,
get_stack,
XDP_DROP,
XDP_PASS,
)
# Register the helper handler with expr module
def _register_helper_handler():
"""Register helper call handler with the expression evaluator"""
from pythonbpf.expr.expr_pass import CallHandlerRegistry
def helper_call_handler(
call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Check if call is a helper and handle it"""
import ast
# Check for direct helper calls (e.g., ktime(), print())
if isinstance(call.func, ast.Name):
if HelperHandlerRegistry.has_handler(call.func.id):
return handle_helper_call(
call,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# Check for method calls (e.g., map.lookup())
elif isinstance(call.func, ast.Attribute):
method_name = call.func.attr
# Handle: my_map.lookup(key)
if isinstance(call.func.value, ast.Name):
obj_name = call.func.value.id
if map_sym_tab and obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
call,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
return None
CallHandlerRegistry.set_handler(helper_call_handler)
# Register on module import
_register_helper_handler()
__all__ = [
"HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call",
"emit_probe_read_kernel_str_call",
"emit_probe_read_kernel_call",
"ktime",
"pid",
"deref",
"comm",
"probe_read_str",
"random",
"probe_read",
"smp_processor_id",
"uid",
"skb_store_bytes",
"get_stack",
"XDP_DROP",
"XDP_PASS",
]

View File

@ -1,31 +1,51 @@
import ast
from llvmlite import ir
from enum import Enum
from .helper_registry import HelperHandlerRegistry
from .helper_utils import (
HelperHandlerRegistry,
get_or_create_ptr_from_arg,
get_flags_val,
handle_fstring_print,
simple_string_print,
get_data_ptr_and_size,
get_buffer_ptr_and_size,
get_ptr_from_arg,
get_int_value_from_arg,
)
from logging import Logger
from .printk_formatter import simple_string_print, handle_fstring_print
from pythonbpf.maps import BPFMapType
import logging
logger: Logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
class BPFHelperID(Enum):
BPF_MAP_LOOKUP_ELEM = 1
BPF_MAP_UPDATE_ELEM = 2
BPF_MAP_DELETE_ELEM = 3
BPF_PROBE_READ = 4
BPF_KTIME_GET_NS = 5
BPF_PRINTK = 6
BPF_GET_PRANDOM_U32 = 7
BPF_GET_SMP_PROCESSOR_ID = 8
BPF_SKB_STORE_BYTES = 9
BPF_GET_CURRENT_PID_TGID = 14
BPF_GET_CURRENT_UID_GID = 15
BPF_GET_CURRENT_COMM = 16
BPF_PERF_EVENT_OUTPUT = 25
BPF_GET_STACK = 67
BPF_PROBE_READ_KERNEL_STR = 115
BPF_PROBE_READ_KERNEL = 113
BPF_RINGBUF_OUTPUT = 130
BPF_RINGBUF_RESERVE = 131
BPF_RINGBUF_SUBMIT = 132
BPF_RINGBUF_DISCARD = 133
@HelperHandlerRegistry.register("ktime")
@HelperHandlerRegistry.register(
"ktime",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_ktime_get_ns_emitter(
call,
map_ptr,
@ -34,6 +54,7 @@ def bpf_ktime_get_ns_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ktime_get_ns helper function call.
@ -47,7 +68,11 @@ def bpf_ktime_get_ns_emitter(
return result, ir.IntType(64)
@HelperHandlerRegistry.register("lookup")
@HelperHandlerRegistry.register(
"lookup",
param_types=[ir.PointerType(ir.IntType(64))],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_lookup_elem_emitter(
call,
map_ptr,
@ -56,6 +81,7 @@ def bpf_map_lookup_elem_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_lookup_elem helper function call.
@ -64,11 +90,17 @@ def bpf_map_lookup_elem_emitter(
raise ValueError(
f"Map lookup expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
key_ptr = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# TODO: I have changed the return type to i64*, as we are
# allocating space for that type in allocate_mem. This is
# temporary, and we will honour other widths later. But this
# allows us to have cool binary ops on the returned value.
fn_type = ir.FunctionType(
ir.PointerType(), # Return type: void*
ir.PointerType(ir.IntType(64)), # Return type: void*
[ir.PointerType(), ir.PointerType()], # Args: (void*, void*)
var_arg=False,
)
@ -82,6 +114,7 @@ def bpf_map_lookup_elem_emitter(
return result, ir.PointerType()
# NOTE: This has special handling so we won't reflect the signature here.
@HelperHandlerRegistry.register("print")
def bpf_printk_emitter(
call,
@ -91,6 +124,7 @@ def bpf_printk_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_printk helper function call."""
if not hasattr(func, "_fmt_counter"):
@ -126,10 +160,18 @@ def bpf_printk_emitter(
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
builder.call(fn_ptr, args, tail=True)
return None
return True
@HelperHandlerRegistry.register("update")
@HelperHandlerRegistry.register(
"update",
param_types=[
ir.PointerType(ir.IntType(64)),
ir.PointerType(ir.IntType(64)),
ir.IntType(64),
],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_update_elem_emitter(
call,
map_ptr,
@ -138,6 +180,7 @@ def bpf_map_update_elem_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_update_elem helper function call.
@ -152,8 +195,12 @@ def bpf_map_update_elem_emitter(
value_arg = call.args[1]
flags_arg = call.args[2] if len(call.args) > 2 else None
key_ptr = get_or_create_ptr_from_arg(key_arg, builder, local_sym_tab)
value_ptr = get_or_create_ptr_from_arg(value_arg, builder, local_sym_tab)
key_ptr = get_or_create_ptr_from_arg(
func, module, key_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
value_ptr = get_or_create_ptr_from_arg(
func, module, value_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -179,7 +226,11 @@ def bpf_map_update_elem_emitter(
return result, None
@HelperHandlerRegistry.register("delete")
@HelperHandlerRegistry.register(
"delete",
param_types=[ir.PointerType(ir.IntType(64))],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_delete_elem_emitter(
call,
map_ptr,
@ -188,6 +239,7 @@ def bpf_map_delete_elem_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_delete_elem helper function call.
@ -197,7 +249,9 @@ def bpf_map_delete_elem_emitter(
raise ValueError(
f"Map delete expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
key_ptr = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# Define function type for bpf_map_delete_elem
@ -216,7 +270,72 @@ def bpf_map_delete_elem_emitter(
return result, None
@HelperHandlerRegistry.register("pid")
@HelperHandlerRegistry.register(
"comm",
param_types=[ir.PointerType(ir.IntType(8))],
return_type=ir.IntType(64),
)
def bpf_get_current_comm_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_comm helper function call.
Accepts: comm(dataobj.field) or comm(my_buffer)
"""
if not call.args or len(call.args) != 1:
raise ValueError(
f"comm expects exactly one argument (buffer), got {len(call.args)}"
)
buf_arg = call.args[0]
# Extract buffer pointer and size
buf_ptr, buf_size = get_buffer_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab
)
# Validate it's a char array
if not isinstance(
buf_ptr.type.pointee, ir.ArrayType
) or buf_ptr.type.pointee.element != ir.IntType(8):
raise ValueError(
f"comm expects a char array buffer, got {buf_ptr.type.pointee}"
)
# Cast to void* and call helper
buf_void_ptr = builder.bitcast(buf_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32)],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_COMM.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr, [buf_void_ptr, ir.Constant(ir.IntType(32), buf_size)], tail=False
)
logger.info(f"Emitted bpf_get_current_comm with {buf_size} byte buffer")
return result, None
@HelperHandlerRegistry.register(
"pid",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_get_current_pid_tgid_emitter(
call,
map_ptr,
@ -225,6 +344,7 @@ def bpf_get_current_pid_tgid_emitter(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
@ -237,12 +357,12 @@ def bpf_get_current_pid_tgid_emitter(
result = builder.call(fn_ptr, [], tail=False)
# Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF
# TODO: return both PID and TGID if we end up needing TGID somewhere
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
pid = builder.and_(result, mask)
return pid, ir.IntType(64)
@HelperHandlerRegistry.register("output")
def bpf_perf_event_output_handler(
call,
map_ptr,
@ -251,7 +371,12 @@ def bpf_perf_event_output_handler(
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_perf_event_output helper function call.
"""
if len(call.args) != 1:
raise ValueError(
f"Perf event output expects exactly one argument, got {len(call.args)}"
@ -289,6 +414,660 @@ def bpf_perf_event_output_handler(
return result, None
def bpf_ringbuf_output_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_output helper function call.
"""
if len(call.args) != 1:
raise ValueError(
f"Ringbuf output expects exactly one argument, got {len(call.args)}"
)
data_arg = call.args[0]
data_ptr, size_val = get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab)
flags_val = ir.Constant(ir.IntType(64), 0)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
data_void_ptr = builder.bitcast(data_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.IntType(64),
[
ir.PointerType(),
ir.PointerType(),
ir.IntType(64),
ir.IntType(64),
],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
# helper id
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_OUTPUT.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
result = builder.call(
fn_ptr, [map_void_ptr, data_void_ptr, size_val, flags_val], tail=False
)
return result, None
@HelperHandlerRegistry.register(
"output",
param_types=[ir.PointerType(ir.IntType(8))],
return_type=ir.IntType(64),
)
def handle_output_helper(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Route output helper to the appropriate emitter based on map type.
"""
match map_sym_tab[map_ptr.name].type:
case BPFMapType.PERF_EVENT_ARRAY:
return bpf_perf_event_output_handler(
call,
map_ptr,
module,
builder,
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
case BPFMapType.RINGBUF:
return bpf_ringbuf_output_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
case _:
logger.error("Unsupported map type for output helper.")
raise NotImplementedError("Output helper for this map type is not implemented.")
def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr):
"""Emit LLVM IR call to bpf_probe_read_kernel_str"""
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32), ir.PointerType()],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(dst_ptr, ir.PointerType()),
ir.Constant(ir.IntType(32), dst_size),
builder.bitcast(src_ptr, ir.PointerType()),
],
tail=False,
)
logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
return result
@HelperHandlerRegistry.register(
"probe_read_str",
param_types=[
ir.PointerType(ir.IntType(8)),
ir.PointerType(ir.IntType(8)),
],
return_type=ir.IntType(64),
)
def bpf_probe_read_kernel_str_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_probe_read_kernel_str helper."""
if len(call.args) != 2:
raise ValueError(
f"probe_read_str expects 2 args (dst, src), got {len(call.args)}"
)
# Get destination buffer (char array -> i8*)
dst_ptr, dst_size = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Get source pointer (evaluate expression)
src_ptr, src_type = get_ptr_from_arg(
call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Emit the helper call
result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr)
logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
return result, ir.IntType(64)
def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr):
"""Emit LLVM IR call to bpf_probe_read_kernel"""
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32), ir.PointerType()],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(dst_ptr, ir.PointerType()),
ir.Constant(ir.IntType(32), dst_size),
builder.bitcast(src_ptr, ir.PointerType()),
],
tail=False,
)
logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
return result
@HelperHandlerRegistry.register(
"probe_read_kernel",
param_types=[
ir.PointerType(ir.IntType(8)),
ir.PointerType(ir.IntType(8)),
],
return_type=ir.IntType(64),
)
def bpf_probe_read_kernel_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_probe_read_kernel helper."""
if len(call.args) != 2:
raise ValueError(
f"probe_read_kernel expects 2 args (dst, src), got {len(call.args)}"
)
# Get destination buffer (char array -> i8*)
dst_ptr, dst_size = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Get source pointer (evaluate expression)
src_ptr, src_type = get_ptr_from_arg(
call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Emit the helper call
result = emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr)
logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"random",
param_types=[],
return_type=ir.IntType(32),
)
def bpf_get_prandom_u32_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_prandom_u32 helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_PRANDOM_U32.value)
fn_type = ir.FunctionType(ir.IntType(32), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
return result, ir.IntType(32)
@HelperHandlerRegistry.register(
"probe_read",
param_types=[
ir.PointerType(ir.IntType(8)),
ir.IntType(32),
ir.PointerType(ir.IntType(8)),
],
return_type=ir.IntType(64),
)
def bpf_probe_read_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_probe_read helper function
"""
if len(call.args) != 3:
logger.warn("Expected 3 args for probe_read helper")
return
dst_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[0],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
size_val = get_int_value_from_arg(
call.args[1],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
src_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[2],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32), ir.PointerType()],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(dst_ptr, ir.PointerType()),
builder.trunc(size_val, ir.IntType(32)),
builder.bitcast(src_ptr, ir.PointerType()),
],
tail=False,
)
logger.info(f"Emitted bpf_probe_read (size={size_val})")
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"smp_processor_id",
param_types=[],
return_type=ir.IntType(32),
)
def bpf_get_smp_processor_id_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_smp_processor_id helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_SMP_PROCESSOR_ID.value)
fn_type = ir.FunctionType(ir.IntType(32), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
logger.info("Emitted bpf_get_smp_processor_id call")
return result, ir.IntType(32)
@HelperHandlerRegistry.register(
"uid",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_get_current_uid_gid_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_uid_gid helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_UID_GID.value)
fn_type = ir.FunctionType(ir.IntType(64), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
# Extract the lower 32 bits (UID) using bitwise AND with 0xFFFFFFFF
# TODO: return both UID and GID if we end up needing GID somewhere
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
pid = builder.and_(result, mask)
return pid, ir.IntType(64)
@HelperHandlerRegistry.register(
"skb_store_bytes",
param_types=[
ir.IntType(32),
ir.PointerType(ir.IntType(8)),
ir.IntType(32),
ir.IntType(64),
],
return_type=ir.IntType(64),
)
def bpf_skb_store_bytes_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_skb_store_bytes helper function call.
Expected call signature: skb_store_bytes(skb, offset, from, len, flags)
"""
args_signature = [
ir.PointerType(), # skb pointer
ir.IntType(32), # offset
ir.PointerType(), # from
ir.IntType(32), # len
ir.IntType(64), # flags
]
if len(call.args) not in (3, 4):
raise ValueError(
f"skb_store_bytes expects 3 or 4 args (offset, from, len, flags), got {len(call.args)}"
)
skb_ptr = func.args[0] # First argument to the function is skb
offset_val = get_int_value_from_arg(
call.args[0],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
from_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[1],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
args_signature[2],
)
len_val = get_int_value_from_arg(
call.args[2],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
if len(call.args) == 4:
flags_val = get_flags_val(call.args[3], builder, local_sym_tab)
else:
flags_val = 0
if isinstance(flags_val, int):
flags = ir.Constant(ir.IntType(64), flags_val)
else:
flags = flags_val
fn_type = ir.FunctionType(
ir.IntType(64),
args_signature,
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_SKB_STORE_BYTES.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(skb_ptr, ir.PointerType()),
builder.trunc(offset_val, ir.IntType(32)),
builder.bitcast(from_ptr, ir.PointerType()),
builder.trunc(len_val, ir.IntType(32)),
flags,
],
tail=False,
)
logger.info("Emitted bpf_skb_store_bytes call")
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"reserve",
param_types=[ir.IntType(64)],
return_type=ir.PointerType(ir.IntType(8)),
)
def bpf_ringbuf_reserve_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_reserve helper function call.
Expected call signature: ringbuf.reserve(size)
"""
if len(call.args) != 1:
raise ValueError(
f"ringbuf.reserve expects exactly one argument (size), got {len(call.args)}"
)
size_val = get_int_value_from_arg(
call.args[0],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.PointerType(ir.IntType(8)),
[ir.PointerType(), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_RESERVE.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
result = builder.call(fn_ptr, [map_void_ptr, size_val], tail=False)
return result, ir.PointerType(ir.IntType(8))
@HelperHandlerRegistry.register(
"submit",
param_types=[ir.PointerType(ir.IntType(8)), ir.IntType(64)],
return_type=ir.VoidType(),
)
def bpf_ringbuf_submit_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_submit helper function call.
Expected call signature: ringbuf.submit(data, flags=0)
"""
if len(call.args) not in (1, 2):
raise ValueError(
f"ringbuf.submit expects 1 or 2 args (data, flags), got {len(call.args)}"
)
data_arg = call.args[0]
flags_arg = call.args[1] if len(call.args) == 2 else None
data_ptr = get_or_create_ptr_from_arg(
func,
module,
data_arg,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.PointerType(ir.IntType(8)),
)
flags_const = get_flags_val(flags_arg, builder, local_sym_tab)
if isinstance(flags_const, int):
flags_const = ir.Constant(ir.IntType(64), flags_const)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.VoidType(),
[ir.PointerType(), ir.PointerType(), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_SUBMIT.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
result = builder.call(fn_ptr, [map_void_ptr, data_ptr, flags_const], tail=False)
return result, None
@HelperHandlerRegistry.register(
"get_stack",
param_types=[ir.PointerType(ir.IntType(8)), ir.IntType(64)],
return_type=ir.IntType(64),
)
def bpf_get_stack_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_stack helper function call.
"""
if len(call.args) not in (1, 2):
raise ValueError(
f"get_stack expects atmost two arguments (buf, flags), got {len(call.args)}"
)
ctx_ptr = func.args[0] # First argument to the function is ctx
buf_arg = call.args[0]
flags_arg = call.args[1] if len(call.args) == 2 else None
buf_ptr, buf_size = get_buffer_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
if isinstance(flags_val, int):
flags_val = ir.Constant(ir.IntType(64), flags_val)
buf_void_ptr = builder.bitcast(buf_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.IntType(64),
[
ir.PointerType(ir.IntType(8)),
ir.PointerType(),
ir.IntType(64),
ir.IntType(64),
],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_STACK.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
result = builder.call(
fn_ptr,
[ctx_ptr, buf_void_ptr, ir.Constant(ir.IntType(64), buf_size), flags_val],
tail=False,
)
return result, ir.IntType(64)
def handle_helper_call(
call,
module,
@ -315,6 +1094,7 @@ def handle_helper_call(
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
# Handle direct function calls (e.g., print(), ktime())
@ -342,6 +1122,6 @@ def handle_helper_call(
if not map_sym_tab or map_name not in map_sym_tab:
raise ValueError(f"Map '{map_name}' not found in symbol table")
return invoke_helper(method_name, map_sym_tab[map_name])
return invoke_helper(method_name, map_sym_tab[map_name].sym)
return None

View File

@ -0,0 +1,61 @@
from dataclasses import dataclass
from llvmlite import ir
from typing import Callable
@dataclass
class HelperSignature:
"""Signature of a BPF helper function"""
arg_types: list[ir.Type]
return_type: ir.Type
func: Callable
class HelperHandlerRegistry:
"""Registry for BPF helpers"""
_handlers: dict[str, HelperSignature] = {}
@classmethod
def register(cls, helper_name, param_types=None, return_type=None):
"""Decorator to register a handler function for a helper"""
def decorator(func):
helper_sig = HelperSignature(
arg_types=param_types, return_type=return_type, func=func
)
cls._handlers[helper_name] = helper_sig
return func
return decorator
@classmethod
def get_handler(cls, helper_name):
"""Get the handler function for a helper"""
handler = cls._handlers.get(helper_name)
return handler.func if handler else None
@classmethod
def has_handler(cls, helper_name):
"""Check if a handler function is registered for a helper"""
return helper_name in cls._handlers
@classmethod
def get_signature(cls, helper_name):
"""Get the signature of a helper function"""
return cls._handlers.get(helper_name)
@classmethod
def get_param_type(cls, helper_name, index):
"""Get the type of a parameter of a helper function by the index"""
signature = cls.get_signature(helper_name)
if signature and signature.arg_types and 0 <= index < len(signature.arg_types):
return signature.arg_types[index]
return None
@classmethod
def get_return_type(cls, helper_name):
"""Get the return type of a helper function"""
signature = cls.get_signature(helper_name)
return signature.return_type if signature else None

View File

@ -1,37 +1,70 @@
import ast
import logging
from collections.abc import Callable
from llvmlite import ir
from pythonbpf.expr import eval_expr
from pythonbpf.expr import (
get_operand_value,
eval_expr,
)
logger = logging.getLogger(__name__)
class HelperHandlerRegistry:
"""Registry for BPF helpers"""
class ScratchPoolManager:
"""Manage the temporary helper variables in local_sym_tab"""
_handlers: dict[str, Callable] = {}
def __init__(self):
self._counters = {}
@classmethod
def register(cls, helper_name):
"""Decorator to register a handler function for a helper"""
@property
def counter(self):
return sum(self._counters.values())
def decorator(func):
cls._handlers[helper_name] = func
return func
def reset(self):
self._counters.clear()
logger.debug("Scratch pool counter reset to 0")
return decorator
def _get_type_name(self, ir_type):
if isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{self._get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
@classmethod
def get_handler(cls, helper_name):
"""Get the handler function for a helper"""
return cls._handlers.get(helper_name)
def get_next_temp(self, local_sym_tab, expected_type=None):
# Default to i64 if no expected type provided
type_name = self._get_type_name(expected_type) if expected_type else "i64"
if type_name not in self._counters:
self._counters[type_name] = 0
@classmethod
def has_handler(cls, helper_name):
"""Check if a handler function is registered for a helper"""
return helper_name in cls._handlers
counter = self._counters[type_name]
temp_name = f"__helper_temp_{type_name}_{counter}"
self._counters[type_name] += 1
if temp_name not in local_sym_tab:
raise ValueError(
f"Scratch pool exhausted or inadequate: {temp_name}. "
f"Type: {type_name} Counter: {counter}"
)
logger.debug(f"Using {temp_name} for type {type_name}")
return local_sym_tab[temp_name].var, temp_name
_temp_pool_manager = ScratchPoolManager() # Singleton instance
def reset_scratch_pool():
"""Reset the scratch pool counter"""
_temp_pool_manager.reset()
# ============================================================================
# Argument Preparation
# ============================================================================
def get_var_ptr_from_name(var_name, local_sym_tab):
@ -41,27 +74,97 @@ def get_var_ptr_from_name(var_name, local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found in local symbol table")
def create_int_constant_ptr(value, builder, int_width=64):
def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64):
"""Create a pointer to an integer constant."""
# Default to 64-bit integer
int_type = ir.IntType(int_width)
ptr = builder.alloca(int_type)
ptr.align = int_type.width // 8
builder.store(ir.Constant(int_type, value), ptr)
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, int_type)
logger.info(f"Using temp variable '{temp_name}' for int constant {value}")
const_val = ir.Constant(int_type, value)
builder.store(const_val, ptr)
return ptr
def get_or_create_ptr_from_arg(arg, builder, local_sym_tab):
def get_or_create_ptr_from_arg(
func,
module,
arg,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab=None,
expected_type=None,
):
"""Extract or create pointer from the call arguments."""
logger.info(f"Getting pointer from arg: {ast.dump(arg)}")
sz = None
if isinstance(arg, ast.Name):
# Stack space is already allocated
ptr = get_var_ptr_from_name(arg.id, local_sym_tab)
elif isinstance(arg, ast.Constant) and isinstance(arg.value, int):
ptr = create_int_constant_ptr(arg.value, builder)
int_width = 64 # Default to i64
if expected_type and isinstance(expected_type, ir.IntType):
int_width = expected_type.width
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab, int_width)
elif isinstance(arg, ast.Attribute):
# A struct field
struct_name = arg.value.id
field_name = arg.attr
if not local_sym_tab or struct_name not in local_sym_tab:
raise ValueError(f"Struct '{struct_name}' not found")
struct_type = local_sym_tab[struct_name].metadata
if not struct_sym_tab or struct_type not in struct_sym_tab:
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
if field_name not in struct_info.fields:
raise ValueError(
f"Field '{field_name}' not found in struct '{struct_name}'"
)
field_type = struct_info.field_type(field_name)
struct_ptr = local_sym_tab[struct_name].var
# Special handling for char arrays
if (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
ptr, sz = get_char_array_ptr_and_size(
arg, builder, local_sym_tab, struct_sym_tab
)
if not ptr:
raise ValueError("Failed to get char array pointer from struct field")
else:
ptr = struct_info.gep(builder, struct_ptr, field_name)
else:
raise NotImplementedError(
"Only simple variable names are supported as args in map helpers."
# NOTE: For any integer expression reaching this branch, it is probably a struct field or a binop
# Evaluate the expression and store the result in a temp variable
val = get_operand_value(
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
if val is None:
raise ValueError("Failed to evaluate expression for helper arg.")
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, expected_type)
logger.info(f"Using temp variable '{temp_name}' for expression result")
if (
isinstance(val.type, ir.IntType)
and expected_type
and val.type.width > expected_type.width
):
val = builder.trunc(val, expected_type)
builder.store(val, ptr)
# NOTE: For char arrays, also return size
if sz:
return ptr, sz
return ptr
@ -84,204 +187,6 @@ def get_flags_val(arg, builder, local_sym_tab):
)
def simple_string_print(string_value, module, builder, func):
"""Prepare arguments for bpf_printk from a simple string value"""
fmt_str = string_value + "\n\0"
fmt_ptr = _create_format_string_global(fmt_str, func, module, builder)
args = [fmt_ptr, ir.Constant(ir.IntType(32), len(fmt_str))]
return args
def handle_fstring_print(
joined_str,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
):
"""Handle f-string formatting for bpf_printk emitter."""
fmt_parts = []
exprs = []
for value in joined_str.values:
logger.debug(f"Processing f-string value: {ast.dump(value)}")
if isinstance(value, ast.Constant):
_process_constant_in_fstring(value, fmt_parts, exprs)
elif isinstance(value, ast.FormattedValue):
_process_fval(
value,
fmt_parts,
exprs,
local_sym_tab,
struct_sym_tab,
)
else:
raise NotImplementedError(f"Unsupported f-string value type: {type(value)}")
fmt_str = "".join(fmt_parts)
args = simple_string_print(fmt_str, module, builder, func)
# NOTE: Process expressions (limited to 3 due to BPF constraints)
if len(exprs) > 3:
logger.warning("bpf_printk supports up to 3 args, extra args will be ignored.")
for expr in exprs[:3]:
arg_value = _prepare_expr_args(
expr,
func,
module,
builder,
local_sym_tab,
struct_sym_tab,
)
args.append(arg_value)
return args
def _process_constant_in_fstring(cst, fmt_parts, exprs):
"""Process constant values in f-string."""
if isinstance(cst.value, str):
fmt_parts.append(cst.value)
elif isinstance(cst.value, int):
fmt_parts.append("%lld")
exprs.append(ir.Constant(ir.IntType(64), cst.value))
else:
raise NotImplementedError(
f"Unsupported constant type in f-string: {type(cst.value)}"
)
def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
"""Process formatted values in f-string."""
logger.debug(f"Processing formatted value: {ast.dump(fval)}")
if isinstance(fval.value, ast.Name):
_process_name_in_fval(fval.value, fmt_parts, exprs, local_sym_tab)
elif isinstance(fval.value, ast.Attribute):
_process_attr_in_fval(
fval.value,
fmt_parts,
exprs,
local_sym_tab,
struct_sym_tab,
)
else:
raise NotImplementedError(
f"Unsupported formatted value in f-string: {type(fval.value)}"
)
def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
"""Process name nodes in formatted values."""
if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
"""Process attribute nodes in formatted values."""
if (
isinstance(attr_node.value, ast.Name)
and local_sym_tab
and attr_node.value.id in local_sym_tab
):
var_name = attr_node.value.id
field_name = attr_node.attr
var_type = local_sym_tab[var_name].metadata
if var_type not in struct_sym_tab:
raise ValueError(
f"Struct '{var_type}' for '{var_name}' not in symbol table"
)
struct_info = struct_sym_tab[var_type]
if field_name not in struct_info.fields:
raise ValueError(f"Field '{field_name}' not found in struct '{var_type}'")
field_type = struct_info.field_type(field_name)
_populate_fval(field_type, attr_node, fmt_parts, exprs)
else:
raise NotImplementedError(
"Only simple attribute on local vars is supported in f-strings."
)
def _populate_fval(ftype, node, fmt_parts, exprs):
"""Populate format parts and expressions based on field type."""
if isinstance(ftype, ir.IntType):
# TODO: We print as signed integers only for now
if ftype.width == 64:
fmt_parts.append("%lld")
exprs.append(node)
elif ftype.width == 32:
fmt_parts.append("%d")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported integer width in f-string: {ftype.width}"
)
elif ftype == ir.PointerType(ir.IntType(8)):
# NOTE: We assume i8* is a string
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
def _create_format_string_global(fmt_str, func, module, builder):
"""Create a global variable for the format string."""
fmt_name = f"{func.name}____fmt{func._fmt_counter}"
func._fmt_counter += 1
fmt_gvar = ir.GlobalVariable(
module, ir.ArrayType(ir.IntType(8), len(fmt_str)), name=fmt_name
)
fmt_gvar.global_constant = True
fmt_gvar.initializer = ir.Constant(
ir.ArrayType(ir.IntType(8), len(fmt_str)), bytearray(fmt_str.encode("utf8"))
)
fmt_gvar.linkage = "internal"
fmt_gvar.align = 1
return builder.bitcast(fmt_gvar, ir.PointerType())
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab):
"""Evaluate and prepare an expression to use as an arg for bpf_printk."""
val, _ = eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
None,
struct_sym_tab,
)
if val:
if isinstance(val.type, ir.PointerType):
val = builder.ptrtoint(val, ir.IntType(64))
elif isinstance(val.type, ir.IntType):
if val.type.width < 64:
val = builder.sext(val, ir.IntType(64))
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
return val
else:
logger.warning(
"Failed to evaluate expression for bpf_printk argument. "
"It will be converted to 0."
)
return ir.Constant(ir.IntType(64), 0)
def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab):
"""Extract data pointer and size information for perf event output."""
if isinstance(data_arg, ast.Name):
@ -305,3 +210,163 @@ def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab):
raise NotImplementedError(
"Only simple object names are supported as data in perf event output."
)
def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
"""Extract buffer pointer and size from either a struct field or variable."""
# Case 1: Struct field (obj.field)
if isinstance(buf_arg, ast.Attribute):
if not isinstance(buf_arg.value, ast.Name):
raise ValueError(
"Only simple struct field access supported (e.g., obj.field)"
)
struct_name = buf_arg.value.id
field_name = buf_arg.attr
# Lookup struct
if not local_sym_tab or struct_name not in local_sym_tab:
raise ValueError(f"Struct '{struct_name}' not found")
struct_type = local_sym_tab[struct_name].metadata
if not struct_sym_tab or struct_type not in struct_sym_tab:
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
# Get field pointer and type
struct_ptr = local_sym_tab[struct_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
field_type = struct_info.field_type(field_name)
if not isinstance(field_type, ir.ArrayType):
raise ValueError(f"Field '{field_name}' must be an array type")
return field_ptr, field_type.count
# Case 2: Variable name
elif isinstance(buf_arg, ast.Name):
var_name = buf_arg.id
if not local_sym_tab or var_name not in local_sym_tab:
raise ValueError(f"Variable '{var_name}' not found")
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
if not isinstance(var_type, ir.ArrayType):
raise ValueError(f"Variable '{var_name}' must be an array type")
return var_ptr, var_type.count
else:
raise ValueError(
"comm expects either a struct field (obj.field) or variable name"
)
def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
"""Get pointer to char array and its size."""
# Struct field: obj.field
if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name):
var_name = buf_arg.value.id
field_name = buf_arg.attr
if not (local_sym_tab and var_name in local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found")
struct_type = local_sym_tab[var_name].metadata
if not (struct_sym_tab and struct_type in struct_sym_tab):
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
if field_name not in struct_info.fields:
raise ValueError(f"Field '{field_name}' not found")
field_type = struct_info.field_type(field_name)
if not _is_char_array(field_type):
logger.info(
"Field is not a char array, falling back to int or ptr detection"
)
return None, 0
struct_ptr = local_sym_tab[var_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
# GEP to first element: [N x i8]* -> i8*
buf_ptr = builder.gep(
field_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)
return buf_ptr, field_type.count
elif isinstance(buf_arg, ast.Name):
# NOTE: We shouldn't be doing this as we can't get size info
var_name = buf_arg.id
if not (local_sym_tab and var_name in local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found")
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
if not isinstance(var_type, ir.PointerType) or not isinstance(
var_type.pointee, ir.IntType(8)
):
raise ValueError("Expected str ptr variable")
return var_ptr, 256 # Size unknown for str ptr, using 256 as default
else:
raise ValueError("Expected struct field or variable name")
def _is_char_array(ir_type):
"""Check if IR type is [N x i8]."""
return (
isinstance(ir_type, ir.ArrayType)
and isinstance(ir_type.element, ir.IntType)
and ir_type.element.width == 8
)
def get_ptr_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
"""Evaluate argument and return pointer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
if not result:
raise ValueError("Failed to evaluate argument")
val, val_type = result
if not isinstance(val_type, ir.PointerType):
raise ValueError(f"Expected pointer type, got {val_type}")
return val, val_type
def get_int_value_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
"""Evaluate argument and return integer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
if not result:
raise ValueError("Failed to evaluate argument")
val, val_type = result
if not isinstance(val_type, ir.IntType):
raise ValueError(f"Expected integer type, got {val_type}")
return val

View File

@ -2,18 +2,63 @@ import ctypes
def ktime():
"""get current ktime"""
return ctypes.c_int64(0)
def pid():
"""get current process id"""
return ctypes.c_int32(0)
def deref(ptr):
"dereference a pointer"
"""dereference a pointer"""
result = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_void_p)).contents.value
return result if result is not None else 0
def comm(buf):
"""get current process command name"""
return ctypes.c_int64(0)
def probe_read_str(dst, src):
"""Safely read a null-terminated string from kernel memory"""
return ctypes.c_int64(0)
def random():
"""get a pseudorandom u32 number"""
return ctypes.c_int32(0)
def probe_read(dst, size, src):
"""Safely read data from kernel memory"""
return ctypes.c_int64(0)
def smp_processor_id():
"""get the current CPU id"""
return ctypes.c_int32(0)
def uid():
"""get current user id"""
return ctypes.c_int32(0)
def skb_store_bytes(offset, from_buf, size, flags=0):
"""store bytes into a socket buffer"""
return ctypes.c_int64(0)
def get_stack(buf, flags=0):
"""get the current stack trace"""
return ctypes.c_int64(0)
XDP_ABORTED = ctypes.c_int64(0)
XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2)
XDP_TX = ctypes.c_int64(3)
XDP_REDIRECT = ctypes.c_int64(4)

View File

@ -0,0 +1,272 @@
import ast
import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
from pythonbpf.helper.helper_utils import get_char_array_ptr_and_size
logger = logging.getLogger(__name__)
def simple_string_print(string_value, module, builder, func):
"""Prepare arguments for bpf_printk from a simple string value"""
fmt_str = string_value + "\n\0"
fmt_ptr = _create_format_string_global(fmt_str, func, module, builder)
args = [fmt_ptr, ir.Constant(ir.IntType(32), len(fmt_str))]
return args
def handle_fstring_print(
joined_str,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
):
"""Handle f-string formatting for bpf_printk emitter."""
fmt_parts = []
exprs = []
for value in joined_str.values:
logger.debug(f"Processing f-string value: {ast.dump(value)}")
if isinstance(value, ast.Constant):
_process_constant_in_fstring(value, fmt_parts, exprs)
elif isinstance(value, ast.FormattedValue):
_process_fval(
value,
fmt_parts,
exprs,
local_sym_tab,
struct_sym_tab,
)
else:
raise NotImplementedError(f"Unsupported f-string value type: {type(value)}")
fmt_str = "".join(fmt_parts)
args = simple_string_print(fmt_str, module, builder, func)
# NOTE: Process expressions (limited to 3 due to BPF constraints)
if len(exprs) > 3:
logger.warning("bpf_printk supports up to 3 args, extra args will be ignored.")
for expr in exprs[:3]:
arg_value = _prepare_expr_args(
expr,
func,
module,
builder,
local_sym_tab,
struct_sym_tab,
)
args.append(arg_value)
return args
# ============================================================================
# Internal Helpers
# ============================================================================
def _process_constant_in_fstring(cst, fmt_parts, exprs):
"""Process constant values in f-string."""
if isinstance(cst.value, str):
fmt_parts.append(cst.value)
elif isinstance(cst.value, int):
fmt_parts.append("%lld")
exprs.append(ir.Constant(ir.IntType(64), cst.value))
else:
raise NotImplementedError(
f"Unsupported constant type in f-string: {type(cst.value)}"
)
def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
"""Process formatted values in f-string."""
logger.debug(f"Processing formatted value: {ast.dump(fval)}")
if isinstance(fval.value, ast.Name):
_process_name_in_fval(fval.value, fmt_parts, exprs, local_sym_tab)
elif isinstance(fval.value, ast.Attribute):
_process_attr_in_fval(
fval.value,
fmt_parts,
exprs,
local_sym_tab,
struct_sym_tab,
)
else:
raise NotImplementedError(
f"Unsupported formatted value in f-string: {type(fval.value)}"
)
def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
"""Process name nodes in formatted values."""
if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs)
else:
# Try to resolve through vmlinux registry if not in local symbol table
result = VmlinuxHandlerRegistry.handle_name(name_node.id)
if result:
val, var_type = result
_populate_fval(var_type, name_node, fmt_parts, exprs)
else:
raise ValueError(
f"Variable '{name_node.id}' not found in symbol table or vmlinux"
)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
"""Process attribute nodes in formatted values."""
if (
isinstance(attr_node.value, ast.Name)
and local_sym_tab
and attr_node.value.id in local_sym_tab
):
var_name = attr_node.value.id
field_name = attr_node.attr
var_type = local_sym_tab[var_name].metadata
if var_type not in struct_sym_tab:
raise ValueError(
f"Struct '{var_type}' for '{var_name}' not in symbol table"
)
struct_info = struct_sym_tab[var_type]
if field_name not in struct_info.fields:
raise ValueError(f"Field '{field_name}' not found in struct '{var_type}'")
field_type = struct_info.field_type(field_name)
_populate_fval(field_type, attr_node, fmt_parts, exprs)
else:
raise NotImplementedError(
"Only simple attribute on local vars is supported in f-strings."
)
def _populate_fval(ftype, node, fmt_parts, exprs):
"""Populate format parts and expressions based on field type."""
if isinstance(ftype, ir.IntType):
# TODO: We print as signed integers only for now
if ftype.width == 64:
fmt_parts.append("%lld")
exprs.append(node)
elif ftype.width == 32:
fmt_parts.append("%d")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported integer width in f-string: {ftype.width}"
)
elif isinstance(ftype, ir.PointerType):
target, depth = get_base_type_and_depth(ftype)
if isinstance(target, ir.IntType):
if target.width == 64:
fmt_parts.append("%lld")
exprs.append(node)
elif target.width == 32:
fmt_parts.append("%d")
exprs.append(node)
elif target.width == 8 and depth == 1:
# NOTE: Assume i8* is a string
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
elif isinstance(ftype, ir.ArrayType):
if isinstance(ftype.element, ir.IntType) and ftype.element.width == 8:
# Char array
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported array element type in f-string: {ftype.element}"
)
else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
def _create_format_string_global(fmt_str, func, module, builder):
"""Create a global variable for the format string."""
fmt_name = f"{func.name}____fmt{func._fmt_counter}"
func._fmt_counter += 1
fmt_gvar = ir.GlobalVariable(
module, ir.ArrayType(ir.IntType(8), len(fmt_str)), name=fmt_name
)
fmt_gvar.global_constant = True
fmt_gvar.initializer = ir.Constant(
ir.ArrayType(ir.IntType(8), len(fmt_str)), bytearray(fmt_str.encode("utf8"))
)
fmt_gvar.linkage = "internal"
fmt_gvar.align = 1
return builder.bitcast(fmt_gvar, ir.PointerType())
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab):
"""Evaluate and prepare an expression to use as an arg for bpf_printk."""
# Special case: struct field char array needs pointer to first element
if isinstance(expr, ast.Attribute):
char_array_ptr, _ = get_char_array_ptr_and_size(
expr, builder, local_sym_tab, struct_sym_tab
)
if char_array_ptr:
return char_array_ptr
# Regular expression evaluation
val, _ = eval_expr(func, module, builder, expr, local_sym_tab, None, struct_sym_tab)
if not val:
logger.warning("Failed to evaluate expression for bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
# Convert value to bpf_printk compatible type
if isinstance(val.type, ir.PointerType):
return _handle_pointer_arg(val, func, builder)
elif isinstance(val.type, ir.IntType):
return _handle_int_arg(val, builder)
else:
logger.warning(f"Unsupported type {val.type} in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
def _handle_pointer_arg(val, func, builder):
"""Convert pointer type for bpf_printk."""
target, depth = get_base_type_and_depth(val.type)
if not isinstance(target, ir.IntType):
logger.warning("Only int pointers supported in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
# i8* is string - use as-is
if target.width == 8 and depth == 1:
return val
# Integer pointers: dereference and sign-extend to i64
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
return builder.sext(val, ir.IntType(64))
logger.warning("Unsupported pointer width in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
def _handle_int_arg(val, builder):
"""Convert integer type for bpf_printk (sign-extend to i64)."""
if val.type.width < 64:
return builder.sext(val, ir.IntType(64))
return val

15
pythonbpf/local_symbol.py Normal file
View File

@ -0,0 +1,15 @@
import llvmlite.ir as ir
from dataclasses import dataclass
from typing import Any
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata

View File

@ -1,4 +1,5 @@
from .maps import HashMap, PerfEventArray, RingBuf
from .maps import HashMap, PerfEventArray, RingBuffer
from .maps_pass import maps_proc
from .map_types import BPFMapType
__all__ = ["HashMap", "PerfEventArray", "maps_proc", "RingBuf"]
__all__ = ["HashMap", "PerfEventArray", "maps_proc", "RingBuffer", "BPFMapType"]

View File

@ -0,0 +1,170 @@
import logging
from llvmlite import ir
from pythonbpf.debuginfo import DebugInfoGenerator
from .map_types import BPFMapType
logger: logging.Logger = logging.getLogger(__name__)
def create_map_debug_info(module, map_global, map_name, map_params, structs_sym_tab):
"""Generate debug info metadata for BPF maps HASH and PERF_EVENT_ARRAY"""
generator = DebugInfoGenerator(module)
logger.info(f"Creating debug info for map {map_name} with params {map_params}")
uint_type = generator.get_uint32_type()
array_type = generator.create_array_type(
uint_type, map_params.get("type", BPFMapType.UNSPEC).value
)
type_ptr = generator.create_pointer_type(array_type, 64)
key_ptr = generator.create_pointer_type(
array_type
if "key_size" in map_params
else _get_key_val_dbg_type(map_params.get("key"), generator, structs_sym_tab),
64,
)
value_ptr = generator.create_pointer_type(
array_type
if "value_size" in map_params
else _get_key_val_dbg_type(map_params.get("value"), generator, structs_sym_tab),
64,
)
elements_arr = []
# Create struct members
# scope field does not appear for some reason
cnt = 0
for elem in map_params:
if elem == "max_entries":
continue
if elem == "type":
ptr = type_ptr
elif "key" in elem:
ptr = key_ptr
else:
ptr = value_ptr
# TODO: the best way to do this is not 64, but get the size each time. this will not work for structs.
member = generator.create_struct_member(elem, ptr, cnt * 64)
elements_arr.append(member)
cnt += 1
if "max_entries" in map_params:
max_entries_array = generator.create_array_type(
uint_type, map_params["max_entries"]
)
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member(
"max_entries", max_entries_ptr, cnt * 64
)
elements_arr.append(max_entries_member)
# Create the struct type
struct_type = generator.create_struct_type(
elements_arr, 64 * len(elements_arr), is_distinct=True
)
# Create global variable debug info
global_var = generator.create_global_var_debug_info(
map_name, struct_type, is_local=False
)
# Attach debug info to the global variable
map_global.set_metadata("dbg", global_var)
return global_var
# TODO: This should not be exposed outside of the module.
# Ideally we should expose a single create_map_debug_info function that handles all map types.
# We can probably use a registry pattern to register different map types and their debug info generators.
# map_params["type"] will be used to determine which generator to use.
def create_ringbuf_debug_info(
module, map_global, map_name, map_params, structs_sym_tab
):
"""Generate debug information metadata for BPF RINGBUF map"""
generator = DebugInfoGenerator(module)
int_type = generator.get_int32_type()
type_array = generator.create_array_type(
int_type, map_params.get("type", BPFMapType.RINGBUF).value
)
type_ptr = generator.create_pointer_type(type_array, 64)
type_member = generator.create_struct_member("type", type_ptr, 0)
max_entries_array = generator.create_array_type(int_type, map_params["max_entries"])
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member(
"max_entries", max_entries_ptr, 64
)
elements_arr = [type_member, max_entries_member]
struct_type = generator.create_struct_type(elements_arr, 128, is_distinct=True)
global_var = generator.create_global_var_debug_info(
map_name, struct_type, is_local=False
)
map_global.set_metadata("dbg", global_var)
return global_var
def _get_key_val_dbg_type(name, generator, structs_sym_tab):
"""Get the debug type for key/value based on type object"""
if not name:
logger.warn("No name provided for key/value type, defaulting to uint64")
return generator.get_uint64_type()
type_obj = structs_sym_tab.get(name)
if type_obj:
return _get_struct_debug_type(type_obj, generator, structs_sym_tab)
# Fallback to basic types
logger.info(f"No struct named {name}, falling back to basic type")
# NOTE: Only handling int and long for now
if name in ["c_int32", "c_uint32"]:
return generator.get_uint32_type()
# Default fallback for now
return generator.get_uint64_type()
def _get_struct_debug_type(struct_obj, generator, structs_sym_tab):
"""Recursively create debug type for struct"""
elements_arr = []
for fld in struct_obj.fields.keys():
fld_type = struct_obj.field_type(fld)
if isinstance(fld_type, ir.IntType):
if fld_type.width == 32:
fld_dbg_type = generator.get_uint32_type()
else:
# NOTE: Assuming 64-bit for all other int types
fld_dbg_type = generator.get_uint64_type()
elif isinstance(fld_type, ir.ArrayType):
# NOTE: Array types have u8 elements only for now
# Debug info generation should fail for other types
elem_type = fld_type.element
if isinstance(elem_type, ir.IntType) and elem_type.width == 8:
char_type = generator.get_uint8_type()
fld_dbg_type = generator.create_array_type(char_type, fld_type.count)
else:
logger.warning(
f"Array element type {str(elem_type)} not supported for debug info, skipping"
)
continue
else:
# NOTE: Only handling int and char arrays for now
logger.warning(
f"Field type {str(fld_type)} not supported for debug info, skipping"
)
continue
member = generator.create_struct_member(
fld, fld_dbg_type, struct_obj.field_size(fld)
)
elements_arr.append(member)
struct_type = generator.create_struct_type(
elements_arr, struct_obj.size, is_distinct=True
)
return struct_type

View File

@ -0,0 +1,39 @@
from enum import Enum
class BPFMapType(Enum):
UNSPEC = 0
HASH = 1
ARRAY = 2
PROG_ARRAY = 3
PERF_EVENT_ARRAY = 4
PERCPU_HASH = 5
PERCPU_ARRAY = 6
STACK_TRACE = 7
CGROUP_ARRAY = 8
LRU_HASH = 9
LRU_PERCPU_HASH = 10
LPM_TRIE = 11
ARRAY_OF_MAPS = 12
HASH_OF_MAPS = 13
DEVMAP = 14
SOCKMAP = 15
CPUMAP = 16
XSKMAP = 17
SOCKHASH = 18
CGROUP_STORAGE_DEPRECATED = 19
CGROUP_STORAGE = 19
REUSEPORT_SOCKARRAY = 20
PERCPU_CGROUP_STORAGE_DEPRECATED = 21
PERCPU_CGROUP_STORAGE = 21
QUEUE = 22
STACK = 23
SK_STORAGE = 24
DEVMAP_HASH = 25
STRUCT_OPS = 26
RINGBUF = 27
INODE_STORAGE = 28
TASK_STORAGE = 29
BLOOM_FILTER = 30
USER_RINGBUF = 31
CGRP_STORAGE = 32

View File

@ -36,11 +36,14 @@ class PerfEventArray:
pass # Placeholder for output method
class RingBuf:
class RingBuffer:
def __init__(self, max_entries):
self.max_entries = max_entries
def reserve(self, size: int, flags=0):
def output(self, data, flags=0):
pass
def reserve(self, size: int):
if size > self.max_entries:
raise ValueError("size cannot be greater than set maximum entries")
return 0
@ -48,4 +51,7 @@ class RingBuf:
def submit(self, data, flags=0):
pass
def discard(self, data, flags=0):
pass
# add discard, output and also give names to flags and stuff

View File

@ -1,21 +1,26 @@
import ast
import logging
from logging import Logger
from llvmlite import ir
from enum import Enum
from .maps_utils import MapProcessorRegistry
from pythonbpf.debuginfo import DebugInfoGenerator
import logging
from .maps_utils import MapProcessorRegistry, MapSymbol
from .map_types import BPFMapType
from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__)
def maps_proc(tree, module, chunks):
def maps_proc(tree, module, chunks, structs_sym_tab):
"""Process all functions decorated with @map to find BPF maps"""
map_sym_tab = {}
for func_node in chunks:
if is_map(func_node):
logger.info(f"Found BPF map: {func_node.name}")
map_sym_tab[func_node.name] = process_bpf_map(func_node, module)
map_sym_tab[func_node.name] = process_bpf_map(
func_node, module, structs_sym_tab
)
return map_sym_tab
@ -26,44 +31,6 @@ def is_map(func_node):
)
class BPFMapType(Enum):
UNSPEC = 0
HASH = 1
ARRAY = 2
PROG_ARRAY = 3
PERF_EVENT_ARRAY = 4
PERCPU_HASH = 5
PERCPU_ARRAY = 6
STACK_TRACE = 7
CGROUP_ARRAY = 8
LRU_HASH = 9
LRU_PERCPU_HASH = 10
LPM_TRIE = 11
ARRAY_OF_MAPS = 12
HASH_OF_MAPS = 13
DEVMAP = 14
SOCKMAP = 15
CPUMAP = 16
XSKMAP = 17
SOCKHASH = 18
CGROUP_STORAGE_DEPRECATED = 19
CGROUP_STORAGE = 19
REUSEPORT_SOCKARRAY = 20
PERCPU_CGROUP_STORAGE_DEPRECATED = 21
PERCPU_CGROUP_STORAGE = 21
QUEUE = 22
STACK = 23
SK_STORAGE = 24
DEVMAP_HASH = 25
STRUCT_OPS = 26
RINGBUF = 27
INODE_STORAGE = 28
TASK_STORAGE = 29
BLOOM_FILTER = 30
USER_RINGBUF = 31
CGRP_STORAGE = 32
def create_bpf_map(module, map_name, map_params):
"""Create a BPF map in the module with given parameters and debug info"""
@ -81,183 +48,98 @@ def create_bpf_map(module, map_name, map_params):
map_global.align = 8
logger.info(f"Created BPF map: {map_name} with params {map_params}")
return map_global
return MapSymbol(type=map_params["type"], sym=map_global, params=map_params)
def create_map_debug_info(module, map_global, map_name, map_params):
"""Generate debug info metadata for BPF maps HASH and PERF_EVENT_ARRAY"""
generator = DebugInfoGenerator(module)
def _parse_map_params(rval, expected_args=None):
"""Parse map parameters from call arguments and keywords."""
uint_type = generator.get_uint32_type()
ulong_type = generator.get_uint64_type()
array_type = generator.create_array_type(
uint_type, map_params.get("type", BPFMapType.UNSPEC).value
)
type_ptr = generator.create_pointer_type(array_type, 64)
key_ptr = generator.create_pointer_type(
array_type if "key_size" in map_params else ulong_type, 64
)
value_ptr = generator.create_pointer_type(
array_type if "value_size" in map_params else ulong_type, 64
)
params = {}
handler = VmlinuxHandlerRegistry.get_handler()
# Parse positional arguments
if expected_args:
for i, arg_name in enumerate(expected_args):
if i < len(rval.args):
arg = rval.args[i]
if isinstance(arg, ast.Name):
result = _get_vmlinux_enum(handler, arg.id)
params[arg_name] = result if result is not None else arg.id
elif isinstance(arg, ast.Constant):
params[arg_name] = arg.value
elements_arr = []
# Parse keyword arguments (override positional)
for keyword in rval.keywords:
if isinstance(keyword.value, ast.Name):
name = keyword.value.id
result = _get_vmlinux_enum(handler, name)
params[keyword.arg] = result if result is not None else name
elif isinstance(keyword.value, ast.Constant):
params[keyword.arg] = keyword.value.value
# Create struct members
# scope field does not appear for some reason
cnt = 0
for elem in map_params:
if elem == "max_entries":
continue
if elem == "type":
ptr = type_ptr
elif "key" in elem:
ptr = key_ptr
else:
ptr = value_ptr
# TODO: the best way to do this is not 64, but get the size each time. this will not work for structs.
member = generator.create_struct_member(elem, ptr, cnt * 64)
elements_arr.append(member)
cnt += 1
if "max_entries" in map_params:
max_entries_array = generator.create_array_type(
uint_type, map_params["max_entries"]
)
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member(
"max_entries", max_entries_ptr, cnt * 64
)
elements_arr.append(max_entries_member)
# Create the struct type
struct_type = generator.create_struct_type(
elements_arr, 64 * len(elements_arr), is_distinct=True
)
# Create global variable debug info
global_var = generator.create_global_var_debug_info(
map_name, struct_type, is_local=False
)
# Attach debug info to the global variable
map_global.set_metadata("dbg", global_var)
return global_var
return params
def create_ringbuf_debug_info(module, map_global, map_name, map_params):
"""Generate debug information metadata for BPF RINGBUF map"""
generator = DebugInfoGenerator(module)
int_type = generator.get_int32_type()
type_array = generator.create_array_type(
int_type, map_params.get("type", BPFMapType.RINGBUF).value
)
type_ptr = generator.create_pointer_type(type_array, 64)
type_member = generator.create_struct_member("type", type_ptr, 0)
max_entries_array = generator.create_array_type(int_type, map_params["max_entries"])
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member(
"max_entries", max_entries_ptr, 64
)
elements_arr = [type_member, max_entries_member]
struct_type = generator.create_struct_type(elements_arr, 128, is_distinct=True)
global_var = generator.create_global_var_debug_info(
map_name, struct_type, is_local=False
)
map_global.set_metadata("dbg", global_var)
return global_var
def _get_vmlinux_enum(handler, name):
if handler and handler.is_vmlinux_enum(name):
return handler.get_vmlinux_enum_value(name)
@MapProcessorRegistry.register("RingBuf")
def process_ringbuf_map(map_name, rval, module):
@MapProcessorRegistry.register("RingBuffer")
def process_ringbuf_map(map_name, rval, module, structs_sym_tab):
"""Process a BPF_RINGBUF map declaration"""
logger.info(f"Processing Ringbuf: {map_name}")
map_params = {"type": BPFMapType.RINGBUF}
map_params = _parse_map_params(rval, expected_args=["max_entries"])
map_params["type"] = BPFMapType.RINGBUF
# Parse max_entries if present
if len(rval.args) >= 1 and isinstance(rval.args[0], ast.Constant):
const_val = rval.args[0].value
if isinstance(const_val, int):
map_params["max_entries"] = const_val
for keyword in rval.keywords:
if keyword.arg == "max_entries" and isinstance(keyword.value, ast.Constant):
const_val = keyword.value.value
if isinstance(const_val, int):
map_params["max_entries"] = const_val
# NOTE: constraints borrowed from https://docs.ebpf.io/linux/map-type/BPF_MAP_TYPE_RINGBUF/
max_entries = map_params.get("max_entries")
if (
not isinstance(max_entries, int)
or max_entries < 4096
or (max_entries & (max_entries - 1)) != 0
):
raise ValueError(
"Ringbuf max_entries must be a power of two greater than or equal to the page size (4096)"
)
logger.info(f"Ringbuf map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
create_ringbuf_debug_info(module, map_global, map_name, map_params)
create_ringbuf_debug_info(
module, map_global.sym, map_name, map_params, structs_sym_tab
)
return map_global
@MapProcessorRegistry.register("HashMap")
def process_hash_map(map_name, rval, module):
def process_hash_map(map_name, rval, module, structs_sym_tab):
"""Process a BPF_HASH map declaration"""
logger.info(f"Processing HashMap: {map_name}")
map_params = {"type": BPFMapType.HASH}
# Assuming order: key_type, value_type, max_entries
if len(rval.args) >= 1 and isinstance(rval.args[0], ast.Name):
map_params["key"] = rval.args[0].id
if len(rval.args) >= 2 and isinstance(rval.args[1], ast.Name):
map_params["value"] = rval.args[1].id
if len(rval.args) >= 3 and isinstance(rval.args[2], ast.Constant):
const_val = rval.args[2].value
if isinstance(const_val, (int, str)): # safe check
map_params["max_entries"] = const_val
for keyword in rval.keywords:
if keyword.arg == "key" and isinstance(keyword.value, ast.Name):
map_params["key"] = keyword.value.id
elif keyword.arg == "value" and isinstance(keyword.value, ast.Name):
map_params["value"] = keyword.value.id
elif keyword.arg == "max_entries" and isinstance(keyword.value, ast.Constant):
const_val = keyword.value.value
if isinstance(const_val, (int, str)):
map_params["max_entries"] = const_val
map_params = _parse_map_params(rval, expected_args=["key", "value", "max_entries"])
map_params["type"] = BPFMapType.HASH
logger.info(f"Map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
# Generate debug info for BTF
create_map_debug_info(module, map_global, map_name, map_params)
create_map_debug_info(module, map_global.sym, map_name, map_params, structs_sym_tab)
return map_global
@MapProcessorRegistry.register("PerfEventArray")
def process_perf_event_map(map_name, rval, module):
def process_perf_event_map(map_name, rval, module, structs_sym_tab):
"""Process a BPF_PERF_EVENT_ARRAY map declaration"""
logger.info(f"Processing PerfEventArray: {map_name}")
map_params = {"type": BPFMapType.PERF_EVENT_ARRAY}
if len(rval.args) >= 1 and isinstance(rval.args[0], ast.Name):
map_params["key_size"] = rval.args[0].id
if len(rval.args) >= 2 and isinstance(rval.args[1], ast.Name):
map_params["value_size"] = rval.args[1].id
for keyword in rval.keywords:
if keyword.arg == "key_size" and isinstance(keyword.value, ast.Name):
map_params["key_size"] = keyword.value.id
elif keyword.arg == "value_size" and isinstance(keyword.value, ast.Name):
map_params["value_size"] = keyword.value.id
map_params = _parse_map_params(rval, expected_args=["key_size", "value_size"])
map_params["type"] = BPFMapType.PERF_EVENT_ARRAY
logger.info(f"Map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
# Generate debug info for BTF
create_map_debug_info(module, map_global, map_name, map_params)
create_map_debug_info(module, map_global.sym, map_name, map_params)
return map_global
def process_bpf_map(func_node, module):
def process_bpf_map(func_node, module, structs_sym_tab):
"""Process a BPF map (a function decorated with @map)"""
map_name = func_node.name
logger.info(f"Processing BPF map: {map_name}")
@ -276,7 +158,7 @@ def process_bpf_map(func_node, module):
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
handler = MapProcessorRegistry.get_processor(rval.func.id)
if handler:
return handler(map_name, rval, module)
return handler(map_name, rval, module, structs_sym_tab)
else:
logger.warning(f"Unknown map type {rval.func.id}, defaulting to HashMap")
return process_hash_map(map_name, rval, module)

View File

@ -1,5 +1,17 @@
from collections.abc import Callable
from dataclasses import dataclass
from llvmlite import ir
from typing import Any
from .map_types import BPFMapType
@dataclass
class MapSymbol:
"""Class representing a symbol on the map"""
type: BPFMapType
sym: ir.GlobalVariable
params: dict[str, Any] | None = None
class MapProcessorRegistry:

View File

@ -13,6 +13,10 @@ mapping = {
"c_float": ir.FloatType(),
"c_double": ir.DoubleType(),
"c_void_p": ir.IntType(64),
"c_long": ir.IntType(64),
"c_ulong": ir.IntType(64),
"c_longlong": ir.IntType(64),
"c_uint": ir.IntType(32),
# Not so sure about this one
"str": ir.PointerType(ir.IntType(8)),
}

58
pythonbpf/utils.py Normal file
View File

@ -0,0 +1,58 @@
import subprocess
def trace_pipe():
"""Util to read from the trace pipe."""
try:
subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"])
except KeyboardInterrupt:
print("Tracing stopped.")
except (FileNotFoundError, PermissionError) as e:
print(f"Error accessing trace_pipe: {e}. Try running as root.")
def trace_fields():
"""Parse one line from trace_pipe into fields."""
with open("/sys/kernel/tracing/trace_pipe", "rb", buffering=0) as f:
while True:
line = f.readline().rstrip()
if not line:
continue
# Skip lost event lines
if line.startswith(b"CPU:"):
continue
# Parse BCC-style: first 16 bytes = task
task = line[:16].lstrip().decode("utf-8")
line = line[17:] # Skip past task field and space
# Find the colon that ends "pid cpu flags timestamp"
ts_end = line.find(b":")
if ts_end == -1:
raise ValueError("Cannot parse trace line")
# Split "pid [cpu] flags timestamp"
try:
parts = line[:ts_end].split()
if len(parts) < 4:
raise ValueError("Not enough fields")
pid = int(parts[0])
cpu = parts[1][1:-1] # Remove brackets from [cpu]
cpu = int(cpu)
flags = parts[2]
ts = float(parts[3])
except (ValueError, IndexError):
raise ValueError("Cannot parse trace line")
# Get message: skip ": symbol:" part
line = line[ts_end + 1 :] # Skip first ":"
sym_end = line.find(b":")
if sym_end != -1:
msg = line[sym_end + 2 :].decode("utf-8") # Skip ": " after symbol
else:
msg = line.lstrip().decode("utf-8")
return (task, pid, cpu, flags, ts, msg)

View File

@ -0,0 +1,3 @@
from .import_detector import vmlinux_proc
__all__ = ["vmlinux_proc"]

View File

@ -0,0 +1,36 @@
from enum import Enum, auto
from typing import Any, Dict, List, Optional
from dataclasses import dataclass
import llvmlite.ir as ir
from pythonbpf.vmlinux_parser.dependency_node import Field
class AssignmentType(Enum):
CONSTANT = auto()
STRUCT = auto()
ARRAY = auto() # probably won't be used
FUNCTION_POINTER = auto()
POINTER = auto() # again, probably won't be used
@dataclass
class FunctionSignature:
return_type: str
param_types: List[str]
varargs: bool
# Thew name of the assignment will be in the dict that uses this class
@dataclass
class AssignmentInfo:
value_type: AssignmentType
python_type: type
value: Optional[Any]
pointer_level: Optional[int]
signature: Optional[FunctionSignature] # For function pointers
# The key of the dict is the name of the field.
# Value is a tuple that contains the global variable representing that field
# along with all the information about that field as a Field type.
members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs.
debug_info: Any

View File

@ -0,0 +1,325 @@
import logging
from functools import lru_cache
import importlib
from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode
import ctypes
from typing import Optional, Any, Dict
logger = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def get_module_symbols(module_name: str):
imported_module = importlib.import_module(module_name)
return [name for name in dir(imported_module)], imported_module
def unwrap_pointer_type(type_obj: Any) -> Any:
"""
Recursively unwrap all pointer layers to get the base type.
This handles multiply nested pointers like LP_LP_struct_attribute_group
and returns the base type (struct_attribute_group).
Stops unwrapping when reaching a non-pointer type (one without _type_ attribute).
Args:
type_obj: The type object to unwrap
Returns:
The base type after unwrapping all pointer layers
"""
current_type = type_obj
# Keep unwrapping while it's a pointer/array type (has _type_)
# But stop if _type_ is just a string or basic type marker
while hasattr(current_type, "_type_"):
next_type = current_type._type_
# Stop if _type_ is a string (like 'c' for c_char)
if isinstance(next_type, str):
break
current_type = next_type
return current_type
def process_vmlinux_class(
node,
llvm_module,
handler: DependencyHandler,
):
symbols_in_module, imported_module = get_module_symbols("vmlinux")
if node.name in symbols_in_module:
vmlinux_type = getattr(imported_module, node.name)
process_vmlinux_post_ast(vmlinux_type, llvm_module, handler)
else:
raise ImportError(f"{node.name} not in vmlinux")
def process_vmlinux_post_ast(
elem_type_class,
llvm_handler,
handler: DependencyHandler,
processing_stack=None,
):
# Initialize processing stack on first call
if processing_stack is None:
processing_stack = set()
symbols_in_module, imported_module = get_module_symbols("vmlinux")
current_symbol_name = elem_type_class.__name__
logger.info(f"Begin {current_symbol_name} Processing")
field_table: Dict[str, list] = {}
is_complex_type = False
containing_type: Optional[Any] = None
ctype_complex_type: Optional[Any] = None
type_length: Optional[int] = None
module_name = getattr(elem_type_class, "__module__", None)
# Check if already processed
if handler.has_node(current_symbol_name):
logger.debug(f"Node {current_symbol_name} already processed and ready")
return True
# XXX:Check its use. It's probably not being used.
if current_symbol_name in processing_stack:
logger.debug(
f"Dependency already in processing stack for {current_symbol_name}, skipping"
)
return True
processing_stack.add(current_symbol_name)
if module_name == "vmlinux":
if hasattr(elem_type_class, "_type_"):
pass
else:
new_dep_node = DependencyNode(name=current_symbol_name)
# elem_type_class is the actual vmlinux struct/class
new_dep_node.set_ctype_struct(elem_type_class)
handler.add_node(new_dep_node)
class_obj = getattr(imported_module, current_symbol_name)
# Inspect the class fields
if hasattr(class_obj, "_fields_"):
for field_elem in class_obj._fields_:
field_name: str = ""
field_type: Optional[Any] = None
bitfield_size: Optional[int] = None
if len(field_elem) == 2:
field_name, field_type = field_elem
elif len(field_elem) == 3:
field_name, field_type, bitfield_size = field_elem
field_table[field_name] = [field_type, bitfield_size]
elif hasattr(class_obj, "__annotations__"):
for field_elem in class_obj.__annotations__.items():
if len(field_elem) == 2:
field_name, field_type = field_elem
bitfield_size = None
elif len(field_elem) == 3:
field_name, field_type, bitfield_size = field_elem
else:
raise ValueError(
"Number of fields in items() of class object unexpected"
)
field_table[field_name] = [field_type, bitfield_size]
else:
raise TypeError("Could not get required class and definition")
logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}")
for elem in field_table.items():
elem_name, elem_temp_list = elem
[elem_type, elem_bitfield_size] = elem_temp_list
local_module_name = getattr(elem_type, "__module__", None)
new_dep_node.add_field(elem_name, elem_type, ready=False)
if local_module_name == ctypes.__name__:
# TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
# Process pointer to ctype
if isinstance(elem_type, type) and issubclass(
elem_type, ctypes._Pointer
):
# Get the pointed-to type
pointed_type = elem_type._type_
logger.debug(f"Found pointer to type: {pointed_type}")
new_dep_node.set_field_containing_type(elem_name, pointed_type)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes._Pointer
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
# Process function pointers (CFUNCTYPE)
elif hasattr(elem_type, "_restype_") and hasattr(
elem_type, "_argtypes_"
):
# This is a CFUNCTYPE or similar
logger.info(
f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}"
)
# Set the field as ready but mark it with special handling
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes.CFUNCTYPE
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.warning(
"Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported"
)
else:
# Regular ctype
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.debug(
f"Field {elem_name} is direct ctypes type: {elem_type}"
)
elif local_module_name == "vmlinux":
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
logger.debug(
f"Processing vmlinux field: {elem_name}, type: {elem_type}"
)
if hasattr(elem_type, "_type_"):
is_complex_type = True
containing_type = elem_type._type_
if hasattr(elem_type, "_length_") and is_complex_type:
type_length = elem_type._length_
# Unwrap all pointer layers to get the base type for dependency tracking
base_type = unwrap_pointer_type(elem_type)
base_type_module = getattr(base_type, "__module__", None)
if base_type_module == "vmlinux":
base_type_name = (
base_type.__name__
if hasattr(base_type, "__name__")
else str(base_type)
)
# ONLY add vmlinux types as dependencies
new_dep_node.add_dependent(base_type_name)
logger.debug(
f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
)
new_dep_node.set_field_containing_type(
elem_name, containing_type
)
new_dep_node.set_field_type_size(elem_name, type_length)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctype_complex_type
)
new_dep_node.set_field_type(elem_name, elem_type)
# Check the containing_type module to decide whether to recurse
containing_type_module = getattr(
containing_type, "__module__", None
)
if containing_type_module == "vmlinux":
# Also unwrap containing_type to get base type name
base_containing_type = unwrap_pointer_type(
containing_type
)
containing_type_name = (
base_containing_type.__name__
if hasattr(base_containing_type, "__name__")
else str(base_containing_type)
)
# Check for self-reference or already processed
if containing_type_name == current_symbol_name:
# Self-referential pointer
logger.debug(
f"Self-referential pointer in {current_symbol_name}.{elem_name}"
)
new_dep_node.set_field_ready(elem_name, True)
elif handler.has_node(containing_type_name):
# Already processed
logger.debug(
f"Reusing already processed {containing_type_name}"
)
new_dep_node.set_field_ready(elem_name, True)
else:
# Process recursively - use base containing type, not the pointer wrapper
new_dep_node.add_dependent(containing_type_name)
process_vmlinux_post_ast(
base_containing_type,
llvm_handler,
handler,
processing_stack,
)
new_dep_node.set_field_ready(elem_name, True)
elif (
containing_type_module == ctypes.__name__
or containing_type_module is None
):
logger.debug(
f"Processing ctype internal{containing_type}"
)
new_dep_node.set_field_ready(elem_name, True)
else:
raise TypeError(
f"Module not supported in recursive resolution: {containing_type_module}"
)
elif (
base_type_module == ctypes.__name__
or base_type_module is None
):
# Handle ctypes or types with no module (like some internal ctypes types)
# DO NOT add ctypes as dependencies - just set field metadata and mark ready
logger.debug(
f"Base type {base_type} is ctypes - NOT adding as dependency, just processing field"
)
if isinstance(elem_type, type):
if issubclass(elem_type, ctypes.Array):
ctype_complex_type = ctypes.Array
elif issubclass(elem_type, ctypes._Pointer):
ctype_complex_type = ctypes._Pointer
else:
raise ImportError(
"Non Array and Pointer type ctype imports not supported in current version"
)
else:
raise TypeError("Unsupported ctypes subclass")
# Set field metadata but DO NOT add dependency or recurse
new_dep_node.set_field_containing_type(
elem_name, containing_type
)
new_dep_node.set_field_type_size(elem_name, type_length)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctype_complex_type
)
new_dep_node.set_field_type(elem_name, elem_type)
new_dep_node.set_field_ready(elem_name, True)
else:
raise ImportError(
f"Unsupported module of {base_type}: {base_type_module}"
)
else:
new_dep_node.add_dependent(
elem_type.__name__
if hasattr(elem_type, "__name__")
else str(elem_type)
)
process_vmlinux_post_ast(
elem_type,
llvm_handler,
handler,
processing_stack,
)
new_dep_node.set_field_ready(elem_name, True)
else:
raise ValueError(
f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
)
elif module_name == ctypes.__name__ or module_name is None:
# Handle ctypes types - these don't need processing, just return
logger.debug(f"Skipping ctypes type {current_symbol_name}")
return True
else:
raise ImportError(f"UNSUPPORTED Module {module_name}")
logger.info(
f"{current_symbol_name} processed and handler readiness {handler.is_ready}"
)
return True

View File

@ -0,0 +1,173 @@
from typing import Optional, Dict, List, Iterator
from .dependency_node import DependencyNode
class DependencyHandler:
"""
Manages a collection of DependencyNode objects with no duplicates.
Ensures that no two nodes with the same name can be added and provides
methods to check readiness and retrieve specific nodes.
Example usage:
# Create a handler
handler = DependencyHandler()
# Create some dependency nodes
node1 = DependencyNode(name="node1")
node1.add_field("field1", str)
node1.set_field_value("field1", "value1")
node2 = DependencyNode(name="node2")
node2.add_field("field1", int)
# Add nodes to the handler
handler.add_node(node1)
handler.add_node(node2)
# Check if a specific node exists
print(handler.has_node("node1")) # True
# Get a reference to a node and modify it
node = handler.get_node("node2")
node.set_field_value("field1", 42)
# Check if all nodes are ready
print(handler.is_ready) # False (node2 is ready, but node1 isn't)
"""
def __init__(self):
# Using a dictionary with node names as keys ensures name uniqueness
# and provides efficient lookups
self._nodes: Dict[str, DependencyNode] = {}
def add_node(self, node: DependencyNode) -> bool:
"""
Add a dependency node to the handler.
Args:
node: The DependencyNode to add
Returns:
bool: True if the node was added, False if a node with the same name already exists
Raises:
TypeError: If the provided object is not a DependencyNode
"""
if not isinstance(node, DependencyNode):
raise TypeError(f"Expected DependencyNode, got {type(node).__name__}")
# Check if a node with this name already exists
if node.name in self._nodes:
return False
self._nodes[node.name] = node
return True
@property
def is_ready(self) -> bool:
"""
Check if all nodes are ready.
Returns:
bool: True if all nodes are ready (or if there are no nodes), False otherwise
"""
if not self._nodes:
return True
return all(node.is_ready for node in self._nodes.values())
def has_node(self, name: str) -> bool:
"""
Check if a node with the given name exists.
Args:
name: The name to check
Returns:
bool: True if a node with the given name exists, False otherwise
"""
return name in self._nodes
def get_node(self, name: str) -> Optional[DependencyNode]:
"""
Get a node by name for manipulation.
Args:
name: The name of the node to retrieve
Returns:
Optional[DependencyNode]: The node with the given name, or None if not found
"""
return self._nodes.get(name)
def remove_node(self, node_or_name) -> bool:
"""
Remove a node by name or reference.
Args:
node_or_name: The node to remove or its name
Returns:
bool: True if the node was removed, False if not found
"""
if isinstance(node_or_name, DependencyNode):
name = node_or_name.name
else:
name = node_or_name
if name in self._nodes:
del self._nodes[name]
return True
return False
def get_all_nodes(self) -> List[DependencyNode]:
"""
Get all nodes stored in the handler.
Returns:
List[DependencyNode]: List of all nodes
"""
return list(self._nodes.values())
def __iter__(self) -> Iterator[DependencyNode]:
"""
Iterate over all nodes.
Returns:
Iterator[DependencyNode]: Iterator over all nodes
"""
return iter(self._nodes.values())
def __len__(self) -> int:
"""
Get the number of nodes in the handler.
Returns:
int: The number of nodes
"""
return len(self._nodes)
def __getitem__(self, name: str) -> DependencyNode:
"""
Get a node by name using dictionary-style access.
Args:
name: The name of the node to retrieve
Returns:
DependencyNode: The node with the given name
Raises:
KeyError: If no node with the given name exists
Example:
node = handler["some-dep_node_name"]
"""
if name not in self._nodes:
raise KeyError(f"No node with name '{name}' found")
return self._nodes[name]
@property
def nodes(self):
return self._nodes

View File

@ -0,0 +1,388 @@
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import ctypes
# TODO: FIX THE FUCKING TYPE NAME CONVENTION.
@dataclass
class Field:
"""Represents a field in a dependency node with its type and readiness state."""
name: str
type: type
ctype_complex_type: Optional[Any]
containing_type: Optional[Any]
type_size: Optional[int]
bitfield_size: Optional[int]
offset: int
value: Any = None
ready: bool = False
def __hash__(self):
"""
Create a hash based on the immutable attributes that define this field's identity.
This allows Field objects to be used as dictionary keys.
"""
# Use a tuple of the fields that uniquely identify this field
identity = (
self.name,
id(self.type), # Use id for non-hashable types
id(self.ctype_complex_type) if self.ctype_complex_type else None,
id(self.containing_type) if self.containing_type else None,
self.type_size,
self.bitfield_size,
self.offset,
self.value if self.value else None,
)
return hash(identity)
def __eq__(self, other):
"""
Define equality consistent with the hash function.
Two fields are equal if they have they are the same
"""
return self is other
def set_ready(self, is_ready: bool = True) -> None:
"""Set the readiness state of this field."""
self.ready = is_ready
def set_value(self, value: Any, mark_ready: bool = False) -> None:
"""Set the value of this field and optionally mark it as ready."""
self.value = value
if mark_ready:
self.ready = True
def set_type(self, given_type, mark_ready: bool = False) -> None:
"""Set value of the type field and mark as ready"""
self.type = given_type
if mark_ready:
self.ready = True
def set_containing_type(
self, containing_type: Optional[Any], mark_ready: bool = False
) -> None:
"""Set the containing_type of this field and optionally mark it as ready."""
self.containing_type = containing_type
if mark_ready:
self.ready = True
def set_type_size(self, type_size: Any, mark_ready: bool = False) -> None:
"""Set the type_size of this field and optionally mark it as ready."""
self.type_size = type_size
if mark_ready:
self.ready = True
def set_ctype_complex_type(
self, ctype_complex_type: Any, mark_ready: bool = False
) -> None:
"""Set the ctype_complex_type of this field and optionally mark it as ready."""
self.ctype_complex_type = ctype_complex_type
if mark_ready:
self.ready = True
def set_bitfield_size(self, bitfield_size: Any, mark_ready: bool = False) -> None:
"""Set the bitfield_size of this field and optionally mark it as ready."""
self.bitfield_size = bitfield_size
if mark_ready:
self.ready = True
def set_offset(self, offset: int) -> None:
"""Set the offset of this field"""
self.offset = offset
@dataclass
class DependencyNode:
"""
A node with typed fields and readiness tracking.
Example usage:
# Create a dependency node for a Person
somestruct = DependencyNode(name="struct_1")
# Add fields with their types
somestruct.add_field("field_1", str)
somestruct.add_field("field_2", int)
somestruct.add_field("field_3", str)
# Check if the node is ready (should be False initially)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set some field values
somestruct.set_field_value("field_1", "someproperty")
somestruct.set_field_value("field_2", 30)
# Check if the node is ready (still False because email is not ready)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set the last field and make the node ready
somestruct.set_field_value("field_3", "anotherproperty")
# Now the node should be ready
print(f"Is node ready? {somestruct.is_ready}") # True
# You can also mark a field as not ready
somestruct.set_field_ready("field_3", False)
# Now the node is not ready again
print(f"Is node ready? {somestruct.is_ready}") # False
# Get all field values
print(somestruct.get_field_values()) # {'field_1': 'someproperty', 'field_2': 30, 'field_3': 'anotherproperty'}
# Get only ready fields
ready_fields = somestruct.get_ready_fields()
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
"""
name: str
depends_on: Optional[list[str]] = None
fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False)
current_offset: int = 0
ctype_struct: Optional[Any] = field(default=None, repr=False)
def add_field(
self,
name: str,
field_type: type,
initial_value: Any = None,
containing_type: Optional[Any] = None,
type_size: Optional[int] = None,
ctype_complex_type: Optional[int] = None,
bitfield_size: Optional[int] = None,
ready: bool = False,
offset: int = 0,
) -> None:
"""Add a field to the node with an optional initial value and readiness state."""
if self.depends_on is None:
self.depends_on = []
self.fields[name] = Field(
name=name,
type=field_type,
value=initial_value,
ready=ready,
containing_type=containing_type,
type_size=type_size,
ctype_complex_type=ctype_complex_type,
bitfield_size=bitfield_size,
offset=offset,
)
# Invalidate readiness cache
self._ready_cache = None
def set_ctype_struct(self, ctype_struct: Any) -> None:
"""Set the ctypes structure for automatic offset calculation."""
self.ctype_struct = ctype_struct
def __sizeof__(self):
# If we have a ctype_struct, use its size
if self.ctype_struct is not None:
return ctypes.sizeof(self.ctype_struct)
return self.current_offset
def get_field(self, name: str) -> Field:
"""Get a field by name."""
return self.fields[name]
def set_field_value(self, name: str, value: Any, mark_ready: bool = False) -> None:
"""Set a field's value and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_value(value, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_type(self, name: str, type: Any, mark_ready: bool = False) -> None:
"""Set a field's type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_type(type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_containing_type(
self, name: str, containing_type: Any, mark_ready: bool = False
) -> None:
"""Set a field's containing_type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_containing_type(containing_type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_type_size(
self, name: str, type_size: Any, mark_ready: bool = False
) -> None:
"""Set a field's type_size and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_type_size(type_size, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_ctype_complex_type(
self, name: str, ctype_complex_type: Any, mark_ready: bool = False
) -> None:
"""Set a field's ctype_complex_type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ctype_complex_type(ctype_complex_type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_bitfield_size(
self, name: str, bitfield_size: Any, mark_ready: bool = False
) -> None:
"""Set a field's bitfield_size and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_bitfield_size(bitfield_size, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_ready(
self,
name: str,
is_ready: bool = False,
size_of_containing_type: Optional[int] = None,
) -> None:
"""Mark a field as ready or not ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ready(is_ready)
# Use ctypes built-in offset if available
if self.ctype_struct is not None:
try:
self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
except AttributeError:
# Fallback to manual calculation if field not found in ctype_struct
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(
name, size_of_containing_type
)
else:
# Manual offset calculation when no ctype_struct is available
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(name, size_of_containing_type)
# Invalidate readiness cache
self._ready_cache = None
def _calculate_size(
self, name: str, size_of_containing_type: Optional[int] = None
) -> int:
processing_field = self.fields[name]
# size_of_field will be in bytes
if processing_field.type.__module__ == ctypes.__name__:
size_of_field = ctypes.sizeof(processing_field.type)
return size_of_field
elif processing_field.type.__module__ == "vmlinux":
if processing_field.ctype_complex_type is not None:
if issubclass(processing_field.ctype_complex_type, ctypes.Array):
if processing_field.containing_type.__module__ == ctypes.__name__:
if (
processing_field.containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
ctypes.sizeof(processing_field.containing_type)
* processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif processing_field.containing_type.__module__ == "vmlinux":
if (
size_of_containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
size_of_containing_type * processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer):
return ctypes.sizeof(ctypes.c_void_p)
else:
raise NotImplementedError(
"This subclass of ctype not supported yet"
)
elif processing_field.type_size is not None:
# Handle vmlinux types with type_size but no ctype_complex_type
# This means it's a direct vmlinux struct field (not array/pointer wrapped)
# The type_size should already contain the full size of the struct
# But if there's a containing_type from vmlinux, we need that size
if processing_field.containing_type is not None:
if processing_field.containing_type.__module__ == "vmlinux":
# For vmlinux containing types, we need the pre-calculated size
if size_of_containing_type is not None:
return size_of_containing_type * processing_field.type_size
else:
raise RuntimeError(
f"Field {name}: vmlinux containing_type requires size_of_containing_type"
)
else:
raise ModuleNotFoundError(
f"Containing type module {processing_field.containing_type.__module__} not supported"
)
else:
raise RuntimeError("Wrong type found with no containing type")
else:
# No ctype_complex_type and no type_size, must rely on size_of_containing_type
if size_of_containing_type is None:
raise RuntimeError(
f"Size of containing type {size_of_containing_type} is None"
)
return size_of_containing_type
else:
raise ModuleNotFoundError("Module is not supported for the operation")
raise RuntimeError("control should not reach here")
@property
def is_ready(self) -> bool:
"""Check if the node is ready (all fields are ready)."""
# Use cached value if available
if self._ready_cache is not None:
return self._ready_cache
# Calculate readiness only when needed
if not self.fields:
self._ready_cache = True
return True
self._ready_cache = all(elem.ready for elem in self.fields.values())
return self._ready_cache
def get_field_values(self) -> Dict[str, Any]:
"""Get a dictionary of field names to their values."""
return {name: elem.value for name, elem in self.fields.items()}
def get_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as ready."""
return {name: elem for name, elem in self.fields.items() if elem.ready}
def get_not_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as not ready."""
return {name: elem for name, elem in self.fields.items() if not elem.ready}
def add_dependent(self, dep_type):
if dep_type in self.depends_on:
return
else:
self.depends_on.append(dep_type)

View File

@ -0,0 +1,153 @@
import ast
import logging
import importlib
import inspect
from .assignment_info import AssignmentInfo, AssignmentType
from .dependency_handler import DependencyHandler
from .ir_gen import IRGenerator
from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__)
def detect_import_statement(
tree: ast.AST,
) -> list[tuple[str, ast.ImportFrom, str, str]]:
"""
Parse AST and detect import statements from vmlinux.
Returns a list of tuples (module_name, imported_item) for vmlinux imports.
Raises SyntaxError for invalid import patterns.
Args:
tree: The AST to parse
Returns:
List of tuples containing (module_name, imported_item) for each vmlinux import
Raises:
SyntaxError: If import * is used
"""
vmlinux_imports = []
for node in ast.walk(tree):
# Handle "from vmlinux import ..." statements
if isinstance(node, ast.ImportFrom):
if node.module == "vmlinux":
# Check for wildcard import: from vmlinux import *
if any(alias.name == "*" for alias in node.names):
raise SyntaxError(
"Wildcard imports from vmlinux are not supported. "
"Please import specific types explicitly."
)
# Check if no specific import is specified (should not happen with valid Python)
if len(node.names) == 0:
raise SyntaxError(
"Import from vmlinux must specify at least one type."
)
# Support multiple imports: from vmlinux import A, B, C
for alias in node.names:
import_name = alias.name
# Use alias if provided, otherwise use the original name
as_name = alias.asname if alias.asname else alias.name
vmlinux_imports.append(("vmlinux", node, import_name, as_name))
logger.info(f"Found vmlinux import: {import_name} as {as_name}")
# Handle "import vmlinux" statements (not typical but should be rejected)
elif isinstance(node, ast.Import):
for alias in node.names:
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
raise SyntaxError(
"Direct import of vmlinux module is not supported. "
"Use 'from vmlinux import <type>' instead."
)
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
return vmlinux_imports
def vmlinux_proc(tree: ast.AST, module):
import_statements = detect_import_statement(tree)
# initialise dependency handler
handler = DependencyHandler()
# initialise assignment dictionary of name to type
assignments: dict[str, AssignmentInfo] = {}
if not import_statements:
logger.info("No vmlinux imports found")
return None
# Import vmlinux module directly
try:
vmlinux_mod = importlib.import_module("vmlinux")
except ImportError:
logger.warning("Could not import vmlinux module")
return None
source_file = inspect.getsourcefile(vmlinux_mod)
if source_file is None:
logger.warning("Cannot find source for vmlinux module")
return None
with open(source_file, "r") as f:
mod_ast = ast.parse(f.read(), filename=source_file)
for import_mod, import_node, imported_name, as_name in import_statements:
found = False
for mod_node in mod_ast.body:
if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
process_vmlinux_class(mod_node, module, handler)
found = True
break
if isinstance(mod_node, ast.Assign):
for target in mod_node.targets:
if isinstance(target, ast.Name) and target.id == imported_name:
process_vmlinux_assign(mod_node, module, assignments, as_name)
found = True
break
if found:
break
if not found:
logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")
IRGenerator(module, handler, assignments)
return assignments
def process_vmlinux_assign(
node, module, assignments: dict[str, AssignmentInfo], target_name=None
):
"""Process assignments from vmlinux module."""
# Only handle single-target assignments
if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
# Use provided target_name (for aliased imports) or fall back to original name
if target_name is None:
target_name = node.targets[0].id
# Handle constant value assignments
if isinstance(node.value, ast.Constant):
# Fixed: using proper TypedDict creation syntax with named arguments
assignments[target_name] = AssignmentInfo(
value_type=AssignmentType.CONSTANT,
python_type=type(node.value.value),
value=node.value.value,
pointer_level=None,
signature=None,
members=None,
debug_info=None,
)
logger.info(
f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
)
# Handle other assignment types that we may need to support
else:
logger.warning(
f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}"
)
else:
raise ValueError("Not a simple assignment")

View File

@ -0,0 +1,3 @@
from .ir_generation import IRGenerator
__all__ = ["IRGenerator"]

View File

@ -0,0 +1,190 @@
from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc
from ..dependency_node import DependencyNode
import ctypes
import logging
from typing import List, Any, Tuple
logger = logging.getLogger(__name__)
def debug_info_generation(
struct: DependencyNode,
llvm_module,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> Any:
"""
Generate DWARF debug information for a struct defined in a DependencyNode.
Args:
struct: The dependency node containing struct information
llvm_module: The LLVM module to add debug info to
generated_debug_info: List of tuples (struct, debug_info) to track generated debug info
Returns:
The generated global variable debug info, or None for unsupported types
"""
# Set up debug info generator
generator = DebugInfoGenerator(llvm_module)
# Check if debug info for this struct has already been generated
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct.name:
return debug_info
# Check if this is a union (not supported yet)
if not struct.name.startswith("struct_"):
logger.warning(f"Skipping debug info generation for union: {struct.name}")
# Create a minimal forward declaration for unions
union_type = generator.create_struct_type(
[], struct.__sizeof__() * 8, is_distinct=True
)
return union_type
# Process all fields and create members for the struct
members = []
sorted_fields = sorted(struct.fields.items(), key=lambda item: item[1].offset)
for field_name, field in sorted_fields:
try:
# Get appropriate debug type for this field
field_type = _get_field_debug_type(
field_name, field, generator, struct, generated_debug_info
)
# Ensure field_type is a tuple
if not isinstance(field_type, tuple) or len(field_type) != 2:
logger.error(f"Invalid field_type for {field_name}: {field_type}")
continue
# Create struct member with proper offset
member = generator.create_struct_member_vmlinux(
field_name, field_type, field.offset * 8
)
members.append(member)
except Exception as e:
logger.error(f"Failed to process field {field_name} in {struct.name}: {e}")
continue
struct_name = struct.name.removeprefix("struct_")
# Create struct type with all members
struct_type = generator.create_struct_type_with_name(
struct_name, members, struct.__sizeof__() * 8, is_distinct=True
)
return struct_type
def _get_field_debug_type(
field_name: str,
field,
generator: DebugInfoGenerator,
parent_struct: DependencyNode,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> tuple[Any, int]:
"""
Determine the appropriate debug type for a field based on its Python/ctypes type.
Args:
field_name: Name of the field
field: Field object containing type information
generator: DebugInfoGenerator instance
parent_struct: The parent struct containing this field
generated_debug_info: List of already generated debug info
Returns:
A tuple of (debug_type, size_in_bits)
"""
# Handle complex types (arrays, pointers, function pointers)
if field.ctype_complex_type is not None:
# Handle function pointer types (CFUNCTYPE)
if callable(field.ctype_complex_type):
# Function pointers are represented as void pointers
logger.warning(
f"Field {field_name} is a function pointer, using void pointer"
)
void_ptr = generator.create_pointer_type(None, 64)
return void_ptr, 64
elif issubclass(field.ctype_complex_type, ctypes.Array):
# Handle array types
element_type, base_type_size = _get_basic_debug_type(
field.containing_type, generator
)
return generator.create_array_type_vmlinux(
(element_type, base_type_size * field.type_size), field.type_size
), field.type_size * base_type_size
elif issubclass(field.ctype_complex_type, ctypes._Pointer):
# Handle pointer types
pointee_type, _ = _get_basic_debug_type(field.containing_type, generator)
return generator.create_pointer_type(pointee_type), 64
# Handle other vmlinux types (nested structs)
if field.type.__module__ == "vmlinux":
# If it's a struct from vmlinux, check if we've already generated debug info for it
struct_name = field.type.__name__
# Look for existing debug info in the list
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct_name:
# Use existing debug info
return debug_info, existing_struct.__sizeof__() * 8
# If not found, create a forward declaration
# This will be completed when the actual struct is processed
logger.info(
f"Forward declaration created for {struct_name} in {parent_struct.name}"
)
forward_type = generator.create_struct_type([], 0, is_distinct=True)
return forward_type, 0
# Handle basic C types
return _get_basic_debug_type(field.type, generator)
def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any:
"""
Map a ctypes type to a DWARF debug type.
Args:
ctype: A ctypes type or Python type
generator: DebugInfoGenerator instance
Returns:
The corresponding debug type
"""
# Map ctypes to debug info types
if ctype == ctypes.c_char or ctype == ctypes.c_byte:
return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8:
return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8
elif ctype == ctypes.c_short or ctype == ctypes.c_int16:
return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16
elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16:
return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16
elif ctype == ctypes.c_int or ctype == ctypes.c_int32:
return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32
elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32:
return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32
elif ctype == ctypes.c_long:
return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulong:
return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64
elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64:
return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64:
return generator.get_basic_type(
"unsigned long long", 64, dc.DW_ATE_unsigned
), 64
elif ctype == ctypes.c_float:
return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32
elif ctype == ctypes.c_double:
return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64
elif ctype == ctypes.c_bool:
return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8
elif ctype == ctypes.c_char_p:
char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
return generator.create_pointer_type(char_type)
elif ctype == ctypes.c_void_p:
return generator.create_pointer_type(None), 64
else:
return generator.get_uint64_type(), 64

View File

@ -0,0 +1,287 @@
import ctypes
import logging
from ..assignment_info import AssignmentInfo, AssignmentType
from ..dependency_handler import DependencyHandler
from .debug_info_gen import debug_info_generation
from ..dependency_node import DependencyNode
import llvmlite.ir as ir
logger = logging.getLogger(__name__)
class IRGenerator:
# This field keeps track of the non_struct names to avoid duplicate name errors.
type_number = 0
unprocessed_store: list[str] = []
# get the assignments dict and add this stuff to it.
def __init__(self, llvm_module, handler: DependencyHandler, assignments):
self.llvm_module = llvm_module
self.handler: DependencyHandler = handler
self.generated: list[str] = []
self.generated_debug_info: list = []
# Use struct_name and field_name as key instead of Field object
self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {}
self.assignments: dict[str, AssignmentInfo] = assignments
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
)
for struct in handler:
self.struct_processor(struct)
def struct_processor(self, struct, processing_stack=None):
# Initialize processing stack on first call
if processing_stack is None:
processing_stack = set()
# If already generated, skip
if struct.name in self.generated:
return
# Detect circular dependency
if struct.name in processing_stack:
logger.info(
f"Circular dependency detected for {struct.name}, skipping recursive processing"
)
# For circular dependencies, we can either:
# 1. Use forward declarations (opaque pointers)
# 2. Mark as incomplete and process later
# 3. Generate a placeholder type
# Here we'll just skip and let it be processed in its own call
return
logger.info(f"IR generating for {struct.name}")
# Add to processing stack before processing dependencies
processing_stack.add(struct.name)
try:
# Process all dependencies first
if struct.depends_on is None:
pass
else:
for dependency in struct.depends_on:
if dependency not in self.generated:
# Check if dependency exists in handler
if dependency in self.handler.nodes:
dep_node_from_dependency = self.handler[dependency]
# Pass the processing_stack down to track circular refs
self.struct_processor(
dep_node_from_dependency, processing_stack
)
else:
raise RuntimeError(
f"Warning: Dependency {dependency} not found in handler"
)
# Generate IR first to populate field names
struct_debug_info = self.gen_ir(struct, self.generated_debug_info)
self.generated_debug_info.append((struct, struct_debug_info))
# Fill the assignments dictionary with struct information
if struct.name not in self.assignments:
# Create a members dictionary for AssignmentInfo
members_dict = {}
for field_name, field in struct.fields.items():
# Get the generated field name from our dictionary, or use field_name if not found
if (
struct.name in self.generated_field_names
and field_name in self.generated_field_names[struct.name]
):
field_global_variable = self.generated_field_names[struct.name][
field_name
]
members_dict[field_name] = (field_global_variable, field)
else:
raise ValueError(
f"llvm global name not found for struct field {field_name}"
)
# members_dict[field_name] = (field_name, field)
# Add struct to assignments dictionary
self.assignments[struct.name] = AssignmentInfo(
value_type=AssignmentType.STRUCT,
python_type=struct.ctype_struct,
value=None,
pointer_level=None,
signature=None,
members=members_dict,
debug_info=struct_debug_info,
)
logger.info(f"Added struct assignment info for {struct.name}")
self.generated.append(struct.name)
finally:
# Remove from processing stack after we're done
processing_stack.discard(struct.name)
def gen_ir(self, struct, generated_debug_info):
# TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
# accepts our issue, we will resort to normal accessed attribute based attribute addition
# currently we generate all possible field accesses for CO-RE and put into the assignment table
debug_info = debug_info_generation(
struct, self.llvm_module, generated_debug_info
)
field_index = 0
# Make sure the struct has an entry in our field names dictionary
if struct.name not in self.generated_field_names:
self.generated_field_names[struct.name] = {}
for field_name, field in struct.fields.items():
# does not take arrays and similar types into consideration yet.
if callable(field.ctype_complex_type):
# Function pointer case - generate a simple field accessor
field_co_re_name, returned = self._struct_name_generator(
struct, field, field_index
)
field_index += 1
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
elif field.ctype_complex_type is not None and issubclass(
field.ctype_complex_type, ctypes.Array
):
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == ctypes.__name__:
containing_type_size = ctypes.sizeof(containing_type)
if array_size == 0:
field_co_re_name, returned = self._struct_name_generator(
struct, field, field_index, True, 0, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
continue
for i in range(0, array_size):
field_co_re_name, returned = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
elif field.type_size is not None:
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == "vmlinux":
# Unwrap all pointer layers to get the base struct type
base_containing_type = containing_type
while hasattr(base_containing_type, "_type_"):
next_type = base_containing_type._type_
# Stop if _type_ is a string (like 'c' for c_char)
# TODO: stacked pointers not handl;ing ctypes check here as well
if isinstance(next_type, str):
break
base_containing_type = next_type
# Get the base struct name
base_struct_name = (
base_containing_type.__name__
if hasattr(base_containing_type, "__name__")
else str(base_containing_type)
)
# Look up the size using the base struct name
containing_type_size = self.handler[base_struct_name].current_offset
if array_size == 0:
field_co_re_name, returned = self._struct_name_generator(
struct, field, field_index, True, 0, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
else:
for i in range(0, array_size):
field_co_re_name, returned = self._struct_name_generator(
struct,
field,
field_index,
True,
i,
containing_type_size,
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata(
"llvm.preserve.access.index", debug_info
)
self.generated_field_names[struct.name][field_name] = (
globvar
)
field_index += 1
else:
field_co_re_name, returned = self._struct_name_generator(
struct, field, field_index
)
field_index += 1
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
return debug_info
def _struct_name_generator(
self,
struct: DependencyNode,
field,
field_index: int,
is_indexed: bool = False,
index: int = 0,
containing_type_size: int = 0,
) -> tuple[str, bool]:
# TODO: Does not support Unions as well as recursive pointer and array type naming
if is_indexed:
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset + index * containing_type_size}"
+ "$"
+ f"0:{field_index}:{index}"
)
return name, True
elif struct.name.startswith("struct_"):
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset}"
+ "$"
+ f"0:{field_index}"
)
return name, True
else:
logger.warning(
"Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union."
)
self.type_number += 1
unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name)
if self.unprocessed_store.__contains__(unprocessed_type):
return unprocessed_type + "_" + str(self.type_number), False
else:
self.unprocessed_store.append(unprocessed_type)
return unprocessed_type, False
# raise TypeError(
# "Name generation cannot occur due to type name not starting with struct"
# )

View File

@ -0,0 +1,378 @@
import logging
from typing import Any
import ctypes
from llvmlite import ir
from pythonbpf.local_symbol import LocalSymbol
from pythonbpf.vmlinux_parser.assignment_info import AssignmentType
logger = logging.getLogger(__name__)
class VmlinuxHandler:
"""Handler for vmlinux-related operations"""
_instance = None
@classmethod
def get_instance(cls):
"""Get the singleton instance"""
if cls._instance is None:
logger.warning("VmlinuxHandler used before initialization")
return None
return cls._instance
@classmethod
def initialize(cls, vmlinux_symtab):
"""Initialize the handler with vmlinux symbol table"""
cls._instance = cls(vmlinux_symtab)
return cls._instance
def __init__(self, vmlinux_symtab):
"""Initialize with vmlinux symbol table"""
self.vmlinux_symtab = vmlinux_symtab
logger.info(
f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols"
)
def is_vmlinux_enum(self, name):
"""Check if name is a vmlinux enum constant"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.CONSTANT
)
def get_struct_debug_info(self, name: str) -> Any:
if (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
):
return self.vmlinux_symtab[name].debug_info
else:
raise ValueError(f"{name} is not a vmlinux struct type")
def get_vmlinux_struct_type(self, name):
"""Check if name is a vmlinux struct type"""
if (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
):
return self.vmlinux_symtab[name].python_type
else:
raise ValueError(f"{name} is not a vmlinux struct type")
def is_vmlinux_struct(self, name):
"""Check if name is a vmlinux struct"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
)
def handle_vmlinux_enum(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name].value
logger.info(f"Resolving vmlinux enum {name} = {value}")
return ir.Constant(ir.IntType(64), value), ir.IntType(64)
return None
def get_vmlinux_enum_value(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name].value
logger.info(f"The value of vmlinux enum {name} = {value}")
return value
return None
def handle_vmlinux_struct_field(
self, struct_var_name, field_name, module, builder, local_sym_tab
):
"""Handle access to vmlinux struct fields"""
if struct_var_name in local_sym_tab:
var_info: LocalSymbol = local_sym_tab[struct_var_name]
logger.info(
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
)
python_type: type = var_info.metadata
# Check if this is a context field (ctx) or a cast struct
is_context_field = var_info.var is None
if is_context_field:
# Handle context field access (original behavior)
struct_name = python_type.__name__
globvar_ir, field_data = self.get_field_type(struct_name, field_name)
builder.function.args[0].type = ir.PointerType(ir.IntType(8))
field_ptr = self.load_ctx_field(
builder,
builder.function.args[0],
globvar_ir,
field_data,
struct_name,
)
return field_ptr, field_data
else:
# Handle cast struct field access
struct_name = python_type.__name__
globvar_ir, field_data = self.get_field_type(struct_name, field_name)
# Handle cast struct field access (use bpf_probe_read_kernel)
# Load the struct pointer from the local variable
struct_ptr = builder.load(var_info.var)
# Use bpf_probe_read_kernel for non-context struct field access
field_value = self.load_struct_field(
builder, struct_ptr, globvar_ir, field_data, struct_name
)
# Return field value and field type
return field_value, field_data
else:
raise RuntimeError("Variable accessed not found in symbol table")
@staticmethod
def load_struct_field(
builder, struct_ptr_int, offset_global, field_data, struct_name=None
):
"""
Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel.
Args:
builder: llvmlite IRBuilder instance
struct_ptr_int: The struct pointer as an i64 value (already loaded from alloca)
offset_global: Global variable containing the field offset (i64)
field_data: contains data about the field
struct_name: Name of the struct being accessed (optional)
Returns:
The loaded value
"""
# Load the offset value
offset = builder.load(offset_global)
# Convert i64 to pointer type (BPF stores pointers as i64)
i8_ptr_type = ir.PointerType(ir.IntType(8))
struct_ptr = builder.inttoptr(struct_ptr_int, i8_ptr_type)
# GEP with offset to get field pointer
field_ptr = builder.gep(
struct_ptr,
[offset],
inbounds=False,
)
# Determine the appropriate field size based on field information
field_size_bytes = 8 # Default to 8 bytes (64-bit)
int_width = 64 # Default to 64-bit
needs_zext = False
if field_data is not None:
# Try to determine the size from field metadata
if field_data.type.__module__ == ctypes.__name__:
try:
field_size_bytes = ctypes.sizeof(field_data.type)
field_size_bits = field_size_bytes * 8
if field_size_bits in [8, 16, 32, 64]:
int_width = field_size_bits
logger.info(
f"Determined field size: {int_width} bits ({field_size_bytes} bytes)"
)
# Special handling for struct_xdp_md i32 fields
if struct_name == "struct_xdp_md" and int_width == 32:
needs_zext = True
logger.info(
"struct_xdp_md i32 field detected, will zero-extend to i64"
)
else:
logger.warning(
f"Unusual field size {field_size_bits} bits, using default 64"
)
except Exception as e:
logger.warning(
f"Could not determine field size: {e}, using default 64"
)
elif field_data.type.__module__ == "vmlinux":
# For pointers to structs or complex vmlinux types
if field_data.ctype_complex_type is not None and issubclass(
field_data.ctype_complex_type, ctypes._Pointer
):
int_width = 64 # Pointers are always 64-bit
field_size_bytes = 8
logger.info("Field is a pointer type, using 64 bits")
else:
logger.warning("Complex vmlinux field type, using default 64 bits")
# Allocate local storage for the field value
local_storage = builder.alloca(ir.IntType(int_width))
local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type)
# Use bpf_probe_read_kernel to safely read the field
# This generates:
# %gep = getelementptr i8, ptr %struct_ptr, i64 %offset (already done above as field_ptr)
# %passed = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %gep)
# %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 %size, ptr %passed)
from pythonbpf.helper import emit_probe_read_kernel_call
emit_probe_read_kernel_call(
builder, local_storage_i8_ptr, field_size_bytes, field_ptr
)
# Load the value from local storage
value = builder.load(local_storage)
# Zero-extend i32 to i64 if needed
if needs_zext:
value = builder.zext(value, ir.IntType(64))
logger.info("Zero-extended i32 value to i64")
return value
@staticmethod
def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None):
"""
Generate LLVM IR to load a field from BPF context using offset.
Args:
builder: llvmlite IRBuilder instance
ctx_arg: The context pointer argument (ptr/i8*)
offset_global: Global variable containing the field offset (i64)
field_data: contains data about the field
struct_name: Name of the struct being accessed (optional)
Returns:
The loaded value (i64 register or appropriately sized)
"""
# Load the offset value
offset = builder.load(offset_global)
# Ensure ctx_arg is treated as i8* (byte pointer)
i8_ptr_type = ir.PointerType()
# Cast ctx_arg to i8* if it isn't already
if str(ctx_arg.type) != str(i8_ptr_type):
ctx_i8_ptr = builder.bitcast(ctx_arg, i8_ptr_type)
else:
ctx_i8_ptr = ctx_arg
# GEP with explicit type - this is the key fix
field_ptr = builder.gep(
ctx_i8_ptr,
[offset],
inbounds=False,
)
# Get or declare the BPF passthrough intrinsic
module = builder.function.module
try:
passthrough_fn = module.globals.get("llvm.bpf.passthrough.p0.p0")
if passthrough_fn is None:
raise KeyError
except (KeyError, AttributeError):
passthrough_type = ir.FunctionType(
i8_ptr_type,
[ir.IntType(32), i8_ptr_type],
)
passthrough_fn = ir.Function(
module,
passthrough_type,
name="llvm.bpf.passthrough.p0.p0",
)
# Call passthrough to satisfy BPF verifier
verified_ptr = builder.call(
passthrough_fn, [ir.Constant(ir.IntType(32), 0), field_ptr], tail=True
)
# Determine the appropriate IR type based on field information
int_width = 64 # Default to 64-bit
needs_zext = False # Track if we need zero-extension for xdp_md
if field_data is not None:
# Try to determine the size from field metadata
if field_data.type.__module__ == ctypes.__name__:
try:
field_size_bytes = ctypes.sizeof(field_data.type)
field_size_bits = field_size_bytes * 8
if field_size_bits in [8, 16, 32, 64]:
int_width = field_size_bits
logger.info(f"Determined field size: {int_width} bits")
# Special handling for struct_xdp_md i32 fields
# Load as i32 but extend to i64 before storing
if struct_name == "struct_xdp_md" and int_width == 32:
needs_zext = True
logger.info(
"struct_xdp_md i32 field detected, will zero-extend to i64"
)
else:
logger.warning(
f"Unusual field size {field_size_bits} bits, using default 64"
)
except Exception as e:
logger.warning(
f"Could not determine field size: {e}, using default 64"
)
elif field_data.type.__module__ == "vmlinux":
# For pointers to structs or complex vmlinux types
if field_data.ctype_complex_type is not None and issubclass(
field_data.ctype_complex_type, ctypes._Pointer
):
int_width = 64 # Pointers are always 64-bit
logger.info("Field is a pointer type, using 64 bits")
# TODO: Add handling for other complex types (arrays, embedded structs, etc.)
else:
logger.warning("Complex vmlinux field type, using default 64 bits")
# Bitcast to appropriate pointer type based on determined width
ptr_type = ir.PointerType(ir.IntType(int_width))
typed_ptr = builder.bitcast(verified_ptr, ptr_type)
# Load and return the value
value = builder.load(typed_ptr)
# Zero-extend i32 to i64 for struct_xdp_md fields
if needs_zext:
value = builder.zext(value, ir.IntType(64))
logger.info("Zero-extended i32 value to i64 for struct_xdp_md field")
return value
def has_field(self, struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if self.is_vmlinux_struct(struct_name):
python_type = self.vmlinux_symtab[struct_name].python_type
return hasattr(python_type, field_name)
return False
def get_field_type(self, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if self.is_vmlinux_struct(vmlinux_struct_name):
python_type = self.vmlinux_symtab[vmlinux_struct_name].python_type
if hasattr(python_type, field_name):
return self.vmlinux_symtab[vmlinux_struct_name].members[field_name]
else:
raise ValueError(
f"Field {field_name} not found in vmlinux struct {vmlinux_struct_name}"
)
else:
raise ValueError(f"{vmlinux_struct_name} is not a vmlinux struct")
def get_field_index(self, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if self.is_vmlinux_struct(vmlinux_struct_name):
python_type = self.vmlinux_symtab[vmlinux_struct_name].python_type
if hasattr(python_type, field_name):
return list(
self.vmlinux_symtab[vmlinux_struct_name].members.keys()
).index(field_name)
else:
raise ValueError(
f"Field {field_name} not found in vmlinux struct {vmlinux_struct_name}"
)
else:
raise ValueError(f"{vmlinux_struct_name} is not a vmlinux struct")

View File

@ -1,19 +1,22 @@
BPF_CLANG := clang
CFLAGS := -O2 -emit-llvm -target bpf -c
CFLAGS := -emit-llvm -target bpf -c
SRC := $(wildcard *.bpf.c)
LL := $(SRC:.bpf.c=.bpf.ll)
OBJ := $(SRC:.bpf.c=.bpf.o)
LL0 := $(SRC:.bpf.c=.bpf.o0.ll)
.PHONY: all clean
all: $(LL) $(OBJ)
all: $(LL) $(OBJ) $(LL0)
%.bpf.o: %.bpf.c
$(BPF_CLANG) -O2 -g -target bpf -c $< -o $@
%.bpf.ll: %.bpf.c
$(BPF_CLANG) $(CFLAGS) -g -S $< -o $@
$(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@
%.bpf.o0.ll: %.bpf.c
$(BPF_CLANG) $(CFLAGS) -O0 -g -S $< -o $@
clean:
rm -f $(LL) $(OBJ)
rm -f $(LL) $(OBJ) $(LL0)

View File

@ -1,11 +1,10 @@
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#define u64 unsigned long long
#define u32 unsigned int
#include <bpf/bpf_endian.h>
SEC("xdp")
int hello(struct xdp_md *ctx) {
bpf_printk("Hello, World!\n");
bpf_printk("Hello, World! %ud \n", ctx->data);
return XDP_PASS;
}

View File

@ -1,25 +0,0 @@
#define __TARGET_ARCH_arm64
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// Map: key = struct request*, value = u64 timestamp
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct request *);
__type(value, u64);
__uint(max_entries, 1024);
} start SEC(".maps");
// Attach to kprobe for blk_start_request
SEC("kprobe/blk_start_request")
int BPF_KPROBE(trace_start, struct request *req)
{
u64 ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start, &req, &ts, BPF_ANY);
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -1,23 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
struct trace_entry {
short unsigned int type;
unsigned char flags;
unsigned char preempt_count;
int pid;
};
struct trace_event_raw_sys_enter {
struct trace_entry ent;
long int id;
long unsigned int args[6];
char __data[0];
};
struct event {
__u32 pid;
__u32 uid;
@ -33,7 +19,7 @@ struct {
SEC("tp/syscalls/sys_enter_setuid")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
struct event data = {};
struct blk_integrity_iter it = {};
// Extract UID from the syscall arguments
data.uid = (unsigned int)ctx->args[0];
data.ts = bpf_ktime_get_ns();

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -0,0 +1,15 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
SEC("xdp")
int print_xdp_data(struct xdp_md *ctx)
{
// 'data' is a pointer to the start of packet data
long data = (long)ctx->data;
bpf_printk("ctx->data = %lld\n", data);
return XDP_PASS;
}
char LICENSE[] SEC("license") = "GPL";

76
tests/c-form/kprobe.bpf.c Normal file
View File

@ -0,0 +1,76 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char LICENSE[] SEC("license") = "GPL";
SEC("kprobe/do_unlinkat")
int kprobe_execve(struct pt_regs *ctx)
{
bpf_printk("unlinkat created");
unsigned long r15 = ctx->r15;
bpf_printk("r15: %lld", r15);
unsigned long r14 = ctx->r14;
bpf_printk("r14: %lld", r14);
unsigned long r13 = ctx->r13;
bpf_printk("r13: %lld", r13);
unsigned long r12 = ctx->r12;
bpf_printk("r12: %lld", r12);
unsigned long bp = ctx->bp;
bpf_printk("rbp: %lld", bp);
unsigned long bx = ctx->bx;
bpf_printk("rbx: %lld", bx);
unsigned long r11 = ctx->r11;
bpf_printk("r11: %lld", r11);
unsigned long r10 = ctx->r10;
bpf_printk("r10: %lld", r10);
unsigned long r9 = ctx->r9;
bpf_printk("r9: %lld", r9);
unsigned long r8 = ctx->r8;
bpf_printk("r8: %lld", r8);
unsigned long ax = ctx->ax;
bpf_printk("rax: %lld", ax);
unsigned long cx = ctx->cx;
bpf_printk("rcx: %lld", cx);
unsigned long dx = ctx->dx;
bpf_printk("rdx: %lld", dx);
unsigned long si = ctx->si;
bpf_printk("rsi: %lld", si);
unsigned long di = ctx->di;
bpf_printk("rdi: %lld", di);
unsigned long orig_ax = ctx->orig_ax;
bpf_printk("orig_rax: %lld", orig_ax);
unsigned long ip = ctx->ip;
bpf_printk("rip: %lld", ip);
unsigned long cs = ctx->cs;
bpf_printk("cs: %lld", cs);
unsigned long flags = ctx->flags;
bpf_printk("eflags: %lld", flags);
unsigned long sp = ctx->sp;
bpf_printk("rsp: %lld", sp);
unsigned long ss = ctx->ss;
bpf_printk("ss: %lld", ss);
return 0;
}

View File

@ -0,0 +1,18 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
SEC("kprobe/blk_mq_start_request")
int example(struct pt_regs *ctx)
{
u64 a = ctx->r15;
struct request *req = (struct request *)(ctx->di);
unsigned int something_ns = BPF_CORE_READ(req, timeout);
unsigned int data_len = BPF_CORE_READ(req, __data_len);
bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
return 0;
}

View File

@ -0,0 +1,18 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
SEC("kprobe/blk_mq_start_request")
int example(struct pt_regs *ctx)
{
u64 a = ctx->r15;
struct request *req = (struct request *)(ctx->di);
unsigned int something_ns = req->timeout;
unsigned int data_len = req->__data_len;
bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
return 0;
}

View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/*
Information gained from reversing this (multiple kernel versions):
There is no point of
```llvm
tail call void @llvm.dbg.value(metadata ptr %0, metadata !60, metadata !DIExpression()), !dbg !70
```
and the first argument of passthrough is fucking useless. It just needs to be a distinct integer:
```llvm
%9 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %8)
```
*/
SEC("tp/syscalls/sys_enter_execve")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
// Access each argument separately with clear variable assignments
long int id = ctx->id;
bpf_printk("This is context field %d", id);
/*
* the IR to aim for is
* %2 = alloca ptr, align 8
* store ptr %0, ptr %2, align 8
* Above, %0 is the arg pointer
* %5 = load ptr, ptr %2, align 8
* %6 = getelementptr inbounds %struct.trace_event_raw_sys_enter, ptr %5, i32 0, i32 2
* %7 = load i64, ptr @"llvm.trace_event_raw_sys_enter:0:16$0:2:0", align 8
* %8 = bitcast ptr %5 to ptr
* %9 = getelementptr i8, ptr %8, i64 %7
* %10 = bitcast ptr %9 to ptr
* %11 = call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %10)
* %12 = load i64, ptr %11, align 8, !dbg !101
*
*/
return 0;
}
char LICENSE[] SEC("license") = "GPL";

121617
tests/c-form/vmlinux.h vendored

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
// xdp_rewrite.c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
SEC("xdp")
int xdp_rewrite_mac(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
if ((void*)(eth + 1) > data_end)
return XDP_PASS;
__u8 new_src[ETH_ALEN] = {0x02,0x00,0x00,0x00,0x00,0x02};
for (int i = 0; i < ETH_ALEN; i++) eth->h_source[i] = new_src[i];
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,40 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: This example tries to reinterpret the variable `x` to a different type.
# We do not allow this for now, as stack allocations are typed and have to be
# done in the first basic block. Allowing re-interpretation would require
# re-allocation of stack space (possibly in a new basic block), which is not
# supported in eBPF yet.
# We can allow bitcasts in cases where the width of the types is the same in
# the future. But for now, we do not allow any re-interpretation of variables.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
x = last.lookup(0)
x = 20
if x == 2:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,18 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!") if True else print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,109 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
# TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
# TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -11,6 +11,7 @@ from ctypes import c_void_p, c_int64
# We cannot allocate space for the intermediate type now.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:

View File

@ -0,0 +1,23 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,30 @@
import logging
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
from pythonbpf import compile # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
from ctypes import c_int64, c_int32, c_void_p # noqa: F401
# from vmlinux import struct_uinput_device
# from vmlinux import struct_blk_integrity_iter
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
b = ctx.args
c = b[0]
print(f"This is context args field {c}")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("args_test.py", "args_test.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,22 @@
from vmlinux import XDP_PASS
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
import logging
from ctypes import c_int64, c_void_p
@bpf
@section("kprobe/blk_mq_start_request")
def example(ctx: c_void_p) -> c_int64:
d = XDP_PASS # This gives an error, but
e = XDP_PASS + 0 # this does not
print(f"test1 {e} test2 {d}")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("assignment_handling.py", "assignment_handling.ll", loglevel=logging.INFO)

View File

@ -0,0 +1,54 @@
from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
from pythonbpf.maps import HashMap
from pythonbpf.helper import XDP_PASS
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_qspinlock # noqa: F401
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_posix_cputimers # noqa: F401
from vmlinux import struct_xdp_md
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
# from vmlinux import struct_request # noqa: F401
from ctypes import c_int64
# Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python examples/xdp_pass.py
# 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o
# 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0
# 5. send traffic through the device and observe effects
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: struct_xdp_md) -> c_int64:
key = 0
one = 1
prev = count().lookup(key)
if prev:
prevval = prev + 1
print(f"count: {prevval}")
count().update(key, prevval)
return XDP_PASS
else:
count().update(key, one)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")

Some files were not shown because too many files have changed in this diff Show More