Merge pull request #8 from pythonbpf/refactor

Add dwarf module
This commit is contained in:
varunrmallya
2025-09-30 20:39:48 +05:30
committed by GitHub
16 changed files with 138 additions and 184 deletions

View File

@ -1,46 +0,0 @@
from __future__ import print_function
from bcc import BPF
from bcc.utils import printb
# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>
BPF_HASH(last);
int do_trace(struct pt_regs *ctx) {
u64 ts, *tsp, delta, key = 0;
// attempt to read stored timestamp
tsp = last.lookup(&key);
if (tsp != NULL) {
delta = bpf_ktime_get_ns() - *tsp;
if (delta < 1000000000) {
// output if time is less than 1 second
bpf_trace_printk("%d\\n", delta / 1000000);
}
last.delete(&key);
}
// update stored timestamp
ts = bpf_ktime_get_ns();
last.update(&key, &ts);
return 0;
}
""")
b.attach_kprobe(event=b.get_syscall_fnname("sync"), fn_name="do_trace")
print("Tracing for quick sync's... Ctrl-C to end")
# TODO
# format output
start = 0
while 1:
try:
(task, pid, cpu, flags, ts, ms) = b.trace_fields()
if start == 0:
start = ts
ts = ts - start
printb(b"At time %.2f s: multiple syncs detected, last %s ms ago" % (ts, ms))
except KeyboardInterrupt:
exit()

View File

@ -10,18 +10,10 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello(ctx: c_void_p) -> c_int32:
print("entered")
print("multi constant support")
return c_int32(0)
@bpf
@section("tracepoint/syscalls/sys_exit_execve")
def hello_again(ctx: c_void_p) -> c_int64:
print("multi constant support")
print("exited")
key = 0
delta = 0
@ -45,11 +37,9 @@ def hello_again(ctx: c_void_p) -> c_int64:
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -10,7 +10,6 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("blk_start_request")
def trace_start(ctx: c_void_p) -> c_int32:

View File

@ -1,15 +0,0 @@
# This is what it is going to look like
# pylint: disable-all# type: ignore
from pythonbpf.decorators import tracepoint, syscalls, bpfglobal, bpf
from ctypes import c_void_p, c_int32
@bpf
@tracepoint(syscalls.sys_clone)
def trace_clone(ctx: c_void_p) -> c_int32:
print("Hello, World!")
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"

View File

@ -5,6 +5,7 @@ from .functions_pass import func_proc
from pythonbpf.maps import maps_proc
from .structs.structs_pass import structs_proc
from .globals_pass import globals_processing
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum
import os
import subprocess
import inspect
@ -12,6 +13,8 @@ from pathlib import Path
from pylibbpf import BpfProgram
import tempfile
VERSION = "v0.1.3"
def find_bpf_chunks(tree):
"""Find all functions decorated with @bpf in the AST."""
@ -50,16 +53,18 @@ def compile_to_ir(filename: str, output: str):
module.triple = "bpf"
if not hasattr(module, '_debug_compile_unit'):
module._file_metadata = module.add_debug_info("DIFile", { # type: ignore
module._file_metadata = module.add_debug_info("DIFile", { # type: ignore
"filename": filename,
"directory": os.path.dirname(filename)
})
module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore
"language": 29, # DW_LANG_C11
"file": module._file_metadata, # type: ignore
"producer": "PythonBPF DSL Compiler",
"isOptimized": True,
module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore
"language": DW_LANG_C11,
"file": module._file_metadata, # type: ignore
"producer": f"PythonBPF {VERSION}",
"isOptimized": True, # TODO: This is probably not true
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes.
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
@ -67,32 +72,32 @@ def compile_to_ir(filename: str, output: str):
}, is_distinct=True)
module.add_named_metadata(
"llvm.dbg.cu", module._debug_compile_unit) # type: ignore
"llvm.dbg.cu", module._debug_compile_unit) # type: ignore
processor(source, filename, module)
wchar_size = module.add_metadata([ir.Constant(ir.IntType(32), 1),
wchar_size = module.add_metadata([DwarfBehaviorEnum.ERROR_IF_MISMATCH,
"wchar_size",
ir.Constant(ir.IntType(32), 4)])
frame_pointer = module.add_metadata([ir.Constant(ir.IntType(32), 7),
frame_pointer = module.add_metadata([DwarfBehaviorEnum.OVERRIDE_USE_LARGEST,
"frame-pointer",
ir.Constant(ir.IntType(32), 2)])
# Add Debug Info Version (3 = DWARF v3, which LLVM expects)
debug_info_version = module.add_metadata([ir.Constant(ir.IntType(32), 2),
debug_info_version = module.add_metadata([DwarfBehaviorEnum.WARNING_IF_MISMATCH,
"Debug Info Version",
ir.Constant(ir.IntType(32), 3)])
# Add explicit DWARF version (4 is common, works with LLVM BPF backend)
dwarf_version = module.add_metadata([ir.Constant(ir.IntType(32), 2),
# Add explicit DWARF version 5
dwarf_version = module.add_metadata([DwarfBehaviorEnum.OVERRIDE_USE_LARGEST,
"Dwarf Version",
ir.Constant(ir.IntType(32), 4)])
ir.Constant(ir.IntType(32), 5)])
module.add_named_metadata("llvm.module.flags", wchar_size)
module.add_named_metadata("llvm.module.flags", frame_pointer)
module.add_named_metadata("llvm.module.flags", debug_info_version)
module.add_named_metadata("llvm.module.flags", dwarf_version)
module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"])
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
print(f"IR written to {output}")
with open(output, "w") as f:

View File

@ -0,0 +1,3 @@
from .dwarf_constants import *
from .dtypes import *
from .debug_info_generator import DebugInfoGenerator

View File

@ -0,0 +1,92 @@
"""
Debug information generation module for Python-BPF
Provides utilities for generating DWARF/BTF debug information
"""
from . import dwarf_constants as dc
from typing import Dict, Any, List, Optional, Union
class DebugInfoGenerator:
def __init__(self, module):
self.module = module
self._type_cache = {} # Cache for common debug types
def get_basic_type(self, name: str, size: int, encoding: int) -> Any:
"""Get or create a basic type with caching"""
key = (name, size, encoding)
if key not in self._type_cache:
self._type_cache[key] = self.module.add_debug_info("DIBasicType", {
"name": name,
"size": size,
"encoding": encoding
})
return self._type_cache[key]
def get_uint32_type(self) -> Any:
"""Get debug info for unsigned 32-bit integer"""
return self.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned)
def get_uint64_type(self) -> Any:
"""Get debug info for unsigned 64-bit integer"""
return self.get_basic_type("unsigned long long", 64, dc.DW_ATE_unsigned)
def create_pointer_type(self, base_type: Any, size: int = 64) -> Any:
"""Create a pointer type to the given base type"""
return self.module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
"baseType": base_type,
"size": size
})
def create_array_type(self, base_type: Any, count: int) -> Any:
"""Create an array type of the given base type with specified count"""
subrange = self.module.add_debug_info("DISubrange", {"count": count})
return self.module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_array_type,
"baseType": base_type,
"size": self._compute_array_size(base_type, count),
"elements": [subrange]
})
@staticmethod
def _compute_array_size(base_type: Any, count: int) -> int:
# Extract size from base_type if possible
# For simplicity, assuming base_type has a size attribute
return getattr(base_type, "size", 32) * count
def create_struct_member(self, name: str, base_type: Any, offset: int) -> Any:
"""Create a struct member with the given name, type, and offset"""
return self.module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_member,
"name": name,
"file": self.module._file_metadata,
"baseType": base_type,
"size": getattr(base_type, "size", 64),
"offset": offset
})
def create_struct_type(self, members: List[Any], size: int, is_distinct: bool) -> Any:
"""Create a struct type with the given members and size"""
return self.module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_structure_type,
"file": self.module._file_metadata,
"size": size,
"elements": members,
}, is_distinct=is_distinct)
def create_global_var_debug_info(self, name: str, var_type: Any, is_local: bool = False) -> Any:
"""Create debug info for a global variable"""
global_var = self.module.add_debug_info("DIGlobalVariable", {
"name": name,
"scope": self.module._debug_compile_unit,
"file": self.module._file_metadata,
"type": var_type,
"isLocal": is_local,
"isDefinition": True
}, is_distinct=True)
return self.module.add_debug_info("DIGlobalVariableExpression", {
"var": global_var,
"expr": self.module.add_debug_info("DIExpression", {})
})

View File

@ -0,0 +1,6 @@
import llvmlite.ir as ir
class DwarfBehaviorEnum:
ERROR_IF_MISMATCH = ir.Constant(ir.IntType(32), 1)
WARNING_IF_MISMATCH = ir.Constant(ir.IntType(32), 2)
OVERRIDE_USE_LARGEST = ir.Constant(ir.IntType(32), 7)

View File

@ -1,8 +1,8 @@
import ast
from llvmlite import ir
from pythonbpf import dwarf_constants as dc
from enum import Enum
from .maps_utils import MapProcessorRegistry
from ..debuginfo import dwarf_constants as dc, DebugInfoGenerator
import logging
logger = logging.getLogger(__name__)
@ -55,53 +55,15 @@ def create_bpf_map(module, map_name, map_params):
def create_map_debug_info(module, map_global, map_name, map_params):
"""Generate debug information metadata for BPF map"""
file_metadata = module._file_metadata
compile_unit = module._debug_compile_unit
generator = DebugInfoGenerator(module)
# Create basic type for unsigned int (32-bit)
uint_type = module.add_debug_info("DIBasicType", {
"name": "unsigned int",
"size": 32,
"encoding": dc.DW_ATE_unsigned
})
uint_type = generator.get_uint32_type()
ulong_type = generator.get_uint64_type()
array_type = generator.create_array_type(uint_type, map_params.get("type", BPFMapType.HASH).value)
type_ptr = generator.create_pointer_type(array_type, 64)
key_ptr = generator.create_pointer_type(array_type if "key_size" in map_params else ulong_type, 64)
value_ptr = generator.create_pointer_type(array_type if "value_size" in map_params else ulong_type, 64)
# Create basic type for unsigned long long (64-bit)
ulong_type = module.add_debug_info("DIBasicType", {
"name": "unsigned long long",
"size": 64,
"encoding": dc.DW_ATE_unsigned
})
# Create array type for map type field (array of 1 unsigned int)
array_subrange = module.add_debug_info(
"DISubrange", {"count": map_params.get("type", BPFMapType.HASH).value})
array_type = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_array_type,
"baseType": uint_type,
"size": 32,
"elements": [array_subrange]
})
# Create pointer types
type_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
"baseType": array_type,
"size": 64
})
key_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
# Adjust based on actual key type
"baseType": array_type if "key_size" in map_params else uint_type,
"size": 64
})
value_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
# Adjust based on actual value type
"baseType": array_type if "value_size" in map_params else ulong_type,
"size": 64
})
elements_arr = []
@ -117,69 +79,27 @@ def create_map_debug_info(module, map_global, map_name, map_params):
ptr = key_ptr
else:
ptr = value_ptr
member = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_member,
"name": elem,
"file": file_metadata,
"baseType": ptr,
"size": 64,
"offset": cnt * 64
})
# TODO: the best way to do this is not 64, but get the size each time. this will not work for structs.
member = generator.create_struct_member(elem, ptr, cnt * 64)
elements_arr.append(member)
cnt += 1
if "max_entries" in map_params:
array_subrange_max_entries = module.add_debug_info(
"DISubrange", {"count": map_params["max_entries"]})
array_type_max_entries = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_array_type,
"baseType": uint_type,
"size": 32,
"elements": [array_subrange_max_entries]
})
max_entries_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
"baseType": array_type_max_entries,
"size": 64
})
max_entries_member = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_member,
"name": "max_entries",
"file": file_metadata,
"baseType": max_entries_ptr,
"size": 64,
"offset": cnt * 64
})
max_entries_array = generator.create_array_type(uint_type, map_params["max_entries"])
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member("max_entries", max_entries_ptr, cnt * 64)
elements_arr.append(max_entries_member)
# Create the struct type
struct_type = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_structure_type,
"file": file_metadata,
"size": 64 * len(elements_arr), # 4 * 64-bit pointers
"elements": elements_arr,
}, is_distinct=True)
struct_type = generator.create_struct_type(elements_arr, 64 * len(elements_arr), is_distinct=True)
# Create global variable debug info
global_var = module.add_debug_info("DIGlobalVariable", {
"name": map_name,
"scope": compile_unit,
"file": file_metadata,
"type": struct_type,
"isLocal": False,
"isDefinition": True
}, is_distinct=True)
# Create global variable expression
global_var_expr = module.add_debug_info("DIGlobalVariableExpression", {
"var": global_var,
"expr": module.add_debug_info("DIExpression", {})
})
global_var = generator.create_global_var_debug_info(map_name, struct_type, is_local=False)
# Attach debug info to the global variable
map_global.set_metadata("dbg", global_var_expr)
map_global.set_metadata("dbg", global_var)
return global_var_expr
return global_var
@MapProcessorRegistry.register("HashMap")