20 Commits

Author SHA1 Message Date
8bd210cede add debug info storage on assignment_info.py dataclass 2025-10-26 15:46:42 +05:30
7bf6f9c48c add function_debug_info.py and format 2025-10-26 15:12:36 +05:30
a1fe2ed4bc change to 64 bit pointers. May be an issue. revert this commit if issues arise 2025-10-26 15:00:53 +05:30
93285dbdd8 geenrate gep IR 2025-10-26 02:12:33 +05:30
1ea44dd8e1 Use pointer arithmetic to resolve vmlinux struct fields 2025-10-25 05:40:45 +05:30
96216d4411 Consistently use Dataclass syntac for AssignmentInfo and related classes 2025-10-25 05:10:47 +05:30
028d9c2c08 generate IR partly 2025-10-25 04:41:13 +05:30
c6b5ecb47e find global variable ir and field data from metadata 2025-10-24 03:34:27 +05:30
30bcfcbbd0 remove compile error on normal c_void_p in arg and separate localsymbol to avoid circular dep 2025-10-24 03:08:39 +05:30
f18a4399ea format chore 2025-10-24 02:40:07 +05:30
4e01df735f complete part of expr passing for attribute of i64 type 2025-10-24 02:38:39 +05:30
64674cf646 add alloc for only i64 2025-10-24 02:06:39 +05:30
36a1a0903e Merge branch 'master' into vmlinux-handler 2025-10-22 12:02:51 +05:30
f2bc7f1434 pass context to memory allocation 2025-10-22 12:01:52 +05:30
b3921c424d parse context from first function argument to local symbol table 2025-10-22 11:40:49 +05:30
adf32560a0 bpf passthrough gen in codegen
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-22 03:45:54 +05:30
21cea97d78 add return None statements 2025-10-21 07:02:34 +05:30
d8729342dc add bpf_passthrough generation 2025-10-21 07:01:37 +05:30
4179fbfc88 move around examples 2025-10-21 06:03:16 +05:30
ba397036b4 add failing examples to work on 2025-10-21 05:49:44 +05:30
21 changed files with 450 additions and 121719 deletions

View File

@ -2,27 +2,15 @@ import ast
import logging import logging
from llvmlite import ir from llvmlite import ir
from dataclasses import dataclass from .local_symbol import LocalSymbol
from typing import Any
from pythonbpf.helper import HelperHandlerRegistry from pythonbpf.helper import HelperHandlerRegistry
from pythonbpf.vmlinux_parser.dependency_node import Field
from .expr import VmlinuxHandlerRegistry from .expr import VmlinuxHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata
def create_targets_and_rvals(stmt): def create_targets_and_rvals(stmt):
"""Create lists of targets and right-hand values from an assignment statement.""" """Create lists of targets and right-hand values from an assignment statement."""
if isinstance(stmt.targets[0], ast.Tuple): if isinstance(stmt.targets[0], ast.Tuple):
@ -60,21 +48,11 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
continue continue
var_name = target.id var_name = target.id
# Skip if already allocated # Skip if already allocated
if var_name in local_sym_tab: if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping") logger.debug(f"Variable {var_name} already allocated, skipping")
continue continue
# When allocating a variable, check if it's a vmlinux struct type
if isinstance(
stmt.value, ast.Name
) and VmlinuxHandlerRegistry.is_vmlinux_struct(stmt.value.id):
# Handle vmlinux struct allocation
# This requires more implementation
print(stmt.value)
pass
# Determine type and allocate based on rval # Determine type and allocate based on rval
if isinstance(rval, ast.Call): if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
@ -248,9 +226,50 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
logger.error(f"Struct variable '{struct_var}' not found") logger.error(f"Struct variable '{struct_var}' not found")
return return
struct_type = local_sym_tab[struct_var].metadata struct_type: type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab: if not struct_type or struct_type not in structs_sym_tab:
logger.error(f"Struct type '{struct_type}' not found") if VmlinuxHandlerRegistry.is_vmlinux_struct(struct_type.__name__):
# Handle vmlinux struct field access
vmlinux_struct_name = struct_type.__name__
if not VmlinuxHandlerRegistry.has_field(vmlinux_struct_name, field_name):
logger.error(
f"Field '{field_name}' not found in vmlinux struct '{vmlinux_struct_name}'"
)
return
field_type: tuple[ir.GlobalVariable, Field] = (
VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
)
field_ir, field = field_type
# TODO: For now, we only support integer type allocations.
# loaded_value = builder.load(field_ir, align=8)
# #TODO: fatal flaw that this always assumes first argument of function to be the context of what this gets.
base_ptr = builder.function.args[0]
local_sym_tab[
struct_var
].var = base_ptr # This is repurposing of var to store the pointer of the base type
local_sym_tab[struct_var].ir_type = field_ir
# gep_result = builder.gep(
# base_ptr,
# [loaded_value],
# inbounds=False, # Not using inbounds GEP
# )
# print("DEBUG", loaded_value, base_ptr, gep_result)
# Use i64 for allocation since that's what the global variable contains
actual_ir_type = ir.IntType(64)
# Allocate with the actual IR type, not the GlobalVariable
var = _allocate_with_type(builder, var_name, actual_ir_type)
local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field)
logger.info(
f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}"
)
return
else:
logger.error(f"Struct type '{struct_type}' not found")
return return
struct_info = structs_sym_tab[struct_type] struct_info = structs_sym_tab[struct_type]

View File

@ -3,6 +3,8 @@ import logging
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr from pythonbpf.expr import eval_expr
from pythonbpf.helper import emit_probe_read_kernel_str_call from pythonbpf.helper import emit_probe_read_kernel_str_call
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.vmlinux_parser.dependency_node import Field
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -148,7 +150,18 @@ def handle_variable_assignment(
val, val_type = val_result val, val_type = val_result
logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}") logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}")
if val_type != var_type: if val_type != var_type:
if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType): if isinstance(val_type, Field):
logger.info("Handling assignment to struct field")
# TODO: handling only ctype struct fields for now. Handle other stuff too later.
if var_type == ctypes_to_ir(val_type.type.__name__):
builder.store(val, var_ptr)
logger.info(f"Assigned ctype struct field to {var_name}")
return True
logger.error(
f"Failed to assign ctype struct field to {var_name}: {val_type} != {var_type}"
)
return False
elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType):
# Allow implicit int widening # Allow implicit int widening
if val_type.width < var_type.width: if val_type.width < var_type.width:
val = builder.sext(val, var_type) val = builder.sext(val, var_type)

View File

@ -37,6 +37,24 @@ def finalize_module(original_str):
return re.sub(pattern, replacement, original_str) return re.sub(pattern, replacement, original_str)
def bpf_passthrough_gen(module):
i32_ty = ir.IntType(32)
ptr_ty = ir.PointerType(ir.IntType(64))
fnty = ir.FunctionType(ptr_ty, [i32_ty, ptr_ty])
# Declare the intrinsic
passthrough = ir.Function(module, fnty, "llvm.bpf.passthrough.p0.p0")
# Set function attributes
# TODO: the ones commented are supposed to be there but cannot be added due to llvmlite limitations at the moment
# passthrough.attributes.add("nofree")
# passthrough.attributes.add("nosync")
passthrough.attributes.add("nounwind")
# passthrough.attributes.add("memory(none)")
return passthrough
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
"""Find all functions decorated with @bpf in the AST.""" """Find all functions decorated with @bpf in the AST."""
bpf_functions = [] bpf_functions = []
@ -57,6 +75,8 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks: for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}") logger.info(f"Found BPF function/struct: {func_node.name}")
bpf_passthrough_gen(module)
vmlinux_symtab = vmlinux_proc(tree, module) vmlinux_symtab = vmlinux_proc(tree, module)
if vmlinux_symtab: if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab) handler = VmlinuxHandler.initialize(vmlinux_symtab)
@ -137,7 +157,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
module_string = finalize_module(str(module)) module_string: str = finalize_module(str(module))
logger.info(f"IR written to {output}") logger.info(f"IR written to {output}")
with open(output, "w") as f: with open(output, "w") as f:

View File

@ -72,20 +72,28 @@ def _handle_attribute_expr(
if var_name in local_sym_tab: if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name] var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}") logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}") logger.info(
f"Variable type: {var_type}, Variable ptr: {var_ptr}, Variable Metadata: {var_metadata}"
)
if (
hasattr(var_metadata, "__module__")
and var_metadata.__module__ == "vmlinux"
):
# Try vmlinux handler when var_metadata is not a string, but has a module attribute.
# This has been done to keep everything separate in vmlinux struct handling.
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
else:
raise RuntimeError("Vmlinux struct did not process successfully")
metadata = structs_sym_tab[var_metadata] metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields: if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name) gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep) val = builder.load(gep)
field_type = metadata.field_type(attr_name) field_type = metadata.field_type(attr_name)
return val, field_type return val, field_type
# Try vmlinux handler as fallback
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
return None return None

View File

@ -1,5 +1,7 @@
import ast import ast
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
class VmlinuxHandlerRegistry: class VmlinuxHandlerRegistry:
"""Registry for vmlinux handler operations""" """Registry for vmlinux handler operations"""
@ -7,7 +9,7 @@ class VmlinuxHandlerRegistry:
_handler = None _handler = None
@classmethod @classmethod
def set_handler(cls, handler): def set_handler(cls, handler: VmlinuxHandler):
"""Set the vmlinux handler""" """Set the vmlinux handler"""
cls._handler = handler cls._handler = handler
@ -37,9 +39,37 @@ class VmlinuxHandlerRegistry:
) )
return None return None
@classmethod
def get_struct_debug_info(cls, name):
if cls._handler is None:
return False
return cls._handler.get_struct_debug_info(name)
@classmethod @classmethod
def is_vmlinux_struct(cls, name): def is_vmlinux_struct(cls, name):
"""Check if a name refers to a vmlinux struct""" """Check if a name refers to a vmlinux struct"""
if cls._handler is None: if cls._handler is None:
return False return False
return cls._handler.is_vmlinux_struct(name) return cls._handler.is_vmlinux_struct(name)
@classmethod
def get_struct_type(cls, name):
"""Try to handle a struct name as vmlinux struct"""
if cls._handler is None:
return None
return cls._handler.get_vmlinux_struct_type(name)
@classmethod
def has_field(cls, vmlinux_struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if cls._handler is None:
return False
return cls._handler.has_field(vmlinux_struct_name, field_name)
@classmethod
def get_field_type(cls, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if cls._handler is None:
return None
assert isinstance(cls._handler, VmlinuxHandler)
return cls._handler.get_field_type(vmlinux_struct_name, field_name)

View File

@ -0,0 +1,21 @@
import ast
import llvmlite.ir as ir
from pythonbpf.debuginfo import DebugInfoGenerator
from pythonbpf.expr import VmlinuxHandlerRegistry
def generate_function_debug_info(
func_node: ast.FunctionDef, module: ir.Module, func: ir.Function
):
generator = DebugInfoGenerator(module)
leading_argument = func_node.args.args[0]
leading_argument_name = leading_argument.arg
# TODO: add ctypes handling as well here
print(leading_argument.arg, leading_argument.annotation.id)
context_debug_info = VmlinuxHandlerRegistry.get_struct_debug_info(
name=leading_argument.annotation.id
)
print(context_debug_info)
pass

View File

@ -7,7 +7,12 @@ from pythonbpf.helper import (
reset_scratch_pool, reset_scratch_pool,
) )
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool from pythonbpf.expr import (
eval_expr,
handle_expr,
convert_to_bool,
VmlinuxHandlerRegistry,
)
from pythonbpf.assign_pass import ( from pythonbpf.assign_pass import (
handle_variable_assignment, handle_variable_assignment,
handle_struct_field_assignment, handle_struct_field_assignment,
@ -16,8 +21,9 @@ from pythonbpf.allocation_pass import (
handle_assign_allocation, handle_assign_allocation,
allocate_temp_pool, allocate_temp_pool,
create_targets_and_rvals, create_targets_and_rvals,
LocalSymbol,
) )
from .function_debug_info import generate_function_debug_info
from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name
from .function_metadata import get_probe_string, is_global_function, infer_return_type from .function_metadata import get_probe_string, is_global_function, infer_return_type
@ -324,6 +330,28 @@ def process_func_body(
local_sym_tab = {} local_sym_tab = {}
# Add the context parameter (first function argument) to the local symbol table
if func_node.args.args and len(func_node.args.args) > 0:
context_arg = func_node.args.args[0]
context_name = context_arg.arg
if hasattr(context_arg, "annotation") and context_arg.annotation:
if isinstance(context_arg.annotation, ast.Name):
context_type_name = context_arg.annotation.id
elif isinstance(context_arg.annotation, ast.Attribute):
context_type_name = context_arg.annotation.attr
else:
raise TypeError(
f"Unsupported annotation type: {ast.dump(context_arg.annotation)}"
)
if VmlinuxHandlerRegistry.is_vmlinux_struct(context_type_name):
resolved_type = VmlinuxHandlerRegistry.get_struct_type(
context_type_name
)
context_type = LocalSymbol(None, None, resolved_type)
local_sym_tab[context_name] = context_type
logger.info(f"Added argument '{context_name}' to local symbol table")
# pre-allocate dynamic variables # pre-allocate dynamic variables
local_sym_tab = allocate_mem( local_sym_tab = allocate_mem(
module, module,
@ -374,7 +402,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
func.linkage = "dso_local" func.linkage = "dso_local"
func.attributes.add("nounwind") func.attributes.add("nounwind")
func.attributes.add("noinline") func.attributes.add("noinline")
func.attributes.add("optnone") # func.attributes.add("optnone")
if func_node.args.args: if func_node.args.args:
# Only look at the first argument for now # Only look at the first argument for now
@ -412,7 +440,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
func_type = get_probe_string(func_node) func_type = get_probe_string(func_node)
logger.info(f"Found probe_string of {func_node.name}: {func_type}") logger.info(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk( func = process_bpf_chunk(
func_node, func_node,
module, module,
ctypes_to_ir(infer_return_type(func_node)), ctypes_to_ir(infer_return_type(func_node)),
@ -420,6 +448,9 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
structs_sym_tab, structs_sym_tab,
) )
logger.info(f"Generating Debug Info for Function {func_node.name}")
generate_function_debug_info(func_node, module, func)
# TODO: WIP, for string assignment to fixed-size arrays # TODO: WIP, for string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length): def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):

View File

@ -78,9 +78,9 @@ def bpf_map_lookup_elem_emitter(
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# TODO: I have changed the return type to i64*, as we are # TODO: I have changed the return type to i64*, as we are
# allocating space for that type in allocate_mem. This is # allocating space for that type in allocate_mem. This is
# temporary, and we will honour other widths later. But this # temporary, and we will honour other widths later. But this
# allows us to have cool binary ops on the returned value. # allows us to have cool binary ops on the returned value.
fn_type = ir.FunctionType( fn_type = ir.FunctionType(
ir.PointerType(ir.IntType(64)), # Return type: void* ir.PointerType(ir.IntType(64)), # Return type: void*
[ir.PointerType(), ir.PointerType()], # Args: (void*, void*) [ir.PointerType(), ir.PointerType()], # Args: (void*, void*)

15
pythonbpf/local_symbol.py Normal file
View File

@ -0,0 +1,15 @@
import llvmlite.ir as ir
from dataclasses import dataclass
from typing import Any
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata

View File

@ -13,6 +13,8 @@ mapping = {
"c_float": ir.FloatType(), "c_float": ir.FloatType(),
"c_double": ir.DoubleType(), "c_double": ir.DoubleType(),
"c_void_p": ir.IntType(64), "c_void_p": ir.IntType(64),
"c_long": ir.IntType(64),
"c_longlong": ir.IntType(64),
# Not so sure about this one # Not so sure about this one
"str": ir.PointerType(ir.IntType(8)), "str": ir.PointerType(ir.IntType(8)),
} }

View File

@ -1,12 +1,11 @@
from enum import Enum, auto from enum import Enum, auto
from typing import Any, Dict, List, Optional, TypedDict from typing import Any, Dict, List, Optional
from dataclasses import dataclass from dataclasses import dataclass
import llvmlite.ir as ir import llvmlite.ir as ir
from pythonbpf.vmlinux_parser.dependency_node import Field from pythonbpf.vmlinux_parser.dependency_node import Field
@dataclass
class AssignmentType(Enum): class AssignmentType(Enum):
CONSTANT = auto() CONSTANT = auto()
STRUCT = auto() STRUCT = auto()
@ -16,7 +15,7 @@ class AssignmentType(Enum):
@dataclass @dataclass
class FunctionSignature(TypedDict): class FunctionSignature:
return_type: str return_type: str
param_types: List[str] param_types: List[str]
varargs: bool varargs: bool
@ -24,7 +23,7 @@ class FunctionSignature(TypedDict):
# Thew name of the assignment will be in the dict that uses this class # Thew name of the assignment will be in the dict that uses this class
@dataclass @dataclass
class AssignmentInfo(TypedDict): class AssignmentInfo:
value_type: AssignmentType value_type: AssignmentType
python_type: type python_type: type
value: Optional[Any] value: Optional[Any]
@ -34,3 +33,4 @@ class AssignmentInfo(TypedDict):
# Value is a tuple that contains the global variable representing that field # Value is a tuple that contains the global variable representing that field
# along with all the information about that field as a Field type. # along with all the information about that field as a Field type.
members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs. members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs.
debug_info: Any

View File

@ -86,19 +86,19 @@ def vmlinux_proc(tree: ast.AST, module):
if not import_statements: if not import_statements:
logger.info("No vmlinux imports found") logger.info("No vmlinux imports found")
return return None
# Import vmlinux module directly # Import vmlinux module directly
try: try:
vmlinux_mod = importlib.import_module("vmlinux") vmlinux_mod = importlib.import_module("vmlinux")
except ImportError: except ImportError:
logger.warning("Could not import vmlinux module") logger.warning("Could not import vmlinux module")
return return None
source_file = inspect.getsourcefile(vmlinux_mod) source_file = inspect.getsourcefile(vmlinux_mod)
if source_file is None: if source_file is None:
logger.warning("Cannot find source for vmlinux module") logger.warning("Cannot find source for vmlinux module")
return return None
with open(source_file, "r") as f: with open(source_file, "r") as f:
mod_ast = ast.parse(f.read(), filename=source_file) mod_ast = ast.parse(f.read(), filename=source_file)
@ -148,6 +148,7 @@ def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo])
pointer_level=None, pointer_level=None,
signature=None, signature=None,
members=None, members=None,
debug_info=None,
) )
logger.info( logger.info(
f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"

View File

@ -73,9 +73,8 @@ class IRGenerator:
) )
# Generate IR first to populate field names # Generate IR first to populate field names
self.generated_debug_info.append( struct_debug_info = self.gen_ir(struct, self.generated_debug_info)
(struct, self.gen_ir(struct, self.generated_debug_info)) self.generated_debug_info.append((struct, struct_debug_info))
)
# Fill the assignments dictionary with struct information # Fill the assignments dictionary with struct information
if struct.name not in self.assignments: if struct.name not in self.assignments:
@ -105,6 +104,7 @@ class IRGenerator:
pointer_level=None, pointer_level=None,
signature=None, signature=None,
members=members_dict, members=members_dict,
debug_info=struct_debug_info,
) )
logger.info(f"Added struct assignment info for {struct.name}") logger.info(f"Added struct assignment info for {struct.name}")

View File

@ -1,6 +1,9 @@
import logging import logging
from typing import Any
from llvmlite import ir from llvmlite import ir
from pythonbpf.local_symbol import LocalSymbol
from pythonbpf.vmlinux_parser.assignment_info import AssignmentType from pythonbpf.vmlinux_parser.assignment_info import AssignmentType
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -36,20 +39,39 @@ class VmlinuxHandler:
"""Check if name is a vmlinux enum constant""" """Check if name is a vmlinux enum constant"""
return ( return (
name in self.vmlinux_symtab name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT and self.vmlinux_symtab[name].value_type == AssignmentType.CONSTANT
) )
def get_struct_debug_info(self, name: str) -> Any:
if (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
):
return self.vmlinux_symtab[name].debug_info
else:
raise ValueError(f"{name} is not a vmlinux struct type")
def get_vmlinux_struct_type(self, name):
"""Check if name is a vmlinux struct type"""
if (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
):
return self.vmlinux_symtab[name].python_type
else:
raise ValueError(f"{name} is not a vmlinux struct type")
def is_vmlinux_struct(self, name): def is_vmlinux_struct(self, name):
"""Check if name is a vmlinux struct""" """Check if name is a vmlinux struct"""
return ( return (
name in self.vmlinux_symtab name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT and self.vmlinux_symtab[name].value_type == AssignmentType.STRUCT
) )
def handle_vmlinux_enum(self, name): def handle_vmlinux_enum(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants""" """Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name): if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"] value = self.vmlinux_symtab[name].value
logger.info(f"Resolving vmlinux enum {name} = {value}") logger.info(f"Resolving vmlinux enum {name} = {value}")
return ir.Constant(ir.IntType(64), value), ir.IntType(64) return ir.Constant(ir.IntType(64), value), ir.IntType(64)
return None return None
@ -57,34 +79,132 @@ class VmlinuxHandler:
def get_vmlinux_enum_value(self, name): def get_vmlinux_enum_value(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants""" """Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name): if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"] value = self.vmlinux_symtab[name].value
logger.info(f"The value of vmlinux enum {name} = {value}") logger.info(f"The value of vmlinux enum {name} = {value}")
return value return value
return None return None
def handle_vmlinux_struct(self, struct_name, module, builder):
"""Handle vmlinux struct initializations"""
if self.is_vmlinux_struct(struct_name):
# TODO: Implement core-specific struct handling
# This will be more complex and depends on the BTF information
logger.info(f"Handling vmlinux struct {struct_name}")
# Return struct type and allocated pointer
# This is a stub, actual implementation will be more complex
return None
return None
def handle_vmlinux_struct_field( def handle_vmlinux_struct_field(
self, struct_var_name, field_name, module, builder, local_sym_tab self, struct_var_name, field_name, module, builder, local_sym_tab
): ):
"""Handle access to vmlinux struct fields""" """Handle access to vmlinux struct fields"""
# Check if it's a variable of vmlinux struct type
if struct_var_name in local_sym_tab: if struct_var_name in local_sym_tab:
var_info = local_sym_tab[struct_var_name] # noqa: F841 var_info: LocalSymbol = local_sym_tab[struct_var_name]
# Need to check if this variable is a vmlinux struct
# This will depend on how you track vmlinux struct types in your symbol table
logger.info( logger.info(
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
) )
python_type: type = var_info.metadata
globvar_ir, field_data = self.get_field_type(
python_type.__name__, field_name
)
builder.function.args[0].type = ir.PointerType(ir.IntType(64))
print(builder.function.args[0])
field_ptr = self.load_ctx_field(
builder, builder.function.args[0], globvar_ir
)
print(field_ptr)
# Return pointer to field and field type # Return pointer to field and field type
return None return field_ptr, field_data
return None else:
raise RuntimeError("Variable accessed not found in symbol table")
@staticmethod
def load_ctx_field(builder, ctx_arg, offset_global):
"""
Generate LLVM IR to load a field from BPF context using offset.
Args:
builder: llvmlite IRBuilder instance
ctx_arg: The context pointer argument (ptr/i8*)
offset_global: Global variable containing the field offset (i64)
Returns:
The loaded value (i64 register)
"""
# Load the offset value
offset = builder.load(offset_global)
# # Ensure ctx_arg is treated as i8* (byte pointer)
# # i8_type = ir.IntType(8)
# i8_ptr_type = ir.PointerType()
# Cast ctx_arg to i8* if it isn't already
# if str(ctx_arg.type) != str(i8_ptr_type):
# ctx_i8_ptr = builder.bitcast(ctx_arg, i8_ptr_type)
# else:
# ctx_i8_ptr = ctx_arg
# GEP with explicit type - this is the key fix
field_ptr = builder.gep(
ctx_arg,
[offset],
inbounds=False,
)
# Get or declare the BPF passthrough intrinsic
module = builder.function.module
try:
passthrough_fn = module.globals.get("llvm.bpf.passthrough.p0.p0")
if passthrough_fn is None:
raise KeyError
except (KeyError, AttributeError):
passthrough_type = ir.FunctionType(
ir.PointerType(),
[ir.IntType(32), ir.PointerType()],
)
passthrough_fn = ir.Function(
module,
passthrough_type,
name="llvm.bpf.passthrough.p0.p0",
)
# Call passthrough to satisfy BPF verifier
verified_ptr = builder.call(
passthrough_fn, [ir.Constant(ir.IntType(32), 0), field_ptr], tail=True
)
# Bitcast to i64* (assuming field is 64-bit, adjust if needed)
i64_ptr_type = ir.PointerType(ir.IntType(64))
typed_ptr = builder.bitcast(verified_ptr, i64_ptr_type)
# Load and return the value
value = builder.load(typed_ptr)
return value
def has_field(self, struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if self.is_vmlinux_struct(struct_name):
python_type = self.vmlinux_symtab[struct_name].python_type
return hasattr(python_type, field_name)
return False
def get_field_type(self, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if self.is_vmlinux_struct(vmlinux_struct_name):
python_type = self.vmlinux_symtab[vmlinux_struct_name].python_type
if hasattr(python_type, field_name):
return self.vmlinux_symtab[vmlinux_struct_name].members[field_name]
else:
raise ValueError(
f"Field {field_name} not found in vmlinux struct {vmlinux_struct_name}"
)
else:
raise ValueError(f"{vmlinux_struct_name} is not a vmlinux struct")
def get_field_index(self, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if self.is_vmlinux_struct(vmlinux_struct_name):
python_type = self.vmlinux_symtab[vmlinux_struct_name].python_type
if hasattr(python_type, field_name):
return list(
self.vmlinux_symtab[vmlinux_struct_name].members.keys()
).index(field_name)
else:
raise ValueError(
f"Field {field_name} not found in vmlinux struct {vmlinux_struct_name}"
)
else:
raise ValueError(f"{vmlinux_struct_name} is not a vmlinux struct")

View File

@ -1,5 +1,5 @@
BPF_CLANG := clang BPF_CLANG := clang
CFLAGS := -O2 -emit-llvm -target bpf -c CFLAGS := -O0 -emit-llvm -target bpf -c
SRC := $(wildcard *.bpf.c) SRC := $(wildcard *.bpf.c)
LL := $(SRC:.bpf.c=.bpf.ll) LL := $(SRC:.bpf.c=.bpf.ll)

View File

@ -1,25 +0,0 @@
#define __TARGET_ARCH_arm64
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// Map: key = struct request*, value = u64 timestamp
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct request *);
__type(value, u64);
__uint(max_entries, 1024);
} start SEC(".maps");
// Attach to kprobe for blk_start_request
SEC("kprobe/blk_start_request")
int BPF_KPROBE(trace_start, struct request *req)
{
u64 ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start, &req, &ts, BPF_ANY);
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/*
Information gained from reversing this (multiple kernel versions):
There is no point of
```llvm
tail call void @llvm.dbg.value(metadata ptr %0, metadata !60, metadata !DIExpression()), !dbg !70
```
and the first argument of passthrough is fucking useless. It just needs to be a distinct integer:
```llvm
%9 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %8)
```
*/
SEC("tp/syscalls/sys_enter_execve")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
// Access each argument separately with clear variable assignments
long int id = ctx->id;
bpf_printk("This is context field %d", id);
/*
* the IR to aim for is
* %2 = alloca ptr, align 8
* store ptr %0, ptr %2, align 8
* Above, %0 is the arg pointer
* %5 = load ptr, ptr %2, align 8
* %6 = getelementptr inbounds %struct.trace_event_raw_sys_enter, ptr %5, i32 0, i32 2
* %7 = load i64, ptr @"llvm.trace_event_raw_sys_enter:0:16$0:2:0", align 8
* %8 = bitcast ptr %5 to ptr
* %9 = getelementptr i8, ptr %8, i64 %7
* %10 = bitcast ptr %9 to ptr
* %11 = call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %10)
* %12 = load i64, ptr %11, align 8, !dbg !101
*
*/
return 0;
}
char LICENSE[] SEC("license") = "GPL";

121617
tests/c-form/vmlinux.h vendored

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
// xdp_rewrite.c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
SEC("xdp")
int xdp_rewrite_mac(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
if ((void*)(eth + 1) > data_end)
return XDP_PASS;
__u8 new_src[ETH_ALEN] = {0x02,0x00,0x00,0x00,0x00,0x02};
for (int i = 0; i < ETH_ALEN; i++) eth->h_source[i] = new_src[i];
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,29 @@
import logging
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
from pythonbpf import compile # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
from ctypes import c_int64, c_void_p # noqa: F401
# from vmlinux import struct_uinput_device
# from vmlinux import struct_blk_integrity_iter
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
b = ctx.id
print(f"This is context field {b}")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("struct_field_access.py", "struct_field_access.ll", loglevel=logging.INFO)
compile()

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, struct from pythonbpf import bpf, map, section, bpfglobal, compile, struct, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32, c_uint64 from ctypes import c_void_p, c_int64, c_int32, c_uint64
from pythonbpf.maps import HashMap from pythonbpf.maps import HashMap
from pythonbpf.helper import ktime from pythonbpf.helper import ktime
@ -71,4 +71,5 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile_to_ir("comprehensive.py", "comprehensive.ll")
compile() compile()