diff --git a/README.md b/README.md index 1b4c3f8..fe88f18 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,12 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses --- +## Try It Out! +Run +```bash +curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash +``` + ## Installation Dependencies: diff --git a/pyproject.toml b/pyproject.toml index 09fc8d3..cea909f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,12 +4,26 @@ build-backend = "setuptools.build_meta" [project] name = "pythonbpf" -version = "0.1.4" +version = "0.1.5" description = "Reduced Python frontend for eBPF" authors = [ { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "varun-r-mallya", email="varunrmallya@gmail.com" } ] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: System :: Operating System Kernels :: Linux", +] readme = "README.md" license = {text = "Apache-2.0"} requires-python = ">=3.8" diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 6833795..22c457d 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -5,6 +5,7 @@ from llvmlite import ir from dataclasses import dataclass from typing import Any from pythonbpf.helper import HelperHandlerRegistry +from .expr import VmlinuxHandlerRegistry from pythonbpf.type_deducer import ctypes_to_ir logger = logging.getLogger(__name__) @@ -64,6 +65,15 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): if var_name in local_sym_tab: logger.debug(f"Variable {var_name} already allocated, skipping") continue + + # When allocating a variable, check if it's a vmlinux struct type + if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( + stmt.value.id + ): + # Handle vmlinux struct allocation + # This requires more implementation + print(stmt.value) + pass # Determine type and allocate based on rval if isinstance(rval, ast.Call): @@ -85,7 +95,6 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" ) - def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): """Allocate memory for variable assigned from a call.""" diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index d33d866..17f5393 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -5,6 +5,8 @@ from .functions import func_proc from .maps import maps_proc from .structs import structs_proc from .vmlinux_parser import vmlinux_proc +from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler +from .expr import VmlinuxHandlerRegistry from .globals_pass import ( globals_list_creation, globals_processing, @@ -19,10 +21,20 @@ from pylibbpf import BpfObject import tempfile from logging import Logger import logging +import re logger: Logger = logging.getLogger(__name__) -VERSION = "v0.1.4" +VERSION = "v0.1.5" + + +def finalize_module(original_str): + """After all IR generation is complete, we monkey patch btf_ama attribute""" + + # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses. + pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)' + replacement = r'\1 "btf_ama"' + return re.sub(pattern, replacement, original_str) def find_bpf_chunks(tree): @@ -45,11 +57,14 @@ def processor(source_code, filename, module): for func_node in bpf_chunks: logger.info(f"Found BPF function/struct: {func_node.name}") - vmlinux_proc(tree, module) + vmlinux_symtab = vmlinux_proc(tree, module) + if vmlinux_symtab: + handler = VmlinuxHandler.initialize(vmlinux_symtab) + VmlinuxHandlerRegistry.set_handler(handler) + populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) - structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) @@ -122,10 +137,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) + module_string = finalize_module(str(module)) + logger.info(f"IR written to {output}") with open(output, "w") as f: f.write(f'source_filename = "{filename}"\n') - f.write(str(module)) + f.write(module_string) f.write("\n") return output, structs_sym_tab, maps_sym_tab diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py index ab9fed4..62f0cc3 100644 --- a/pythonbpf/debuginfo/debug_info_generator.py +++ b/pythonbpf/debuginfo/debug_info_generator.py @@ -81,6 +81,20 @@ class DebugInfoGenerator: }, ) + def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any: + """Create an array type of the given base type with specified count""" + base_type, type_sizing = type_info + subrange = self.module.add_debug_info("DISubrange", {"count": count}) + return self.module.add_debug_info( + "DICompositeType", + { + "tag": dc.DW_TAG_array_type, + "baseType": base_type, + "size": type_sizing, + "elements": [subrange], + }, + ) + @staticmethod def _compute_array_size(base_type: Any, count: int) -> int: # Extract size from base_type if possible @@ -101,6 +115,23 @@ class DebugInfoGenerator: }, ) + def create_struct_member_vmlinux( + self, name: str, base_type_with_size: Any, offset: int + ) -> Any: + """Create a struct member with the given name, type, and offset""" + base_type, type_size = base_type_with_size + return self.module.add_debug_info( + "DIDerivedType", + { + "tag": dc.DW_TAG_member, + "name": name, + "file": self.module._file_metadata, + "baseType": base_type, + "size": type_size, + "offset": offset, + }, + ) + def create_struct_type( self, members: List[Any], size: int, is_distinct: bool ) -> Any: @@ -116,6 +147,22 @@ class DebugInfoGenerator: is_distinct=is_distinct, ) + def create_struct_type_with_name( + self, name: str, members: List[Any], size: int, is_distinct: bool + ) -> Any: + """Create a struct type with the given members and size""" + return self.module.add_debug_info( + "DICompositeType", + { + "name": name, + "tag": dc.DW_TAG_structure_type, + "file": self.module._file_metadata, + "size": size, + "elements": members, + }, + is_distinct=is_distinct, + ) + def create_global_var_debug_info( self, name: str, var_type: Any, is_local: bool = False ) -> Any: diff --git a/pythonbpf/expr/__init__.py b/pythonbpf/expr/__init__.py index 3c403dd..ac3a975 100644 --- a/pythonbpf/expr/__init__.py +++ b/pythonbpf/expr/__init__.py @@ -2,6 +2,7 @@ from .expr_pass import eval_expr, handle_expr, get_operand_value from .type_normalization import convert_to_bool, get_base_type_and_depth from .ir_ops import deref_to_depth from .call_registry import CallHandlerRegistry +from .vmlinux_registry import VmlinuxHandlerRegistry __all__ = [ "eval_expr", @@ -11,4 +12,5 @@ __all__ = [ "deref_to_depth", "get_operand_value", "CallHandlerRegistry", + "VmlinuxHandlerRegistry", ] diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 8bbd524..2a7cd5f 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -12,6 +12,7 @@ from .type_normalization import ( get_base_type_and_depth, deref_to_depth, ) +from .vmlinux_registry import VmlinuxHandlerRegistry logger: Logger = logging.getLogger(__name__) @@ -27,8 +28,12 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder val = builder.load(var) return val, local_sym_tab[expr.id].ir_type else: - logger.info(f"Undefined variable {expr.id}") - return None + # Check if it's a vmlinux enum/constant + vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id) + if vmlinux_result is not None: + return vmlinux_result + + raise SyntaxError(f"Undefined variable {expr.id}") def _handle_constant_expr(module, builder, expr: ast.Constant): @@ -74,6 +79,13 @@ def _handle_attribute_expr( val = builder.load(gep) field_type = metadata.field_type(attr_name) return val, field_type + + # Try vmlinux handler as fallback + vmlinux_result = VmlinuxHandlerRegistry.handle_attribute( + expr, local_sym_tab, None, builder + ) + if vmlinux_result is not None: + return vmlinux_result return None @@ -130,7 +142,12 @@ def get_operand_value( logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") val = deref_to_depth(func, builder, var, depth) return val - raise ValueError(f"Undefined variable: {operand.id}") + else: + # Check if it's a vmlinux enum/constant + vmlinux_result = VmlinuxHandlerRegistry.handle_name(operand.id) + if vmlinux_result is not None: + val, _ = vmlinux_result + return val elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): cst = ir.Constant(ir.IntType(64), int(operand.value)) @@ -332,6 +349,7 @@ def _handle_unary_op( neg_one = ir.Constant(ir.IntType(64), -1) result = builder.mul(operand, neg_one) return result, ir.IntType(64) + return None # ============================================================================ diff --git a/pythonbpf/expr/vmlinux_registry.py b/pythonbpf/expr/vmlinux_registry.py new file mode 100644 index 0000000..9e9d52e --- /dev/null +++ b/pythonbpf/expr/vmlinux_registry.py @@ -0,0 +1,45 @@ +import ast + + +class VmlinuxHandlerRegistry: + """Registry for vmlinux handler operations""" + + _handler = None + + @classmethod + def set_handler(cls, handler): + """Set the vmlinux handler""" + cls._handler = handler + + @classmethod + def get_handler(cls): + """Get the vmlinux handler""" + return cls._handler + + @classmethod + def handle_name(cls, name): + """Try to handle a name as vmlinux enum/constant""" + if cls._handler is None: + return None + return cls._handler.handle_vmlinux_enum(name) + + @classmethod + def handle_attribute(cls, expr, local_sym_tab, module, builder): + """Try to handle an attribute access as vmlinux struct field""" + if cls._handler is None: + return None + + if isinstance(expr.value, ast.Name): + var_name = expr.value.id + field_name = expr.attr + return cls._handler.handle_vmlinux_struct_field( + var_name, field_name, module, builder, local_sym_tab + ) + return None + + @classmethod + def is_vmlinux_struct(cls, name): + """Check if a name refers to a vmlinux struct""" + if cls._handler is None: + return False + return cls._handler.is_vmlinux_struct(name) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 5836ce0..e391092 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -310,7 +310,13 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -383,7 +389,13 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ) return func diff --git a/pythonbpf/helper/printk_formatter.py b/pythonbpf/helper/printk_formatter.py index cee069f..a18f135 100644 --- a/pythonbpf/helper/printk_formatter.py +++ b/pythonbpf/helper/printk_formatter.py @@ -3,6 +3,7 @@ import logging from llvmlite import ir from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth +from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry logger = logging.getLogger(__name__) @@ -108,6 +109,16 @@ def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab): if local_sym_tab and name_node.id in local_sym_tab: _, var_type, tmp = local_sym_tab[name_node.id] _populate_fval(var_type, name_node, fmt_parts, exprs) + else: + # Try to resolve through vmlinux registry if not in local symbol table + result = VmlinuxHandlerRegistry.handle_name(name_node.id) + if result: + val, var_type = result + _populate_fval(var_type, name_node, fmt_parts, exprs) + else: + raise ValueError( + f"Variable '{name_node.id}' not found in symbol table or vmlinux" + ) def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab): diff --git a/pythonbpf/maps/maps_pass.py b/pythonbpf/maps/maps_pass.py index 8459848..85837d7 100644 --- a/pythonbpf/maps/maps_pass.py +++ b/pythonbpf/maps/maps_pass.py @@ -6,6 +6,8 @@ from llvmlite import ir from .maps_utils import MapProcessorRegistry from .map_types import BPFMapType from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info +from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry + logger: Logger = logging.getLogger(__name__) @@ -51,7 +53,7 @@ def _parse_map_params(rval, expected_args=None): """Parse map parameters from call arguments and keywords.""" params = {} - + handler = VmlinuxHandlerRegistry.get_handler() # Parse positional arguments if expected_args: for i, arg_name in enumerate(expected_args): @@ -65,7 +67,12 @@ def _parse_map_params(rval, expected_args=None): # Parse keyword arguments (override positional) for keyword in rval.keywords: if isinstance(keyword.value, ast.Name): - params[keyword.arg] = keyword.value.id + name = keyword.value.id + if handler and handler.is_vmlinux_enum(name): + result = handler.get_vmlinux_enum_value(name) + params[keyword.arg] = result if result is not None else name + else: + params[keyword.arg] = name elif isinstance(keyword.value, ast.Constant): params[keyword.arg] = keyword.value.value diff --git a/pythonbpf/tbaa_gen/__init__.py b/pythonbpf/tbaa_gen/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py new file mode 100644 index 0000000..465432d --- /dev/null +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -0,0 +1,36 @@ +from enum import Enum, auto +from typing import Any, Dict, List, Optional, TypedDict +from dataclasses import dataclass +import llvmlite.ir as ir + +from pythonbpf.vmlinux_parser.dependency_node import Field + + +@dataclass +class AssignmentType(Enum): + CONSTANT = auto() + STRUCT = auto() + ARRAY = auto() # probably won't be used + FUNCTION_POINTER = auto() + POINTER = auto() # again, probably won't be used + + +@dataclass +class FunctionSignature(TypedDict): + return_type: str + param_types: List[str] + varargs: bool + + +# Thew name of the assignment will be in the dict that uses this class +@dataclass +class AssignmentInfo(TypedDict): + value_type: AssignmentType + python_type: type + value: Optional[Any] + pointer_level: Optional[int] + signature: Optional[FunctionSignature] # For function pointers + # The key of the dict is the name of the field. + # Value is a tuple that contains the global variable representing that field + # along with all the information about that field as a Field type. + members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs. diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 3cb3a97..a508ff7 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -1,6 +1,7 @@ import logging from functools import lru_cache import importlib + from .dependency_handler import DependencyHandler from .dependency_node import DependencyNode import ctypes @@ -15,7 +16,11 @@ def get_module_symbols(module_name: str): return [name for name in dir(imported_module)], imported_module -def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): +def process_vmlinux_class( + node, + llvm_module, + handler: DependencyHandler, +): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: vmlinux_type = getattr(imported_module, node.name) @@ -25,7 +30,10 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): def process_vmlinux_post_ast( - elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None + elem_type_class, + llvm_handler, + handler: DependencyHandler, + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -46,7 +54,7 @@ def process_vmlinux_post_ast( logger.debug(f"Node {current_symbol_name} already processed and ready") return True - # XXX:Check it's use. It's probably not being used. + # XXX:Check its use. It's probably not being used. if current_symbol_name in processing_stack: logger.debug( f"Dependency already in processing stack for {current_symbol_name}, skipping" @@ -60,6 +68,10 @@ def process_vmlinux_post_ast( pass else: new_dep_node = DependencyNode(name=current_symbol_name) + + # elem_type_class is the actual vmlinux struct/class + new_dep_node.set_ctype_struct(elem_type_class) + handler.add_node(new_dep_node) class_obj = getattr(imported_module, current_symbol_name) # Inspect the class fields @@ -94,12 +106,47 @@ def process_vmlinux_post_ast( [elem_type, elem_bitfield_size] = elem_temp_list local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) + if local_module_name == ctypes.__name__: + # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) - new_dep_node.set_field_ready(elem_name, is_ready=True) - logger.debug( - f"Field {elem_name} is direct ctypes type: {elem_type}" - ) + + # Process pointer to ctype + if isinstance(elem_type, type) and issubclass( + elem_type, ctypes._Pointer + ): + # Get the pointed-to type + pointed_type = elem_type._type_ + logger.debug(f"Found pointer to type: {pointed_type}") + new_dep_node.set_field_containing_type(elem_name, pointed_type) + new_dep_node.set_field_ctype_complex_type( + elem_name, ctypes._Pointer + ) + new_dep_node.set_field_ready(elem_name, is_ready=True) + + # Process function pointers (CFUNCTYPE) + elif hasattr(elem_type, "_restype_") and hasattr( + elem_type, "_argtypes_" + ): + # This is a CFUNCTYPE or similar + logger.info( + f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}" + ) + # Set the field as ready but mark it with special handling + new_dep_node.set_field_ctype_complex_type( + elem_name, ctypes.CFUNCTYPE + ) + new_dep_node.set_field_ready(elem_name, is_ready=True) + logger.warning( + "Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported" + ) + + else: + # Regular ctype + new_dep_node.set_field_ready(elem_name, is_ready=True) + logger.debug( + f"Field {elem_name} is direct ctypes type: {elem_type}" + ) elif local_module_name == "vmlinux": new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) logger.debug( @@ -112,13 +159,21 @@ def process_vmlinux_post_ast( type_length = elem_type._length_ if containing_type.__module__ == "vmlinux": - pass + new_dep_node.add_dependent( + elem_type._type_.__name__ + if hasattr(elem_type._type_, "__name__") + else str(elem_type._type_) + ) elif containing_type.__module__ == ctypes.__name__: if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): ctype_complex_type = ctypes.Array elif issubclass(elem_type, ctypes._Pointer): ctype_complex_type = ctypes._Pointer + else: + raise ImportError( + "Non Array and Pointer type ctype imports not supported in current version" + ) else: raise TypeError("Unsupported ctypes subclass") else: @@ -137,10 +192,35 @@ def process_vmlinux_post_ast( ) new_dep_node.set_field_type(elem_name, elem_type) if containing_type.__module__ == "vmlinux": - process_vmlinux_post_ast( - containing_type, llvm_handler, handler, processing_stack + containing_type_name = ( + containing_type.__name__ + if hasattr(containing_type, "__name__") + else str(containing_type) ) - new_dep_node.set_field_ready(elem_name, True) + + # Check for self-reference or already processed + if containing_type_name == current_symbol_name: + # Self-referential pointer + logger.debug( + f"Self-referential pointer in {current_symbol_name}.{elem_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + elif handler.has_node(containing_type_name): + # Already processed + logger.debug( + f"Reusing already processed {containing_type_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + else: + # Process recursively - THIS WAS MISSING + new_dep_node.add_dependent(containing_type_name) + process_vmlinux_post_ast( + containing_type, + llvm_handler, + handler, + processing_stack, + ) + new_dep_node.set_field_ready(elem_name, True) elif containing_type.__module__ == ctypes.__name__: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) @@ -149,8 +229,16 @@ def process_vmlinux_post_ast( "Module not supported in recursive resolution" ) else: + new_dep_node.add_dependent( + elem_type.__name__ + if hasattr(elem_type, "__name__") + else str(elem_type) + ) process_vmlinux_post_ast( - elem_type, llvm_handler, handler, processing_stack + elem_type, + llvm_handler, + handler, + processing_stack, ) new_dep_node.set_field_ready(elem_name, True) else: @@ -161,7 +249,7 @@ def process_vmlinux_post_ast( else: raise ImportError("UNSUPPORTED Module") - logging.info( + logger.info( f"{current_symbol_name} processed and handler readiness {handler.is_ready}" ) return True diff --git a/pythonbpf/vmlinux_parser/dependency_handler.py b/pythonbpf/vmlinux_parser/dependency_handler.py index fb49b00..b34d27f 100644 --- a/pythonbpf/vmlinux_parser/dependency_handler.py +++ b/pythonbpf/vmlinux_parser/dependency_handler.py @@ -147,3 +147,27 @@ class DependencyHandler: int: The number of nodes """ return len(self._nodes) + + def __getitem__(self, name: str) -> DependencyNode: + """ + Get a node by name using dictionary-style access. + + Args: + name: The name of the node to retrieve + + Returns: + DependencyNode: The node with the given name + + Raises: + KeyError: If no node with the given name exists + + Example: + node = handler["some-dep_node_name"] + """ + if name not in self._nodes: + raise KeyError(f"No node with name '{name}' found") + return self._nodes[name] + + @property + def nodes(self): + return self._nodes diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index a17ffaf..dd413ad 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from typing import Dict, Any, Optional +import ctypes # TODO: FIX THE FUCKING TYPE NAME CONVENTION. @@ -13,9 +14,35 @@ class Field: containing_type: Optional[Any] type_size: Optional[int] bitfield_size: Optional[int] + offset: int value: Any = None ready: bool = False + def __hash__(self): + """ + Create a hash based on the immutable attributes that define this field's identity. + This allows Field objects to be used as dictionary keys. + """ + # Use a tuple of the fields that uniquely identify this field + identity = ( + self.name, + id(self.type), # Use id for non-hashable types + id(self.ctype_complex_type) if self.ctype_complex_type else None, + id(self.containing_type) if self.containing_type else None, + self.type_size, + self.bitfield_size, + self.offset, + self.value if self.value else None, + ) + return hash(identity) + + def __eq__(self, other): + """ + Define equality consistent with the hash function. + Two fields are equal if they have they are the same + """ + return self is other + def set_ready(self, is_ready: bool = True) -> None: """Set the readiness state of this field.""" self.ready = is_ready @@ -60,6 +87,10 @@ class Field: if mark_ready: self.ready = True + def set_offset(self, offset: int) -> None: + """Set the offset of this field""" + self.offset = offset + @dataclass class DependencyNode: @@ -106,8 +137,11 @@ class DependencyNode: """ name: str + depends_on: Optional[list[str]] = None fields: Dict[str, Field] = field(default_factory=dict) _ready_cache: Optional[bool] = field(default=None, repr=False) + current_offset: int = 0 + ctype_struct: Optional[Any] = field(default=None, repr=False) def add_field( self, @@ -119,8 +153,11 @@ class DependencyNode: ctype_complex_type: Optional[int] = None, bitfield_size: Optional[int] = None, ready: bool = False, + offset: int = 0, ) -> None: """Add a field to the node with an optional initial value and readiness state.""" + if self.depends_on is None: + self.depends_on = [] self.fields[name] = Field( name=name, type=field_type, @@ -130,10 +167,21 @@ class DependencyNode: type_size=type_size, ctype_complex_type=ctype_complex_type, bitfield_size=bitfield_size, + offset=offset, ) # Invalidate readiness cache self._ready_cache = None + def set_ctype_struct(self, ctype_struct: Any) -> None: + """Set the ctypes structure for automatic offset calculation.""" + self.ctype_struct = ctype_struct + + def __sizeof__(self): + # If we have a ctype_struct, use its size + if self.ctype_struct is not None: + return ctypes.sizeof(self.ctype_struct) + return self.current_offset + def get_field(self, name: str) -> Field: """Get a field by name.""" return self.fields[name] @@ -200,15 +248,112 @@ class DependencyNode: # Invalidate readiness cache self._ready_cache = None - def set_field_ready(self, name: str, is_ready: bool = False) -> None: + def set_field_ready( + self, + name: str, + is_ready: bool = False, + size_of_containing_type: Optional[int] = None, + ) -> None: """Mark a field as ready or not ready.""" if name not in self.fields: raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") self.fields[name].set_ready(is_ready) + + # Use ctypes built-in offset if available + if self.ctype_struct is not None: + try: + self.fields[name].set_offset(getattr(self.ctype_struct, name).offset) + except AttributeError: + # Fallback to manual calculation if field not found in ctype_struct + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size( + name, size_of_containing_type + ) + else: + # Manual offset calculation when no ctype_struct is available + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size(name, size_of_containing_type) + # Invalidate readiness cache self._ready_cache = None + def _calculate_size( + self, name: str, size_of_containing_type: Optional[int] = None + ) -> int: + processing_field = self.fields[name] + # size_of_field will be in bytes + if processing_field.type.__module__ == ctypes.__name__: + size_of_field = ctypes.sizeof(processing_field.type) + return size_of_field + elif processing_field.type.__module__ == "vmlinux": + if processing_field.ctype_complex_type is not None: + if issubclass(processing_field.ctype_complex_type, ctypes.Array): + if processing_field.containing_type.__module__ == ctypes.__name__: + if ( + processing_field.containing_type is not None + and processing_field.type_size is not None + ): + size_of_field = ( + ctypes.sizeof(processing_field.containing_type) + * processing_field.type_size + ) + else: + raise RuntimeError( + f"{processing_field} has no containing_type or type_size" + ) + return size_of_field + elif processing_field.containing_type.__module__ == "vmlinux": + if ( + size_of_containing_type is not None + and processing_field.type_size is not None + ): + size_of_field = ( + size_of_containing_type * processing_field.type_size + ) + else: + raise RuntimeError( + f"{processing_field} has no containing_type or type_size" + ) + return size_of_field + elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer): + return ctypes.sizeof(ctypes.c_void_p) + else: + raise NotImplementedError( + "This subclass of ctype not supported yet" + ) + elif processing_field.type_size is not None: + # Handle vmlinux types with type_size but no ctype_complex_type + # This means it's a direct vmlinux struct field (not array/pointer wrapped) + # The type_size should already contain the full size of the struct + # But if there's a containing_type from vmlinux, we need that size + if processing_field.containing_type is not None: + if processing_field.containing_type.__module__ == "vmlinux": + # For vmlinux containing types, we need the pre-calculated size + if size_of_containing_type is not None: + return size_of_containing_type * processing_field.type_size + else: + raise RuntimeError( + f"Field {name}: vmlinux containing_type requires size_of_containing_type" + ) + else: + raise ModuleNotFoundError( + f"Containing type module {processing_field.containing_type.__module__} not supported" + ) + else: + raise RuntimeError("Wrong type found with no containing type") + else: + # No ctype_complex_type and no type_size, must rely on size_of_containing_type + if size_of_containing_type is None: + raise RuntimeError( + f"Size of containing type {size_of_containing_type} is None" + ) + return size_of_containing_type + + else: + raise ModuleNotFoundError("Module is not supported for the operation") + raise RuntimeError("control should not reach here") + @property def is_ready(self) -> bool: """Check if the node is ready (all fields are ready).""" @@ -235,3 +380,9 @@ class DependencyNode: def get_not_ready_fields(self) -> Dict[str, Field]: """Get all fields that are marked as not ready.""" return {name: elem for name, elem in self.fields.items() if not elem.ready} + + def add_dependent(self, dep_type): + if dep_type in self.depends_on: + return + else: + self.depends_on.append(dep_type) diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 2ce9cb5..6df7a98 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -1,17 +1,17 @@ import ast import logging -from typing import List, Tuple, Dict import importlib import inspect +from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler -from .ir_generation import IRGenerator +from .ir_gen import IRGenerator from .class_handler import process_vmlinux_class logger = logging.getLogger(__name__) -def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]: +def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: """ Parse AST and detect import statements from vmlinux. @@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module): # initialise dependency handler handler = DependencyHandler() # initialise assignment dictionary of name to type - assignments: Dict[str, type] = {} + assignments: dict[str, AssignmentInfo] = {} if not import_statements: logger.info("No vmlinux imports found") @@ -128,8 +128,35 @@ def vmlinux_proc(tree: ast.AST, module): f"{imported_name} not found as ClassDef or Assign in vmlinux" ) - IRGenerator(module, handler) + IRGenerator(module, handler, assignments) + return assignments -def process_vmlinux_assign(node, module, assignments: Dict[str, type]): - raise NotImplementedError("Assignment handling has not been implemented yet") +def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]): + """Process assignments from vmlinux module.""" + # Only handle single-target assignments + if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): + target_name = node.targets[0].id + + # Handle constant value assignments + if isinstance(node.value, ast.Constant): + # Fixed: using proper TypedDict creation syntax with named arguments + assignments[target_name] = AssignmentInfo( + value_type=AssignmentType.CONSTANT, + python_type=type(node.value.value), + value=node.value.value, + pointer_level=None, + signature=None, + members=None, + ) + logger.info( + f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" + ) + + # Handle other assignment types that we may need to support + else: + logger.warning( + f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}" + ) + else: + raise ValueError("Not a simple assignment") diff --git a/pythonbpf/vmlinux_parser/ir_gen/__init__.py b/pythonbpf/vmlinux_parser/ir_gen/__init__.py new file mode 100644 index 0000000..3a13651 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/__init__.py @@ -0,0 +1,3 @@ +from .ir_generation import IRGenerator + +__all__ = ["IRGenerator"] diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py new file mode 100644 index 0000000..232cf10 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -0,0 +1,161 @@ +from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc +from ..dependency_node import DependencyNode +import ctypes +import logging +from typing import List, Any, Tuple + +logger = logging.getLogger(__name__) + + +def debug_info_generation( + struct: DependencyNode, + llvm_module, + generated_debug_info: List[Tuple[DependencyNode, Any]], +) -> Any: + """ + Generate DWARF debug information for a struct defined in a DependencyNode. + + Args: + struct: The dependency node containing struct information + llvm_module: The LLVM module to add debug info to + generated_debug_info: List of tuples (struct, debug_info) to track generated debug info + + Returns: + The generated global variable debug info + """ + # Set up debug info generator + generator = DebugInfoGenerator(llvm_module) + + # Check if debug info for this struct has already been generated + for existing_struct, debug_info in generated_debug_info: + if existing_struct.name == struct.name: + return debug_info + + # Process all fields and create members for the struct + members = [] + for field_name, field in struct.fields.items(): + # Get appropriate debug type for this field + field_type = _get_field_debug_type( + field_name, field, generator, struct, generated_debug_info + ) + # Create struct member with proper offset + member = generator.create_struct_member_vmlinux( + field_name, field_type, field.offset * 8 + ) + members.append(member) + + if struct.name.startswith("struct_"): + struct_name = struct.name.removeprefix("struct_") + else: + raise ValueError("Unions are not supported in the current version") + # Create struct type with all members + struct_type = generator.create_struct_type_with_name( + struct_name, members, struct.__sizeof__() * 8, is_distinct=True + ) + + return struct_type + + +def _get_field_debug_type( + field_name: str, + field, + generator: DebugInfoGenerator, + parent_struct: DependencyNode, + generated_debug_info: List[Tuple[DependencyNode, Any]], +) -> tuple[Any, int]: + """ + Determine the appropriate debug type for a field based on its Python/ctypes type. + + Args: + field_name: Name of the field + field: Field object containing type information + generator: DebugInfoGenerator instance + parent_struct: The parent struct containing this field + generated_debug_info: List of already generated debug info + + Returns: + The debug info type for this field + """ + # Handle complex types (arrays, pointers) + if field.ctype_complex_type is not None: + if issubclass(field.ctype_complex_type, ctypes.Array): + # Handle array types + element_type, base_type_size = _get_basic_debug_type( + field.containing_type, generator + ) + return generator.create_array_type_vmlinux( + (element_type, base_type_size * field.type_size), field.type_size + ), field.type_size * base_type_size + elif issubclass(field.ctype_complex_type, ctypes._Pointer): + # Handle pointer types + pointee_type, _ = _get_basic_debug_type(field.containing_type, generator) + return generator.create_pointer_type(pointee_type), 64 + + # Handle other vmlinux types (nested structs) + if field.type.__module__ == "vmlinux": + # If it's a struct from vmlinux, check if we've already generated debug info for it + struct_name = field.type.__name__ + + # Look for existing debug info in the list + for existing_struct, debug_info in generated_debug_info: + if existing_struct.name == struct_name: + # Use existing debug info + return debug_info, existing_struct.__sizeof__() + + # If not found, create a forward declaration + # This will be completed when the actual struct is processed + logger.warning("Forward declaration in struct created") + forward_type = generator.create_struct_type([], 0, is_distinct=True) + return forward_type, 0 + + # Handle basic C types + return _get_basic_debug_type(field.type, generator) + + +def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any: + """ + Map a ctypes type to a DWARF debug type. + + Args: + ctype: A ctypes type or Python type + generator: DebugInfoGenerator instance + + Returns: + The corresponding debug type + """ + # Map ctypes to debug info types + if ctype == ctypes.c_char or ctype == ctypes.c_byte: + return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 + elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8: + return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8 + elif ctype == ctypes.c_short or ctype == ctypes.c_int16: + return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16 + elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16: + return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16 + elif ctype == ctypes.c_int or ctype == ctypes.c_int32: + return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32 + elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32: + return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32 + elif ctype == ctypes.c_long: + return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64 + elif ctype == ctypes.c_ulong: + return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64 + elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64: + return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64 + elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64: + return generator.get_basic_type( + "unsigned long long", 64, dc.DW_ATE_unsigned + ), 64 + elif ctype == ctypes.c_float: + return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32 + elif ctype == ctypes.c_double: + return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64 + elif ctype == ctypes.c_bool: + return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8 + elif ctype == ctypes.c_char_p: + char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 + return generator.create_pointer_type(char_type) + elif ctype == ctypes.c_void_p: + return generator.create_pointer_type(None), 64 + else: + return generator.get_uint64_type(), 64 diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py new file mode 100644 index 0000000..960671e --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -0,0 +1,225 @@ +import ctypes +import logging + +from ..assignment_info import AssignmentInfo, AssignmentType +from ..dependency_handler import DependencyHandler +from .debug_info_gen import debug_info_generation +from ..dependency_node import DependencyNode +import llvmlite.ir as ir + +logger = logging.getLogger(__name__) + + +class IRGenerator: + # get the assignments dict and add this stuff to it. + def __init__(self, llvm_module, handler: DependencyHandler, assignments): + self.llvm_module = llvm_module + self.handler: DependencyHandler = handler + self.generated: list[str] = [] + self.generated_debug_info: list = [] + # Use struct_name and field_name as key instead of Field object + self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {} + self.assignments: dict[str, AssignmentInfo] = assignments + if not handler.is_ready: + raise ImportError( + "Semantic analysis of vmlinux imports failed. Cannot generate IR" + ) + for struct in handler: + self.struct_processor(struct) + + def struct_processor(self, struct, processing_stack=None): + # Initialize processing stack on first call + if processing_stack is None: + processing_stack = set() + + # If already generated, skip + if struct.name in self.generated: + return + + # Detect circular dependency + if struct.name in processing_stack: + logger.info( + f"Circular dependency detected for {struct.name}, skipping recursive processing" + ) + # For circular dependencies, we can either: + # 1. Use forward declarations (opaque pointers) + # 2. Mark as incomplete and process later + # 3. Generate a placeholder type + # Here we'll just skip and let it be processed in its own call + return + + logger.info(f"IR generating for {struct.name}") + + # Add to processing stack before processing dependencies + processing_stack.add(struct.name) + + try: + # Process all dependencies first + if struct.depends_on is None: + pass + else: + for dependency in struct.depends_on: + if dependency not in self.generated: + # Check if dependency exists in handler + if dependency in self.handler.nodes: + dep_node_from_dependency = self.handler[dependency] + # Pass the processing_stack down to track circular refs + self.struct_processor( + dep_node_from_dependency, processing_stack + ) + else: + raise RuntimeError( + f"Warning: Dependency {dependency} not found in handler" + ) + + # Generate IR first to populate field names + self.generated_debug_info.append( + (struct, self.gen_ir(struct, self.generated_debug_info)) + ) + + # Fill the assignments dictionary with struct information + if struct.name not in self.assignments: + # Create a members dictionary for AssignmentInfo + members_dict = {} + for field_name, field in struct.fields.items(): + # Get the generated field name from our dictionary, or use field_name if not found + if ( + struct.name in self.generated_field_names + and field_name in self.generated_field_names[struct.name] + ): + field_global_variable = self.generated_field_names[struct.name][ + field_name + ] + members_dict[field_name] = (field_global_variable, field) + else: + raise ValueError( + f"llvm global name not found for struct field {field_name}" + ) + # members_dict[field_name] = (field_name, field) + + # Add struct to assignments dictionary + self.assignments[struct.name] = AssignmentInfo( + value_type=AssignmentType.STRUCT, + python_type=struct.ctype_struct, + value=None, + pointer_level=None, + signature=None, + members=members_dict, + ) + logger.info(f"Added struct assignment info for {struct.name}") + + self.generated.append(struct.name) + + finally: + # Remove from processing stack after we're done + processing_stack.discard(struct.name) + + def gen_ir(self, struct, generated_debug_info): + # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite + # accepts our issue, we will resort to normal accessed attribute based attribute addition + # currently we generate all possible field accesses for CO-RE and put into the assignment table + debug_info = debug_info_generation( + struct, self.llvm_module, generated_debug_info + ) + field_index = 0 + + # Make sure the struct has an entry in our field names dictionary + if struct.name not in self.generated_field_names: + self.generated_field_names[struct.name] = {} + + for field_name, field in struct.fields.items(): + # does not take arrays and similar types into consideration yet. + if field.ctype_complex_type is not None and issubclass( + field.ctype_complex_type, ctypes.Array + ): + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == ctypes.__name__: + containing_type_size = ctypes.sizeof(containing_type) + if array_size == 0: + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, 0, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + field_index += 1 + continue + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + field_index += 1 + elif field.type_size is not None: + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == "vmlinux": + containing_type_size = self.handler[ + containing_type.__name__ + ].current_offset + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + field_index += 1 + else: + field_co_re_name = self._struct_name_generator( + struct, field, field_index + ) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + return debug_info + + def _struct_name_generator( + self, + struct: DependencyNode, + field, + field_index: int, + is_indexed: bool = False, + index: int = 0, + containing_type_size: int = 0, + ) -> str: + # TODO: Does not support Unions as well as recursive pointer and array type naming + if is_indexed: + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset + index * containing_type_size}" + + "$" + + f"0:{field_index}:{index}" + ) + return name + elif struct.name.startswith("struct_"): + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset}" + + "$" + + f"0:{field_index}" + ) + return name + else: + print(self.handler[struct.name]) + raise TypeError( + "Name generation cannot occur due to type name not starting with struct" + ) diff --git a/pythonbpf/vmlinux_parser/ir_generation.py b/pythonbpf/vmlinux_parser/ir_generation.py deleted file mode 100644 index 62b13bc..0000000 --- a/pythonbpf/vmlinux_parser/ir_generation.py +++ /dev/null @@ -1,14 +0,0 @@ -import logging -from .dependency_handler import DependencyHandler - -logger = logging.getLogger(__name__) - - -class IRGenerator: - def __init__(self, module, handler: DependencyHandler): - self.module = module - self.handler: DependencyHandler = handler - if not handler.is_ready: - raise ImportError( - "Semantic analysis of vmlinux imports failed. Cannot generate IR" - ) diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py new file mode 100644 index 0000000..1986b44 --- /dev/null +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -0,0 +1,90 @@ +import logging +from llvmlite import ir + +from pythonbpf.vmlinux_parser.assignment_info import AssignmentType + +logger = logging.getLogger(__name__) + + +class VmlinuxHandler: + """Handler for vmlinux-related operations""" + + _instance = None + + @classmethod + def get_instance(cls): + """Get the singleton instance""" + if cls._instance is None: + logger.warning("VmlinuxHandler used before initialization") + return None + return cls._instance + + @classmethod + def initialize(cls, vmlinux_symtab): + """Initialize the handler with vmlinux symbol table""" + cls._instance = cls(vmlinux_symtab) + return cls._instance + + def __init__(self, vmlinux_symtab): + """Initialize with vmlinux symbol table""" + self.vmlinux_symtab = vmlinux_symtab + logger.info( + f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols" + ) + + def is_vmlinux_enum(self, name): + """Check if name is a vmlinux enum constant""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT + ) + + def is_vmlinux_struct(self, name): + """Check if name is a vmlinux struct""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT + ) + + def handle_vmlinux_enum(self, name): + """Handle vmlinux enum constants by returning LLVM IR constants""" + if self.is_vmlinux_enum(name): + value = self.vmlinux_symtab[name]["value"] + logger.info(f"Resolving vmlinux enum {name} = {value}") + return ir.Constant(ir.IntType(64), value), ir.IntType(64) + return None + + def get_vmlinux_enum_value(self, name): + """Handle vmlinux enum constants by returning LLVM IR constants""" + if self.is_vmlinux_enum(name): + value = self.vmlinux_symtab[name]["value"] + logger.info(f"The value of vmlinux enum {name} = {value}") + return value + return None + + def handle_vmlinux_struct(self, struct_name, module, builder): + """Handle vmlinux struct initializations""" + if self.is_vmlinux_struct(struct_name): + # TODO: Implement core-specific struct handling + # This will be more complex and depends on the BTF information + logger.info(f"Handling vmlinux struct {struct_name}") + # Return struct type and allocated pointer + # This is a stub, actual implementation will be more complex + return None + return None + + def handle_vmlinux_struct_field( + self, struct_var_name, field_name, module, builder, local_sym_tab + ): + """Handle access to vmlinux struct fields""" + # Check if it's a variable of vmlinux struct type + if struct_var_name in local_sym_tab: + var_info = local_sym_tab[struct_var_name] # noqa: F841 + # Need to check if this variable is a vmlinux struct + # This will depend on how you track vmlinux struct types in your symbol table + logger.info( + f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" + ) + # Return pointer to field and field type + return None + return None diff --git a/tests/c-form/ex7.bpf.c b/tests/c-form/ex7.bpf.c index a462444..33ed6a5 100644 --- a/tests/c-form/ex7.bpf.c +++ b/tests/c-form/ex7.bpf.c @@ -1,23 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include "vmlinux.h" #include #include -struct trace_entry { - short unsigned int type; - unsigned char flags; - unsigned char preempt_count; - int pid; -}; - -struct trace_event_raw_sys_enter { - struct trace_entry ent; - long int id; - long unsigned int args[6]; - char __data[0]; -}; - struct event { __u32 pid; __u32 uid; @@ -33,7 +19,7 @@ struct { SEC("tp/syscalls/sys_enter_setuid") int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) { struct event data = {}; - + struct blk_integrity_iter it = {}; // Extract UID from the syscall arguments data.uid = (unsigned int)ctx->args[0]; data.ts = bpf_ktime_get_ns(); diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index a7b4550..c8510dc 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -1,10 +1,17 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS -from vmlinux import struct_xdp_md -from vmlinux import struct_xdp_buff # noqa: F401 -from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +from vmlinux import TASK_COMM_LEN # noqa: F401 +from vmlinux import struct_qspinlock # noqa: F401 + +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_posix_cputimers # noqa: F401 +from vmlinux import struct_xdp_md + +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +# from vmlinux import struct_request # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py new file mode 100644 index 0000000..97ab54a --- /dev/null +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -0,0 +1,47 @@ +import logging + +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, map +from pythonbpf import compile # noqa: F401 +from vmlinux import TASK_COMM_LEN # noqa: F401 +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from ctypes import c_uint64, c_int32, c_int64 +from pythonbpf.maps import HashMap + +# from vmlinux import struct_uinput_device +# from vmlinux import struct_blk_integrity_iter + + +@bpf +@map +def mymap() -> HashMap: + return HashMap(key=c_int32, value=c_uint64, max_entries=TASK_COMM_LEN) + + +@bpf +@map +def mymap2() -> HashMap: + return HashMap(key=c_int32, value=c_uint64, max_entries=18) + + +# Instructions to how to run this program +# 1. Install PythonBPF: pip install pythonbpf +# 2. Run the program: python examples/simple_struct_test.py +# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o +# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0 +# 5. send traffic through the device and observe effects +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: + a = 2 + TASK_COMM_LEN + TASK_COMM_LEN + print(f"Hello, World{TASK_COMM_LEN} and {a}") + return c_int64(TASK_COMM_LEN + 2) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("simple_struct_test.py", "simple_struct_test.ll", loglevel=logging.DEBUG) +# compile() diff --git a/tools/setup.sh b/tools/setup.sh new file mode 100755 index 0000000..6dc62f7 --- /dev/null +++ b/tools/setup.sh @@ -0,0 +1,199 @@ +#!/bin/bash + +print_warning() { + echo -e "\033[1;33m$1\033[0m" +} +print_info() { + echo -e "\033[1;32m$1\033[0m" +} + +if [ "$EUID" -ne 0 ]; then + echo "Please run this script with sudo." + exit 1 +fi + +print_warning "====================================================================" +print_warning " WARNING " +print_warning " This script will run kernel-level BPF programs. " +print_warning " BPF programs run with kernel privileges and could potentially " +print_warning " affect system stability if not used properly. " +print_warning " " +print_warning " This is a non-interactive version for curl piping. " +print_warning " The script will proceed automatically with installation. " +print_warning "====================================================================" +echo + +print_info "This script will:" +echo "1. Check and install required dependencies (libelf, clang, python, bpftool)" +echo "2. Download example programs from the Python-BPF GitHub repository" +echo "3. Create a Python virtual environment with necessary packages" +echo "4. Set up a Jupyter notebook server" +echo "Starting in 5 seconds. Press Ctrl+C to cancel..." +sleep 5 + +WORK_DIR="/tmp/python_bpf_setup" +REAL_USER=$(logname || echo "$SUDO_USER") + +echo "Creating temporary directory: $WORK_DIR" +mkdir -p "$WORK_DIR" +cd "$WORK_DIR" || exit 1 + +if [ -f /etc/os-release ]; then + . /etc/os-release + DISTRO=$ID +else + echo "Cannot determine Linux distribution. Exiting." + exit 1 +fi + +install_dependencies() { + case $DISTRO in + ubuntu|debian|pop|mint|elementary|zorin) + echo "Detected Ubuntu/Debian-based system" + apt update + + # Check and install libelf + if ! dpkg -l libelf-dev >/dev/null 2>&1; then + echo "Installing libelf-dev..." + apt install -y libelf-dev + else + echo "libelf-dev is already installed." + fi + + # Check and install clang + if ! command -v clang >/dev/null 2>&1; then + echo "Installing clang..." + apt install -y clang + else + echo "clang is already installed." + fi + + # Check and install python + if ! command -v python3 >/dev/null 2>&1; then + echo "Installing python3..." + apt install -y python3 python3-pip python3-venv + else + echo "python3 is already installed." + fi + + # Check and install bpftool + if ! command -v bpftool >/dev/null 2>&1; then + echo "Installing bpftool..." + apt install -y linux-tools-common linux-tools-generic + + # If bpftool still not found, try installing linux-tools-$(uname -r) + if ! command -v bpftool >/dev/null 2>&1; then + KERNEL_VERSION=$(uname -r) + apt install -y linux-tools-$KERNEL_VERSION + fi + else + echo "bpftool is already installed." + fi + ;; + + arch|manjaro|endeavouros) + echo "Detected Arch-based Linux system" + + # Check and install libelf + if ! pacman -Q libelf >/dev/null 2>&1; then + echo "Installing libelf..." + pacman -S --noconfirm libelf + else + echo "libelf is already installed." + fi + + # Check and install clang + if ! command -v clang >/dev/null 2>&1; then + echo "Installing clang..." + pacman -S --noconfirm clang + else + echo "clang is already installed." + fi + + # Check and install python + if ! command -v python3 >/dev/null 2>&1; then + echo "Installing python3..." + pacman -S --noconfirm python python-pip + else + echo "python3 is already installed." + fi + + # Check and install bpftool + if ! command -v bpftool >/dev/null 2>&1; then + echo "Installing bpftool..." + pacman -S --noconfirm bpf linux-headers + else + echo "bpftool is already installed." + fi + ;; + + *) + echo "Unsupported distribution: $DISTRO" + echo "This script only supports Ubuntu/Debian and Arch Linux derivatives." + exit 1 + ;; + esac +} + +echo "Checking and installing dependencies..." +install_dependencies + +# Download example programs +echo "Downloading example programs from Python-BPF GitHub repository..." +mkdir -p examples +cd examples || exit 1 + +echo "Fetching example files list..." +FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -o '"path": "examples/[^"]*"' | awk -F'"' '{print $4}') + +if [ -z "$FILES" ]; then + echo "Failed to fetch file list from repository. Using fallback method..." + # Fallback to downloading common example files + EXAMPLES=( + "binops_demo.py" + "blk_request.py" + "clone-matplotlib.ipynb" + "clone_plot.py" + "hello_world.py" + "kprobes.py" + "struct_and_perf.py" + "sys_sync.py" + "xdp_pass.py" + ) + + for example in "${EXAMPLES[@]}"; do + echo "Downloading: $example" + curl -s -O "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/examples/$example" + done +else + for file in $FILES; do + filename=$(basename "$file") + echo "Downloading: $filename" + curl -s -o "$filename" "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/$file" + done +fi + +cd "$WORK_DIR" || exit 1 +chown -R "$REAL_USER:$(id -gn "$REAL_USER")" . + +echo "Creating Python virtual environment..." +su - "$REAL_USER" -c "cd \"$WORK_DIR\" && python3 -m venv venv" + +echo "Installing Python packages..." +su - "$REAL_USER" -c "cd \"$WORK_DIR\" && source venv/bin/activate && pip install --upgrade pip && pip install jupyter pythonbpf pylibbpf matplotlib" + +cat > "$WORK_DIR/start_jupyter.sh" << EOF +#!/bin/bash +cd "$WORK_DIR" +source venv/bin/activate +cd examples +sudo ../venv/bin/python -m notebook --ip=0.0.0.0 --allow-root +EOF + +chmod +x "$WORK_DIR/start_jupyter.sh" +chown "$REAL_USER:$(id -gn "$REAL_USER")" "$WORK_DIR/start_jupyter.sh" + +print_info "========================================================" +print_info "Setup complete! To start Jupyter Notebook, run:" +print_info "$ sudo $WORK_DIR/start_jupyter.sh" +print_info "========================================================"