diff --git a/pythonbpf/binary_ops.py b/pythonbpf/binary_ops.py index c0ca0ec..e8c28ba 100644 --- a/pythonbpf/binary_ops.py +++ b/pythonbpf/binary_ops.py @@ -35,6 +35,17 @@ def get_operand_value(operand, builder, local_sym_tab): def handle_binary_op_impl(rval, builder, local_sym_tab): + """ + Handle binary operations and emit corresponding LLVM IR instructions. + + Args: + rval: The AST BinOp node representing the binary operation + builder: LLVM IR builder for emitting instructions + local_sym_tab: Symbol table mapping variable names to their IR representations + + Returns: + The LLVM IR value representing the result of the binary operation + """ op = rval.op left = get_operand_value(rval.left, builder, local_sym_tab) right = get_operand_value(rval.right, builder, local_sym_tab) @@ -63,6 +74,18 @@ def handle_binary_op_impl(rval, builder, local_sym_tab): def handle_binary_op(rval, builder, var_name, local_sym_tab): + """ + Handle binary operations and optionally store the result to a variable. + + Args: + rval: The AST BinOp node representing the binary operation + builder: LLVM IR builder for emitting instructions + var_name: Optional variable name to store the result + local_sym_tab: Symbol table mapping variable names to their IR representations + + Returns: + A tuple of (result_value, result_type) + """ result = handle_binary_op_impl(rval, builder, local_sym_tab) if var_name and var_name in local_sym_tab: logger.info( diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index f1be840..5ea5174 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -37,6 +37,14 @@ def find_bpf_chunks(tree): def processor(source_code, filename, module): + """ + Process Python source code and convert BPF-decorated functions to LLVM IR. 
+ + Args: + source_code: The Python source code to process + filename: The name of the source file + module: The LLVM IR module to populate + """ tree = ast.parse(source_code, filename) logger.debug(ast.dump(tree, indent=4)) @@ -56,6 +64,17 @@ def processor(source_code, filename, module): def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): + """ + Compile a Python BPF program to LLVM IR. + + Args: + filename: Path to the Python source file containing BPF programs + output: Path where the LLVM IR (.ll) file will be written + loglevel: Logging level for compilation messages + + Returns: + Path to the generated LLVM IR file + """ logging.basicConfig( level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" ) @@ -129,6 +148,18 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): def compile(loglevel=logging.INFO) -> bool: + """ + Compile the calling Python BPF program to an object file. + + This function should be called from a Python file containing BPF programs. + It will compile the calling file to LLVM IR and then to a BPF object file. + + Args: + loglevel: Logging level for compilation messages + + Returns: + True if compilation succeeded, False otherwise + """ # Look one level up the stack to the caller of this function caller_frame = inspect.stack()[1] caller_file = Path(caller_frame.filename).resolve() @@ -162,6 +193,18 @@ def compile(loglevel=logging.INFO) -> bool: def BPF(loglevel=logging.INFO) -> BpfProgram: + """ + Compile the calling Python BPF program and return a BpfProgram object. + + This function compiles the calling file's BPF programs to an object file + and loads it into a BpfProgram object for immediate use. 
+ + Args: + loglevel: Logging level for compilation messages + + Returns: + A BpfProgram object that can be used to load and attach BPF programs + """ caller_frame = inspect.stack()[1] src = inspect.getsource(caller_frame.frame) with tempfile.NamedTemporaryFile( diff --git a/pythonbpf/decorators.py b/pythonbpf/decorators.py index c863dda..ec1137e 100644 --- a/pythonbpf/decorators.py +++ b/pythonbpf/decorators.py @@ -23,6 +23,15 @@ def struct(cls): def section(name: str): + """ + Decorator to specify the ELF section name for a BPF program. + + Args: + name: The section name (e.g., 'xdp', 'tracepoint/syscalls/sys_enter_execve') + + Returns: + A decorator function that marks the function with the section name + """ def wrapper(fn): fn._section = name return fn diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 21be196..16fd698 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -332,6 +332,21 @@ def eval_expr( map_sym_tab, structs_sym_tab=None, ): + """ + Evaluate an expression and return its LLVM IR value and type. + + Args: + func: The LLVM IR function being built + module: The LLVM IR module + builder: LLVM IR builder + expr: The AST expression node to evaluate + local_sym_tab: Local symbol table + map_sym_tab: Map symbol table + structs_sym_tab: Struct symbol table + + Returns: + A tuple of (value, type) or None if evaluation fails + """ logger.info(f"Evaluating expression: {ast.dump(expr)}") if isinstance(expr, ast.Name): return _handle_name_expr(expr, local_sym_tab, builder) diff --git a/pythonbpf/expr/type_normalization.py b/pythonbpf/expr/type_normalization.py index 7a2fb57..af4b9e1 100644 --- a/pythonbpf/expr/type_normalization.py +++ b/pythonbpf/expr/type_normalization.py @@ -17,7 +17,15 @@ COMPARISON_OPS = { def _get_base_type_and_depth(ir_type): - """Get the base type for pointer types.""" + """ + Get the base type and pointer depth for an LLVM IR type. 
+ + Args: + ir_type: The LLVM IR type to analyze + + Returns: + A tuple of (base_type, depth) where depth is the number of pointer levels + """ cur_type = ir_type depth = 0 while isinstance(cur_type, ir.PointerType): @@ -27,7 +35,18 @@ def _get_base_type_and_depth(ir_type): def _deref_to_depth(func, builder, val, target_depth): - """Dereference a pointer to a certain depth.""" + """ + Dereference a pointer to a certain depth with null checks. + + Args: + func: The LLVM IR function being built + builder: LLVM IR builder + val: The pointer value to dereference + target_depth: Number of levels to dereference + + Returns: + The dereferenced value, or None if dereferencing fails + """ cur_val = val cur_type = val.type @@ -73,7 +92,18 @@ def _deref_to_depth(func, builder, val, target_depth): def _normalize_types(func, builder, lhs, rhs): - """Normalize types for comparison.""" + """ + Normalize types for comparison by casting or dereferencing as needed. + + Args: + func: The LLVM IR function being built + builder: LLVM IR builder + lhs: Left-hand side value + rhs: Right-hand side value + + Returns: + A tuple of (normalized_lhs, normalized_rhs) or (None, None) on error + """ logger.info(f"Normalizing types: {lhs.type} vs {rhs.type}") if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType): @@ -99,7 +129,16 @@ def _normalize_types(func, builder, lhs, rhs): def convert_to_bool(builder, val): - """Convert a value to boolean.""" + """ + Convert an LLVM IR value to a boolean (i1) type. + + Args: + builder: LLVM IR builder + val: The value to convert + + Returns: + An i1 boolean value + """ if val.type == ir.IntType(1): return val if isinstance(val.type, ir.PointerType): @@ -110,7 +149,19 @@ def convert_to_bool(builder, val): def handle_comparator(func, builder, op, lhs, rhs): - """Handle comparison operations.""" + """ + Handle comparison operations between two values. 
+ + Args: + func: The LLVM IR function being built + builder: LLVM IR builder + op: The AST comparison operator node + lhs: Left-hand side value + rhs: Right-hand side value + + Returns: + A tuple of (result, ir.IntType(1)) or None on error + """ if lhs.type != rhs.type: lhs, rhs = _normalize_types(func, builder, lhs, rhs) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 7fc3feb..84ab40e 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -243,6 +243,21 @@ def handle_assign( def handle_cond( func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None ): + """ + Evaluate a condition expression and convert it to a boolean value. + + Args: + func: The LLVM IR function being built + module: The LLVM IR module + builder: LLVM IR builder + cond: The AST condition node to evaluate + local_sym_tab: Local symbol table + map_sym_tab: Map symbol table + structs_sym_tab: Struct symbol table + + Returns: + LLVM IR boolean value representing the condition result + """ val = eval_expr( func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab )[0] @@ -298,6 +313,18 @@ def handle_if( def handle_return(builder, stmt, local_sym_tab, ret_type): + """ + Handle return statements in BPF functions. + + Args: + builder: LLVM IR builder + stmt: The AST Return node + local_sym_tab: Local symbol table + ret_type: Expected return type + + Returns: + True if a return was emitted, False otherwise + """ logger.info(f"Handling return statement: {ast.dump(stmt)}") if stmt.value is None: return _handle_none_return(builder) @@ -329,6 +356,23 @@ def process_stmt( did_return, ret_type=ir.IntType(64), ): + """ + Process a single statement in a BPF function. 
+ + Args: + func: The LLVM IR function being built + module: The LLVM IR module + builder: LLVM IR builder + stmt: The AST statement node to process + local_sym_tab: Local symbol table + map_sym_tab: Map symbol table + structs_sym_tab: Struct symbol table + did_return: Whether a return has been emitted + ret_type: Expected return type + + Returns: + True if a return was emitted, False otherwise + """ logger.info(f"Processing statement: {ast.dump(stmt)}") if isinstance(stmt, ast.Expr): handle_expr( @@ -363,6 +407,25 @@ def process_stmt( def allocate_mem( module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab ): + """ + Pre-allocate stack memory for local variables in a BPF function. + + This function scans the function body and creates alloca instructions + for all local variables before processing the function statements. + + Args: + module: The LLVM IR module + builder: LLVM IR builder + body: List of AST statements in the function body + func: The LLVM IR function being built + ret_type: Expected return type + map_sym_tab: Map symbol table + local_sym_tab: Local symbol table to populate + structs_sym_tab: Struct symbol table + + Returns: + Updated local symbol table + """ for stmt in body: has_metadata = False if isinstance(stmt, ast.If): @@ -556,6 +619,16 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): + """ + Process all BPF function chunks and generate LLVM IR. 
+ + Args: + tree: The Python AST (not used in current implementation) + module: The LLVM IR module to add functions to + chunks: List of AST function nodes decorated with @bpf + map_sym_tab: Map symbol table + structs_sym_tab: Struct symbol table + """ for func_node in chunks: is_global = False for decorator in func_node.decorator_list: @@ -581,6 +654,18 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): def infer_return_type(func_node: ast.FunctionDef): + """ + Infer the return type of a BPF function from annotations or return statements. + + Args: + func_node: The AST function node + + Returns: + String representation of the return type (e.g., 'c_int64') + + Raises: + TypeError: If func_node is not a FunctionDef or AsyncFunctionDef + """ if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)): raise TypeError("Expected ast.FunctionDef") if func_node.returns is not None: diff --git a/pythonbpf/globals_pass.py b/pythonbpf/globals_pass.py index 1e97763..cf81ce2 100644 --- a/pythonbpf/globals_pass.py +++ b/pythonbpf/globals_pass.py @@ -12,6 +12,16 @@ global_sym_tab = [] def populate_global_symbol_table(tree, module: ir.Module): + """ + Populate the global symbol table with BPF functions, maps, and globals. + + Args: + tree: The Python AST to scan for global symbols + module: The LLVM IR module (not used in current implementation) + + Returns: + False (legacy return value) + """ for node in tree.body: if isinstance(node, ast.FunctionDef): for dec in node.decorator_list: @@ -33,6 +43,17 @@ def populate_global_symbol_table(tree, module: ir.Module): def emit_global(module: ir.Module, node, name): + """ + Emit a BPF global variable into the LLVM IR module. 
+ + Args: + module: The LLVM IR module to add the global variable to + node: The AST function node containing the global definition + name: The name of the global variable + + Returns: + The created global variable + """ logger.info(f"global identifier {name} processing") # deduce LLVM type from the annotated return if not isinstance(node.returns, ast.Name): @@ -117,7 +138,11 @@ def globals_processing(tree, module): def emit_llvm_compiler_used(module: ir.Module, names: list[str]): """ - Emit the @llvm.compiler.used global given a list of function/global names. + Emit the @llvm.compiler.used global to prevent LLVM from optimizing away symbols. + + Args: + module: The LLVM IR module to add the compiler.used metadata to + names: List of function/global names that must be preserved """ ptr_ty = ir.PointerType() used_array_ty = ir.ArrayType(ptr_ty, len(names)) @@ -138,6 +163,13 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]): def globals_list_creation(tree, module: ir.Module): + """ + Collect all BPF symbols and emit @llvm.compiler.used metadata. + + Args: + tree: The Python AST to scan for symbols + module: The LLVM IR module to add metadata to + """ collected = ["LICENSE"] for node in tree.body: diff --git a/pythonbpf/helper/helpers.py b/pythonbpf/helper/helpers.py index 485467e..b2fb116 100644 --- a/pythonbpf/helper/helpers.py +++ b/pythonbpf/helper/helpers.py @@ -2,10 +2,22 @@ import ctypes def ktime(): + """ + Get the current kernel time in nanoseconds. + + Returns: + A c_int64 stub value (actual implementation is in BPF runtime) + """ return ctypes.c_int64(0) def pid(): + """ + Get the current process ID (PID). 
+ + Returns: + A c_int32 stub value (actual implementation is in BPF runtime) + """ return ctypes.c_int32(0) diff --git a/pythonbpf/license_pass.py b/pythonbpf/license_pass.py index c3d3dc0..926ce0a 100644 --- a/pythonbpf/license_pass.py +++ b/pythonbpf/license_pass.py @@ -7,6 +7,16 @@ logger: Logger = logging.getLogger(__name__) def emit_license(module: ir.Module, license_str: str): + """ + Emit a LICENSE global variable into the LLVM IR module. + + Args: + module: The LLVM IR module to add the LICENSE variable to + license_str: The license string (e.g., 'GPL') + + Returns: + The created global variable + """ license_bytes = license_str.encode("utf8") + b"\x00" elems = [ir.Constant(ir.IntType(8), b) for b in license_bytes] ty = ir.ArrayType(ir.IntType(8), len(elems)) diff --git a/pythonbpf/maps/maps.py b/pythonbpf/maps/maps.py index a2d7c21..9091b71 100644 --- a/pythonbpf/maps/maps.py +++ b/pythonbpf/maps/maps.py @@ -1,18 +1,51 @@ # This file provides type and function hints only and does not actually give any functionality. class HashMap: + """ + A BPF hash map for storing key-value pairs. + + This is a type hint class used during compilation. The actual BPF map + implementation is generated as LLVM IR. + """ + def __init__(self, key, value, max_entries): + """ + Initialize a HashMap definition. + + Args: + key: The ctypes type for keys (e.g., c_int64) + value: The ctypes type for values (e.g., c_int64) + max_entries: Maximum number of entries the map can hold + """ self.key = key self.value = value self.max_entries = max_entries self.entries = {} def lookup(self, key): + """ + Look up a value by key in the map. + + Args: + key: The key to look up + + Returns: + The value if found, None otherwise + """ if key in self.entries: return self.entries[key] else: return None def delete(self, key): + """ + Delete an entry from the map by key. 
+ + Args: + key: The key to delete + + Raises: + KeyError: If the key is not found in the map + """ if key in self.entries: del self.entries[key] else: @@ -20,6 +53,17 @@ class HashMap: # TODO: define the flags that can be added def update(self, key, value, flags=None): + """ + Update the value stored for an existing key in the map. + + Args: + key: The key to update + value: The new value + flags: Optional flags for update behavior + + Raises: + KeyError: If the key is not found in the map + """ if key in self.entries: self.entries[key] = value else: @@ -27,25 +71,76 @@ class HashMap: class PerfEventArray: + """ + A BPF perf event array for sending data to userspace. + + This is a type hint class used during compilation. + """ + def __init__(self, key_size, value_size): + """ + Initialize a PerfEventArray definition. + + Args: + key_size: The size/type for keys + value_size: The size/type for values + """ self.key_type = key_size self.value_type = value_size self.entries = {} def output(self, data): + """ + Output data to the perf event array. + + Args: + data: The data to output + """ pass # Placeholder for output method class RingBuf: + """ + A BPF ring buffer for efficient data transfer to userspace. + + This is a type hint class used during compilation. + """ + def __init__(self, max_entries): + """ + Initialize a RingBuf definition. + + Args: + max_entries: Maximum number of entries the ring buffer can hold + """ self.max_entries = max_entries def reserve(self, size: int, flags=0): + """ + Reserve space in the ring buffer. + + Args: + size: Size in bytes to reserve + flags: Optional reservation flags + + Returns: + 0 as a placeholder (actual implementation is in BPF runtime) + + Raises: + ValueError: If size exceeds max_entries + """ if size > self.max_entries: raise ValueError("size cannot be greater than set maximum entries") return 0 def submit(self, data, flags=0): + """ + Submit data to the ring buffer. 
+ + Args: + data: The data to submit + flags: Optional submission flags + """ pass # add discard, output and also give names to flags and stuff diff --git a/pythonbpf/maps/maps_pass.py b/pythonbpf/maps/maps_pass.py index 95748a8..ea2b2eb 100644 --- a/pythonbpf/maps/maps_pass.py +++ b/pythonbpf/maps/maps_pass.py @@ -20,6 +20,15 @@ def maps_proc(tree, module, chunks): def is_map(func_node): + """ + Check if a function node is decorated with @map. + + Args: + func_node: The AST function node to check + + Returns: + True if the function is decorated with @map, False otherwise + """ return any( isinstance(decorator, ast.Name) and decorator.id == "map" for decorator in func_node.decorator_list @@ -65,7 +74,17 @@ class BPFMapType(Enum): def create_bpf_map(module, map_name, map_params): - """Create a BPF map in the module with given parameters and debug info""" + """ + Create a BPF map in the module with given parameters and debug info. + + Args: + module: The LLVM IR module to add the map to + map_name: The name of the BPF map + map_params: Dictionary of map parameters (type, key_size, value_size, max_entries) + + Returns: + The created global variable representing the map + """ # Create the anonymous struct type for BPF map map_struct_type = ir.LiteralStructType( diff --git a/pythonbpf/structs/struct_type.py b/pythonbpf/structs/struct_type.py index 90abf05..86db5d1 100644 --- a/pythonbpf/structs/struct_type.py +++ b/pythonbpf/structs/struct_type.py @@ -2,18 +2,64 @@ from llvmlite import ir class StructType: + """ + Wrapper class for LLVM IR struct types with field access helpers. + + Attributes: + ir_type: The LLVM IR struct type + fields: Dictionary mapping field names to their types + size: Total size of the struct in bytes + """ + def __init__(self, ir_type, fields, size): + """ + Initialize a StructType. 
+ + Args: + ir_type: The LLVM IR struct type + fields: Dictionary mapping field names to their types + size: Total size of the struct in bytes + """ self.ir_type = ir_type self.fields = fields self.size = size def field_idx(self, field_name): + """ + Get the index of a field in the struct. + + Args: + field_name: The name of the field + + Returns: + The zero-based index of the field + """ return list(self.fields.keys()).index(field_name) def field_type(self, field_name): + """ + Get the LLVM IR type of a field. + + Args: + field_name: The name of the field + + Returns: + The LLVM IR type of the field + """ return self.fields[field_name] def gep(self, builder, ptr, field_name): + """ + Generate a GEP (GetElementPtr) instruction to access a struct field. + + Args: + builder: LLVM IR builder + ptr: Pointer to the struct + field_name: Name of the field to access + + Returns: + A pointer to the field + """ idx = self.field_idx(field_name) return builder.gep( ptr, @@ -22,6 +68,18 @@ class StructType: ) def field_size(self, field_name): + """ + Calculate the size of a field in bytes. + + Args: + field_name: The name of the field + + Returns: + The size of the field in bytes + + Raises: + TypeError: If the field type is not supported + """ fld = self.fields[field_name] if isinstance(fld, ir.ArrayType): return fld.count * (fld.element.width // 8) diff --git a/pythonbpf/structs/structs_pass.py b/pythonbpf/structs/structs_pass.py index d79fe0e..dbfd674 100644 --- a/pythonbpf/structs/structs_pass.py +++ b/pythonbpf/structs/structs_pass.py @@ -26,6 +26,15 @@ def structs_proc(tree, module, chunks): def is_bpf_struct(cls_node): + """ + Check if a class node is decorated with @struct. 
+ + Args: + cls_node: The AST class node to check + + Returns: + True if the class is decorated with @struct, False otherwise + """ return any( isinstance(decorator, ast.Name) and decorator.id == "struct" for decorator in cls_node.decorator_list @@ -33,7 +42,16 @@ def is_bpf_struct(cls_node): def process_bpf_struct(cls_node, module): - """Process a single BPF struct definition""" + """ + Process a single BPF struct definition and create its LLVM IR representation. + + Args: + cls_node: The AST class node representing the struct + module: The LLVM IR module (not used in current implementation) + + Returns: + A StructType object containing the struct's type information + """ fields = parse_struct_fields(cls_node) field_types = list(fields.values()) @@ -44,7 +62,18 @@ def process_bpf_struct(cls_node, module): def parse_struct_fields(cls_node): - """Parse fields of a struct class node""" + """ + Parse fields of a struct class node. + + Args: + cls_node: The AST class node representing the struct + + Returns: + A dictionary mapping field names to their LLVM IR types + + Raises: + TypeError: If a field has an unsupported type annotation + """ fields = {} for item in cls_node.body: @@ -57,7 +86,18 @@ def parse_struct_fields(cls_node): def get_type_from_ann(annotation): - """Convert an AST annotation node to an LLVM IR type for struct fields""" + """ + Convert an AST annotation node to an LLVM IR type for struct fields. + + Args: + annotation: The AST annotation node (e.g., c_int64, str(32)) + + Returns: + The corresponding LLVM IR type + + Raises: + TypeError: If the annotation type is not supported + """ if isinstance(annotation, ast.Call) and isinstance(annotation.func, ast.Name): if annotation.func.id == "str": # Char array @@ -72,7 +112,15 @@ def get_type_from_ann(annotation): def calc_struct_size(field_types): - """Calculate total size of the struct with alignment and padding""" + """ + Calculate total size of the struct with alignment and padding. 
+ + Args: + field_types: List of LLVM IR types for each field + + Returns: + The total size of the struct in bytes + """ curr_offset = 0 for ftype in field_types: if isinstance(ftype, ir.IntType): diff --git a/pythonbpf/type_deducer.py b/pythonbpf/type_deducer.py index 9867cc6..bb402dc 100644 --- a/pythonbpf/type_deducer.py +++ b/pythonbpf/type_deducer.py @@ -19,10 +19,31 @@ mapping = { def ctypes_to_ir(ctype: str): + """ + Convert a ctypes type name to its corresponding LLVM IR type. + + Args: + ctype: String name of the ctypes type (e.g., 'c_int64', 'c_void_p') + + Returns: + The corresponding LLVM IR type + + Raises: + NotImplementedError: If the ctype is not supported + """ if ctype in mapping: return mapping[ctype] raise NotImplementedError(f"No mapping for {ctype}") def is_ctypes(ctype: str) -> bool: + """ + Check if a given type name is a supported ctypes type. + + Args: + ctype: String name of the type to check + + Returns: + True if the type is a supported ctypes type, False otherwise + """ return ctype in mapping