Mirror of https://github.com/varun-r-mallya/Python-BPF.git (synced 2025-12-31 21:06:25 +00:00)
Merge pull request #56 from pythonbpf/vmlinux-ir-gen
Adds IR and debug info generation capabilities for vmlinux imported structs
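For context, the "vmlinux imported structs" are the ctypes-backed classes exposed by the generated vmlinux module. The example program touched by this PR imports them like so (a minimal excerpt, names taken from that example, program body omitted):

from vmlinux import struct_xdp_md  # noqa: F401
from vmlinux import struct_qspinlock  # noqa: F401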
@@ -19,12 +19,22 @@ from pylibbpf import BpfProgram
 import tempfile
 from logging import Logger
 import logging
+import re
 
 logger: Logger = logging.getLogger(__name__)
 
 VERSION = "v0.1.4"
 
 
+def finalize_module(original_str):
+    """After all IR generation is complete, we monkey patch btf_ama attribute"""
+
+    # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses.
+    pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
+    replacement = r'\1 "btf_ama"'
+    return re.sub(pattern, replacement, original_str)
+
+
 def find_bpf_chunks(tree):
     """Find all functions decorated with @bpf in the AST."""
     bpf_functions = []
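A quick way to see what finalize_module does is to run the same substitution over a single hand-written IR line of the shape the IR generator later in this PR emits. The global name and metadata id below are illustrative, not taken from real compiler output:

import re

pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
line = '@"llvm.xdp_md:0:0$0:0" = external global i64, !llvm.preserve.access.index !3'
print(re.sub(pattern, r'\1 "btf_ama"', line))
# -> @"llvm.xdp_md:0:0$0:0" = external global i64, !llvm.preserve.access.index !3 "btf_ama"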
@@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
 
     module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
 
+    module_string = finalize_module(str(module))
+
     logger.info(f"IR written to {output}")
     with open(output, "w") as f:
         f.write(f'source_filename = "{filename}"\n')
-        f.write(str(module))
+        f.write(module_string)
         f.write("\n")
 
     return output
@@ -60,6 +60,10 @@ def process_vmlinux_post_ast(
         pass
     else:
         new_dep_node = DependencyNode(name=current_symbol_name)
+
+        # elem_type_class is the actual vmlinux struct/class
+        new_dep_node.set_ctype_struct(elem_type_class)
+
         handler.add_node(new_dep_node)
     class_obj = getattr(imported_module, current_symbol_name)
     # Inspect the class fields
@@ -71,9 +75,6 @@ def process_vmlinux_post_ast(
             if len(field_elem) == 2:
                 field_name, field_type = field_elem
             elif len(field_elem) == 3:
-                raise NotImplementedError(
-                    "Bitfields are not supported in the current version"
-                )
                 field_name, field_type, bitfield_size = field_elem
                 field_table[field_name] = [field_type, bitfield_size]
         elif hasattr(class_obj, "__annotations__"):
@@ -144,15 +145,35 @@ def process_vmlinux_post_ast(
             )
             new_dep_node.set_field_type(elem_name, elem_type)
             if containing_type.__module__ == "vmlinux":
-                process_vmlinux_post_ast(
-                    containing_type, llvm_handler, handler, processing_stack
-                )
-                size_of_containing_type = (
-                    handler[containing_type.__name__]
-                ).__sizeof__()
-                new_dep_node.set_field_ready(
-                    elem_name, True, size_of_containing_type
-                )
+                containing_type_name = (
+                    containing_type.__name__
+                    if hasattr(containing_type, "__name__")
+                    else str(containing_type)
+                )
+
+                # Check for self-reference or already processed
+                if containing_type_name == current_symbol_name:
+                    # Self-referential pointer
+                    logger.debug(
+                        f"Self-referential pointer in {current_symbol_name}.{elem_name}"
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
+                elif handler.has_node(containing_type_name):
+                    # Already processed
+                    logger.debug(
+                        f"Reusing already processed {containing_type_name}"
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
+                else:
+                    # Process recursively - THIS WAS MISSING
+                    new_dep_node.add_dependent(containing_type_name)
+                    process_vmlinux_post_ast(
+                        containing_type,
+                        llvm_handler,
+                        handler,
+                        processing_stack,
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
             elif containing_type.__module__ == ctypes.__name__:
                 logger.debug(f"Processing ctype internal{containing_type}")
                 new_dep_node.set_field_ready(elem_name, True)
@@ -169,12 +190,7 @@ def process_vmlinux_post_ast(
                 process_vmlinux_post_ast(
                     elem_type, llvm_handler, handler, processing_stack
                 )
-                size_of_containing_type = (
-                    handler[elem_type.__name__]
-                ).__sizeof__()
-                new_dep_node.set_field_ready(
-                    elem_name, True, size_of_containing_type
-                )
+                new_dep_node.set_field_ready(elem_name, True)
             else:
                 raise ValueError(
                     f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
@@ -167,3 +167,7 @@ class DependencyHandler:
         if name not in self._nodes:
             raise KeyError(f"No node with name '{name}' found")
         return self._nodes[name]
+
+    @property
+    def nodes(self):
+        return self._nodes
@@ -116,6 +116,7 @@ class DependencyNode:
     fields: Dict[str, Field] = field(default_factory=dict)
     _ready_cache: Optional[bool] = field(default=None, repr=False)
     current_offset: int = 0
+    ctype_struct: Optional[Any] = field(default=None, repr=False)
 
     def add_field(
         self,
@@ -146,7 +147,14 @@ class DependencyNode:
         # Invalidate readiness cache
         self._ready_cache = None
 
+    def set_ctype_struct(self, ctype_struct: Any) -> None:
+        """Set the ctypes structure for automatic offset calculation."""
+        self.ctype_struct = ctype_struct
+
     def __sizeof__(self):
+        # If we have a ctype_struct, use its size
+        if self.ctype_struct is not None:
+            return ctypes.sizeof(self.ctype_struct)
         return self.current_offset
 
     def get_field(self, name: str) -> Field:
@@ -226,8 +234,22 @@ class DependencyNode:
             raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
 
         self.fields[name].set_ready(is_ready)
-        self.fields[name].set_offset(self.current_offset)
-        self.current_offset += self._calculate_size(name, size_of_containing_type)
+        # Use ctypes built-in offset if available
+        if self.ctype_struct is not None:
+            try:
+                self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
+            except AttributeError:
+                # Fallback to manual calculation if field not found in ctype_struct
+                self.fields[name].set_offset(self.current_offset)
+                self.current_offset += self._calculate_size(
+                    name, size_of_containing_type
+                )
+        else:
+            # Manual offset calculation when no ctype_struct is available
+            self.fields[name].set_offset(self.current_offset)
+            self.current_offset += self._calculate_size(name, size_of_containing_type)
 
         # Invalidate readiness cache
         self._ready_cache = None
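The new offset path leans on ctypes' own layout bookkeeping: the field descriptors on a ctypes.Structure subclass expose the offset ctypes computed, and ctypes.sizeof gives the struct size that __sizeof__ now returns. A small sketch with a made-up struct (not a real vmlinux type):

import ctypes

class struct_example(ctypes.Structure):  # hypothetical stand-in for a vmlinux-generated class
    _fields_ = [("a", ctypes.c_uint32), ("b", ctypes.c_uint64)]

print(struct_example.a.offset, struct_example.b.offset)  # 0 8 (with default alignment/padding)
print(ctypes.sizeof(struct_example))                     # 16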
@@ -275,8 +297,28 @@ class DependencyNode:
                 raise NotImplementedError(
                     "This subclass of ctype not supported yet"
                 )
+        elif processing_field.type_size is not None:
+            # Handle vmlinux types with type_size but no ctype_complex_type
+            # This means it's a direct vmlinux struct field (not array/pointer wrapped)
+            # The type_size should already contain the full size of the struct
+            # But if there's a containing_type from vmlinux, we need that size
+            if processing_field.containing_type is not None:
+                if processing_field.containing_type.__module__ == "vmlinux":
+                    # For vmlinux containing types, we need the pre-calculated size
+                    if size_of_containing_type is not None:
+                        return size_of_containing_type * processing_field.type_size
+                    else:
+                        raise RuntimeError(
+                            f"Field {name}: vmlinux containing_type requires size_of_containing_type"
+                        )
+                else:
+                    raise ModuleNotFoundError(
+                        f"Containing type module {processing_field.containing_type.__module__} not supported"
+                    )
+            else:
+                raise RuntimeError("Wrong type found with no containing type")
         else:
-            # search up pre-created stuff and get size
+            # No ctype_complex_type and no type_size, must rely on size_of_containing_type
             if size_of_containing_type is None:
                 raise RuntimeError(
                     f"Size of containing type {size_of_containing_type} is None"
pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py (new file, 15 lines)
@@ -0,0 +1,15 @@
+from pythonbpf.debuginfo import DebugInfoGenerator
+
+
+def debug_info_generation(struct, llvm_module):
+    generator = DebugInfoGenerator(llvm_module)
+    # this is sample debug info generation
+    # i64type = generator.get_uint64_type()
+
+    struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True)
+
+    global_var = generator.create_global_var_debug_info(
+        struct.name, struct_type, is_local=False
+    )
+
+    return global_var
@@ -1,12 +1,17 @@
+import ctypes
 import logging
-from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler
+from ..dependency_handler import DependencyHandler
+from .debug_info_gen import debug_info_generation
+from ..dependency_node import DependencyNode
+import llvmlite.ir as ir
 
 logger = logging.getLogger(__name__)
 
 
 class IRGenerator:
-    def __init__(self, module, handler: DependencyHandler):
-        self.module = module
+    # get the assignments dict and add this stuff to it.
+    def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
+        self.llvm_module = llvm_module
         self.handler: DependencyHandler = handler
         self.generated: list[str] = []
         if not handler.is_ready:
@@ -15,22 +20,142 @@ class IRGenerator:
             )
         for struct in handler:
             self.struct_processor(struct)
-            print()
 
-    def struct_processor(self, struct):
-        if struct.name not in self.generated:
-            print(f"IR generating for {struct.name}")
-            print(f"Struct is {struct}")
-            for dependency in struct.depends_on:
-                if dependency not in self.generated:
-                    dep_node_from_dependency = self.handler[dependency]
-                    self.struct_processor(dep_node_from_dependency)
-                    self.generated.append(dependency)
-            # write actual processor logic here after assuming all dependencies are resolved
-            # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that.
-            self.generated.append(struct.name)
-
-    def struct_name_generator(
-        self,
-    ) -> None:
-        pass
+    def struct_processor(self, struct, processing_stack=None):
+        # Initialize processing stack on first call
+        if processing_stack is None:
+            processing_stack = set()
+
+        # If already generated, skip
+        if struct.name in self.generated:
+            return
+
+        # Detect circular dependency
+        if struct.name in processing_stack:
+            logger.info(
+                f"Circular dependency detected for {struct.name}, skipping recursive processing"
+            )
+            # For circular dependencies, we can either:
+            # 1. Use forward declarations (opaque pointers)
+            # 2. Mark as incomplete and process later
+            # 3. Generate a placeholder type
+            # Here we'll just skip and let it be processed in its own call
+            return
+
+        logger.info(f"IR generating for {struct.name}")
+
+        # Add to processing stack before processing dependencies
+        processing_stack.add(struct.name)
+
+        try:
+            # Process all dependencies first
+            if struct.depends_on is None:
+                pass
+            else:
+                for dependency in struct.depends_on:
+                    if dependency not in self.generated:
+                        # Check if dependency exists in handler
+                        if dependency in self.handler.nodes:
+                            dep_node_from_dependency = self.handler[dependency]
+                            # Pass the processing_stack down to track circular refs
+                            self.struct_processor(
+                                dep_node_from_dependency, processing_stack
+                            )
+                        else:
+                            raise RuntimeError(
+                                f"Warning: Dependency {dependency} not found in handler"
+                            )
+
+            # Actual processor logic here after dependencies are resolved
+            self.gen_ir(struct)
+            self.generated.append(struct.name)
+
+        finally:
+            # Remove from processing stack after we're done
+            processing_stack.discard(struct.name)
+
+    def gen_ir(self, struct):
+        # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
+        # accepts our issue, we will resort to normal accessed attribute based attribute addition
+        # currently we generate all possible field accesses for CO-RE and put into the assignment table
+        debug_info = debug_info_generation(struct, self.llvm_module)
+        field_index = 0
+        for field_name, field in struct.fields.items():
+            # does not take arrays and similar types into consideration yet.
+            if field.ctype_complex_type is not None and issubclass(
+                field.ctype_complex_type, ctypes.Array
+            ):
+                array_size = field.type_size
+                containing_type = field.containing_type
+                if containing_type.__module__ == ctypes.__name__:
+                    containing_type_size = ctypes.sizeof(containing_type)
+                    for i in range(0, array_size):
+                        field_co_re_name = self._struct_name_generator(
+                            struct, field, field_index, True, i, containing_type_size
+                        )
+                        globvar = ir.GlobalVariable(
+                            self.llvm_module, ir.IntType(64), name=field_co_re_name
+                        )
+                        globvar.linkage = "external"
+                        globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                field_index += 1
+            elif field.type_size is not None:
+                array_size = field.type_size
+                containing_type = field.containing_type
+                if containing_type.__module__ == "vmlinux":
+                    containing_type_size = self.handler[
+                        containing_type.__name__
+                    ].current_offset
+                    for i in range(0, array_size):
+                        field_co_re_name = self._struct_name_generator(
+                            struct, field, field_index, True, i, containing_type_size
+                        )
+                        globvar = ir.GlobalVariable(
+                            self.llvm_module, ir.IntType(64), name=field_co_re_name
+                        )
+                        globvar.linkage = "external"
+                        globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                field_index += 1
+            else:
+                field_co_re_name = self._struct_name_generator(
+                    struct, field, field_index
+                )
+                field_index += 1
+                globvar = ir.GlobalVariable(
+                    self.llvm_module, ir.IntType(64), name=field_co_re_name
+                )
+                globvar.linkage = "external"
+                globvar.set_metadata("llvm.preserve.access.index", debug_info)
+
+    def _struct_name_generator(
+        self,
+        struct: DependencyNode,
+        field,
+        field_index: int,
+        is_indexed: bool = False,
+        index: int = 0,
+        containing_type_size: int = 0,
+    ) -> str:
+        if is_indexed:
+            name = (
+                "llvm."
+                + struct.name.removeprefix("struct_")
+                + f":0:{field.offset + index * containing_type_size}"
+                + "$"
+                + f"0:{field_index}:{index}"
+            )
+            return name
+        elif struct.name.startswith("struct_"):
+            name = (
+                "llvm."
+                + struct.name.removeprefix("struct_")
+                + f":0:{field.offset}"
+                + "$"
+                + f"0:{field_index}"
+            )
+            return name
+        else:
+            print(self.handler[struct.name])
+            raise TypeError(
+                "Name generation cannot occur due to type name not starting with struct"
+            )
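The CO-RE access globals emitted by gen_ir are plain external i64 globals whose names encode the struct, a byte offset, and an access index. With the scheme in _struct_name_generator, a hypothetical first field of struct_xdp_md at offset 0 would come out as follows (values assumed for illustration):

struct_name = "struct_xdp_md"  # assumed example struct
field_offset = 0               # field.offset of the first field
field_index = 0

name = (
    "llvm."
    + struct_name.removeprefix("struct_")
    + f":0:{field_offset}"
    + "$"
    + f"0:{field_index}"
)
print(name)  # llvm.xdp_md:0:0$0:0

Each such global gets external linkage and !llvm.preserve.access.index metadata, which is exactly the shape the finalize_module regex earlier in this PR keys on when appending "btf_ama".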
@@ -2,13 +2,16 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
 from pythonbpf.maps import HashMap
 from pythonbpf.helper import XDP_PASS
 from vmlinux import TASK_COMM_LEN  # noqa: F401
-from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
-# from vmlinux import struct_request
+
+from vmlinux import struct_qspinlock  # noqa: F401
+
+# from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
+# from vmlinux import struct_posix_cputimers  # noqa: F401
 from vmlinux import struct_xdp_md
+
 # from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
 # from vmlinux import struct_ring_buffer_per_cpu  # noqa: F401
+# from vmlinux import struct_request  # noqa: F401
 from ctypes import c_int64
+
 # Instructions to how to run this program
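Assuming the example above lives in its own file, the compile_to_ir entry point changed earlier in this PR is what lowers it to LLVM IR. A minimal sketch; the file names are placeholders, not paths from the repository:

from pythonbpf import compile_to_ir

# compile_to_ir(filename, output, loglevel=...) writes the finalized IR
# (with the "btf_ama" monkey patch applied) to the output path.
compile_to_ir("xdp_vmlinux_example.py", "xdp_vmlinux_example.ll")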