mirror of https://github.com/varun-r-mallya/Python-BPF.git (synced 2025-12-31 21:06:25 +00:00)

Merge pull request #58 from pythonbpf/vmlinux-handler
finish table construction for vmlinux symbol info transfer
@@ -55,11 +55,11 @@ def processor(source_code, filename, module):
     for func_node in bpf_chunks:
         logger.info(f"Found BPF function/struct: {func_node.name}")

-    vmlinux_proc(tree, module)
+    vmlinux_symtab = vmlinux_proc(tree, module)
     populate_global_symbol_table(tree, module)
     license_processing(tree, module)
     globals_processing(tree, module)
-
+    print("DEBUG:", vmlinux_symtab)
     structs_sym_tab = structs_proc(tree, module, bpf_chunks)
     map_sym_tab = maps_proc(tree, module, bpf_chunks)
     func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
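For orientation, the table returned by vmlinux_proc and dumped by the DEBUG print is a plain dict keyed by the imported vmlinux name. A minimal sketch of how a caller might inspect it (illustrative only; dump_vmlinux_symtab is a hypothetical helper, not part of the commit, and the contents depend entirely on the user's vmlinux imports):

# Sketch: pretty-print the symbol table returned above.
from pprint import pprint

def dump_vmlinux_symtab(vmlinux_symtab):
    # Each value is an AssignmentInfo entry (see assignment_info.py below).
    for name, info in (vmlinux_symtab or {}).items():
        pprint({name: info})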
pythonbpf/vmlinux_parser/assignment_info.py (new file, +36)
@@ -0,0 +1,36 @@
+from enum import Enum, auto
+from typing import Any, Dict, List, Optional, TypedDict
+from dataclasses import dataclass
+import llvmlite.ir as ir
+
+from pythonbpf.vmlinux_parser.dependency_node import Field
+
+
+@dataclass
+class AssignmentType(Enum):
+    CONSTANT = auto()
+    STRUCT = auto()
+    ARRAY = auto()  # probably won't be used
+    FUNCTION_POINTER = auto()
+    POINTER = auto()  # again, probably won't be used
+
+
+@dataclass
+class FunctionSignature(TypedDict):
+    return_type: str
+    param_types: List[str]
+    varargs: bool
+
+
+# The name of the assignment will be in the dict that uses this class
+@dataclass
+class AssignmentInfo(TypedDict):
+    value_type: AssignmentType
+    python_type: type
+    value: Optional[Any]
+    pointer_level: Optional[int]
+    signature: Optional[FunctionSignature]  # For function pointers
+    # The key of the dict is the name of the field.
+    # Value is a tuple that contains the global variable representing that field
+    # along with all the information about that field as a Field type.
+    members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]]  # For structs.
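As a quick illustration, an entry for an integer constant is built with keyword arguments, exactly as the constant branch of process_vmlinux_assign does further down; the symbol name and value here are assumptions for the example:

# Sketch, assuming vmlinux exposes an integer constant named TASK_COMM_LEN.
from pythonbpf.vmlinux_parser.assignment_info import AssignmentInfo, AssignmentType

assignments = {}
assignments["TASK_COMM_LEN"] = AssignmentInfo(
    value_type=AssignmentType.CONSTANT,
    python_type=int,
    value=16,            # illustrative value
    pointer_level=None,
    signature=None,
    members=None,
)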
@@ -1,6 +1,7 @@
 import logging
 from functools import lru_cache
 import importlib
+
 from .dependency_handler import DependencyHandler
 from .dependency_node import DependencyNode
 import ctypes
@@ -15,7 +16,11 @@ def get_module_symbols(module_name: str):
     return [name for name in dir(imported_module)], imported_module


-def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
+def process_vmlinux_class(
+    node,
+    llvm_module,
+    handler: DependencyHandler,
+):
     symbols_in_module, imported_module = get_module_symbols("vmlinux")
     if node.name in symbols_in_module:
         vmlinux_type = getattr(imported_module, node.name)
@@ -25,7 +30,10 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):


 def process_vmlinux_post_ast(
-    elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
+    elem_type_class,
+    llvm_handler,
+    handler: DependencyHandler,
+    processing_stack=None,
 ):
     # Initialize processing stack on first call
     if processing_stack is None:
@@ -46,7 +54,7 @@ def process_vmlinux_post_ast(
         logger.debug(f"Node {current_symbol_name} already processed and ready")
         return True

-    # XXX:Check it's use. It's probably not being used.
+    # XXX:Check its use. It's probably not being used.
     if current_symbol_name in processing_stack:
         logger.debug(
             f"Dependency already in processing stack for {current_symbol_name}, skipping"
@@ -98,6 +106,7 @@ def process_vmlinux_post_ast(
         [elem_type, elem_bitfield_size] = elem_temp_list
         local_module_name = getattr(elem_type, "__module__", None)
         new_dep_node.add_field(elem_name, elem_type, ready=False)
+
         if local_module_name == ctypes.__name__:
             # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
             new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
@@ -226,7 +235,10 @@ def process_vmlinux_post_ast(
                     else str(elem_type)
                 )
                 process_vmlinux_post_ast(
-                    elem_type, llvm_handler, handler, processing_stack
+                    elem_type,
+                    llvm_handler,
+                    handler,
+                    processing_stack,
                 )
                 new_dep_node.set_field_ready(elem_name, True)
             else:
@@ -237,7 +249,7 @@ def process_vmlinux_post_ast(
     else:
         raise ImportError("UNSUPPORTED Module")

-    logging.info(
+    logger.info(
         f"{current_symbol_name} processed and handler readiness {handler.is_ready}"
     )
     return True
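The processing_stack threaded through process_vmlinux_post_ast is a plain cycle guard: initialise it on the first call, skip symbols already on it, recurse otherwise. A standalone sketch of the pattern, not the project's code (names here are made up):

# Minimal recursion-guard sketch; `deps` maps a type name to its dependencies.
def resolve(name, deps, processing_stack=None):
    if processing_stack is None:       # initialise on first call
        processing_stack = set()
    if name in processing_stack:       # already being processed higher up the chain
        return True
    processing_stack.add(name)
    try:
        for child in deps.get(name, []):
            resolve(child, deps, processing_stack)
    finally:
        processing_stack.discard(name)
    return True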
@@ -18,6 +18,31 @@ class Field:
     value: Any = None
     ready: bool = False

+    def __hash__(self):
+        """
+        Create a hash based on the immutable attributes that define this field's identity.
+        This allows Field objects to be used as dictionary keys.
+        """
+        # Use a tuple of the fields that uniquely identify this field
+        identity = (
+            self.name,
+            id(self.type),  # Use id for non-hashable types
+            id(self.ctype_complex_type) if self.ctype_complex_type else None,
+            id(self.containing_type) if self.containing_type else None,
+            self.type_size,
+            self.bitfield_size,
+            self.offset,
+            self.value if self.value else None,
+        )
+        return hash(identity)
+
+    def __eq__(self, other):
+        """
+        Define equality consistent with the hash function.
+        Two fields are equal only if they are the same object.
+        """
+        return self is other
+
     def set_ready(self, is_ready: bool = True) -> None:
         """Set the readiness state of this field."""
         self.ready = is_ready
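Since __hash__ is attribute-based while __eq__ is identity-based, two Field objects with identical attributes land in the same hash bucket yet remain distinct dictionary keys. A simplified stand-in (not the real Field) demonstrating that contract:

# Stand-in class to show the hash/eq behaviour added above.
from dataclasses import dataclass

@dataclass
class FakeField:
    name: str
    type_size: int = 0

    def __hash__(self):
        return hash((self.name, self.type_size))

    def __eq__(self, other):
        return self is other

a, b = FakeField("pid"), FakeField("pid")
table = {a: "first", b: "second"}
assert len(table) == 2   # equal hashes, but identity equality keeps both keys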
@@ -1,9 +1,9 @@
 import ast
 import logging
-from typing import List, Tuple, Any
 import importlib
 import inspect

+from .assignment_info import AssignmentInfo, AssignmentType
 from .dependency_handler import DependencyHandler
 from .ir_gen import IRGenerator
 from .class_handler import process_vmlinux_class
@@ -11,7 +11,7 @@ from .class_handler import process_vmlinux_class
 logger = logging.getLogger(__name__)


-def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
+def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
     """
     Parse AST and detect import statements from vmlinux.

@@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
     # initialise dependency handler
     handler = DependencyHandler()
     # initialise assignment dictionary of name to type
-    assignments: dict[str, tuple[type, Any]] = {}
+    assignments: dict[str, AssignmentInfo] = {}

     if not import_statements:
         logger.info("No vmlinux imports found")
@@ -128,20 +128,35 @@ def vmlinux_proc(tree: ast.AST, module):
             f"{imported_name} not found as ClassDef or Assign in vmlinux"
         )

-    IRGenerator(module, handler)
+    IRGenerator(module, handler, assignments)
     return assignments


-def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]):
-    # Check if this is a simple assignment with a constant value
+def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
+    """Process assignments from vmlinux module."""
+    # Only handle single-target assignments
     if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
         target_name = node.targets[0].id

+        # Handle constant value assignments
         if isinstance(node.value, ast.Constant):
-            assignments[target_name] = (type(node.value.value), node.value.value)
+            # Fixed: using proper TypedDict creation syntax with named arguments
+            assignments[target_name] = AssignmentInfo(
+                value_type=AssignmentType.CONSTANT,
+                python_type=type(node.value.value),
+                value=node.value.value,
+                pointer_level=None,
+                signature=None,
+                members=None,
+            )
             logger.info(
                 f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
             )
+
+        # Handle other assignment types that we may need to support
         else:
-            raise ValueError(f"Unsupported assignment type for {target_name}")
+            logger.warning(
+                f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}"
+            )
     else:
         raise ValueError("Not a simple assignment")
@@ -1,5 +1,7 @@
 import ctypes
 import logging
+
+from ..assignment_info import AssignmentInfo, AssignmentType
 from ..dependency_handler import DependencyHandler
 from .debug_info_gen import debug_info_generation
 from ..dependency_node import DependencyNode
@@ -10,11 +12,14 @@ logger = logging.getLogger(__name__)

 class IRGenerator:
     # get the assignments dict and add this stuff to it.
-    def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
+    def __init__(self, llvm_module, handler: DependencyHandler, assignments):
         self.llvm_module = llvm_module
         self.handler: DependencyHandler = handler
         self.generated: list[str] = []
         self.generated_debug_info: list = []
+        # Use struct_name and field_name as key instead of Field object
+        self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {}
+        self.assignments: dict[str, AssignmentInfo] = assignments
         if not handler.is_ready:
             raise ImportError(
                 "Semantic analysis of vmlinux imports failed. Cannot generate IR"
@@ -67,10 +72,42 @@ class IRGenerator:
                     f"Warning: Dependency {dependency} not found in handler"
                 )

-            # Actual processor logic here after dependencies are resolved
+            # Generate IR first to populate field names
             self.generated_debug_info.append(
                 (struct, self.gen_ir(struct, self.generated_debug_info))
             )
+
+            # Fill the assignments dictionary with struct information
+            if struct.name not in self.assignments:
+                # Create a members dictionary for AssignmentInfo
+                members_dict = {}
+                for field_name, field in struct.fields.items():
+                    # Get the generated field name from our dictionary, or use field_name if not found
+                    if (
+                        struct.name in self.generated_field_names
+                        and field_name in self.generated_field_names[struct.name]
+                    ):
+                        field_global_variable = self.generated_field_names[struct.name][
+                            field_name
+                        ]
+                        members_dict[field_name] = (field_global_variable, field)
+                    else:
+                        raise ValueError(
+                            f"llvm global name not found for struct field {field_name}"
+                        )
+                    # members_dict[field_name] = (field_name, field)
+
+                # Add struct to assignments dictionary
+                self.assignments[struct.name] = AssignmentInfo(
+                    value_type=AssignmentType.STRUCT,
+                    python_type=struct.ctype_struct,
+                    value=None,
+                    pointer_level=None,
+                    signature=None,
+                    members=members_dict,
+                )
+                logger.info(f"Added struct assignment info for {struct.name}")
+
             self.generated.append(struct.name)

         finally:
@@ -85,6 +122,11 @@ class IRGenerator:
             struct, self.llvm_module, generated_debug_info
         )
         field_index = 0
+
+        # Make sure the struct has an entry in our field names dictionary
+        if struct.name not in self.generated_field_names:
+            self.generated_field_names[struct.name] = {}
+
         for field_name, field in struct.fields.items():
             # does not take arrays and similar types into consideration yet.
             if field.ctype_complex_type is not None and issubclass(
@@ -94,6 +136,18 @@ class IRGenerator:
                 containing_type = field.containing_type
                 if containing_type.__module__ == ctypes.__name__:
                     containing_type_size = ctypes.sizeof(containing_type)
+                    if array_size == 0:
+                        field_co_re_name = self._struct_name_generator(
+                            struct, field, field_index, True, 0, containing_type_size
+                        )
+                        globvar = ir.GlobalVariable(
+                            self.llvm_module, ir.IntType(64), name=field_co_re_name
+                        )
+                        globvar.linkage = "external"
+                        globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                        self.generated_field_names[struct.name][field_name] = globvar
+                        field_index += 1
+                        continue
                     for i in range(0, array_size):
                         field_co_re_name = self._struct_name_generator(
                             struct, field, field_index, True, i, containing_type_size
@@ -103,6 +157,7 @@ class IRGenerator:
                         )
                         globvar.linkage = "external"
                         globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                        self.generated_field_names[struct.name][field_name] = globvar
                         field_index += 1
                 elif field.type_size is not None:
                     array_size = field.type_size
@@ -120,6 +175,7 @@ class IRGenerator:
                     )
                     globvar.linkage = "external"
                     globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                    self.generated_field_names[struct.name][field_name] = globvar
                     field_index += 1
                 else:
                     field_co_re_name = self._struct_name_generator(
@@ -131,6 +187,7 @@ class IRGenerator:
                     )
                     globvar.linkage = "external"
                     globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                    self.generated_field_names[struct.name][field_name] = globvar
         return debug_info

     def _struct_name_generator(
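The per-field globals cached in generated_field_names are ordinary external i64 globals carrying llvm.preserve.access.index metadata. A minimal llvmlite sketch of one such global; the symbol name and metadata payload are placeholders, since the real ones come from _struct_name_generator and the CO-RE debug info generation:

# Sketch: one external i64 global of the kind stored in generated_field_names.
import llvmlite.ir as ir

module = ir.Module(name="vmlinux_fields")
globvar = ir.GlobalVariable(module, ir.IntType(64), name="example_struct_field_0")
globvar.linkage = "external"
placeholder_md = module.add_metadata([])   # stands in for the real CO-RE debug info node
globvar.set_metadata("llvm.preserve.access.index", placeholder_md)

generated_field_names = {"example_struct": {"example_field": globvar}}
print(generated_field_names["example_struct"]["example_field"].name)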
@@ -1,4 +1,4 @@
-from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
+from pythonbpf import bpf, section, bpfglobal, compile_to_ir
 from vmlinux import TASK_COMM_LEN  # noqa: F401
 from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401

@@ -27,4 +27,3 @@ def LICENSE() -> str:


 compile_to_ir("simple_struct_test.py", "simple_struct_test.ll")
-compile()