mirror of
https://github.com/varun-r-mallya/Python-BPF.git
synced 2026-03-21 20:51:28 +00:00
Compare commits
5 Commits
abbf17748d
...
8774277000
| Author | SHA1 | Date | |
|---|---|---|---|
| 8774277000 | |||
| 8743ea17f3 | |||
| f8844104a6 | |||
| 3343bedd11 | |||
| 75d3ad4fe2 |
@ -12,7 +12,7 @@
|
|||||||
#
|
#
|
||||||
# See https://github.com/pre-commit/pre-commit
|
# See https://github.com/pre-commit/pre-commit
|
||||||
|
|
||||||
exclude: 'vmlinux.*\.py$'
|
exclude: 'vmlinux.py'
|
||||||
|
|
||||||
ci:
|
ci:
|
||||||
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
||||||
|
|||||||
@ -1 +1,3 @@
|
|||||||
from .import_detector import vmlinux_proc
|
from .import_detector import vmlinux_proc
|
||||||
|
|
||||||
|
__all__ = ["vmlinux_proc"]
|
||||||
|
|||||||
177
pythonbpf/vmlinux_parser/class_handler.py
Normal file
177
pythonbpf/vmlinux_parser/class_handler.py
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
import logging
|
||||||
|
from functools import lru_cache
|
||||||
|
import importlib
|
||||||
|
from .dependency_handler import DependencyHandler
|
||||||
|
from .dependency_node import DependencyNode
|
||||||
|
import ctypes
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def get_module_symbols(module_name: str):
    """Import ``module_name`` and return its symbol names with the module.

    The result is memoized by ``lru_cache`` (a single entry), so repeated
    calls with the same name reuse the previously built tuple instead of
    calling ``dir()`` on the module again.

    Args:
        module_name: Name passed to ``importlib.import_module``.

    Returns:
        A ``(names, module)`` tuple: the list produced by ``dir(module)`` and
        the imported module object.

    Raises:
        ImportError: Propagated from ``importlib.import_module``.
    """
    imported_module = importlib.import_module(module_name)
    # dir() already returns a new list, so no copying comprehension is needed.
    return dir(imported_module), imported_module
|
||||||
|
|
||||||
|
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
    """Resolve the vmlinux class named by ``node`` and register its dependencies.

    Args:
        node: Object whose ``name`` attribute is the symbol to look up in the
            ``vmlinux`` module.
        llvm_module: The LLVM module context, forwarded unchanged to
            ``process_vmlinux_post_ast``.
        handler: DependencyHandler that collects the resolved nodes.

    Raises:
        ImportError: If ``node.name`` is not among the symbols of ``vmlinux``.
    """
    symbols_in_module, imported_module = get_module_symbols("vmlinux")

    # Guard clause: fail early when the requested symbol is unknown.
    if node.name not in symbols_in_module:
        raise ImportError(f"{node.name} not in vmlinux")

    vmlinux_type = getattr(imported_module, node.name)
    process_vmlinux_post_ast(vmlinux_type, llvm_module, handler)
|
||||||
|
|
||||||
|
# Recursive function that gets all the dependent classes and adds them to handler
def process_vmlinux_post_ast(
    node, llvm_module, handler: DependencyHandler, processing_stack=None
):
    """
    Recursively process a vmlinux class and the vmlinux types its fields use.

    Args:
        node: The class/type to process. Its symbol name is taken from
            ``node.name``, else ``node.__name__``, else ``str(node)``.
        llvm_module: The LLVM module context; only forwarded to recursive calls.
        handler: DependencyHandler to track all nodes
        processing_stack: Set of symbol names currently being processed, used
            to detect cycles. Created on the outermost call.

    Returns:
        True when the symbol was processed, was already present and ready in
        ``handler``, or is currently being processed further up the recursion.

    Raises:
        ImportError: If the symbol name is not present in the vmlinux module.
        TypeError: If the class exposes neither ``_fields_`` nor
            ``__annotations__``.
        ValueError: If a field type's ``__module__`` is neither ``ctypes`` nor
            ``vmlinux``.
    """
    # Initialize processing stack on first call
    if processing_stack is None:
        processing_stack = set()

    symbols_in_module, imported_module = get_module_symbols("vmlinux")

    # Handle both node objects and type objects
    if hasattr(node, "name"):
        current_symbol_name = node.name
    elif hasattr(node, "__name__"):
        current_symbol_name = node.__name__
    else:
        current_symbol_name = str(node)

    if current_symbol_name not in symbols_in_module:
        raise ImportError(f"{current_symbol_name} not present in module vmlinux")

    # Check if we're already processing this node (circular dependency)
    if current_symbol_name in processing_stack:
        logger.debug(
            "Circular dependency detected for %s, skipping", current_symbol_name
        )
        return True

    # Check if already processed
    if handler.has_node(current_symbol_name):
        existing_node = handler.get_node(current_symbol_name)
        # If the node exists and is ready, we're done
        if existing_node and existing_node.is_ready:
            logger.info("Node %s already processed and ready", current_symbol_name)
            return True

    logger.info("Resolving vmlinux class %s", current_symbol_name)
    # Lazy %-style arguments: the (potentially large) node collection is only
    # rendered to text when debug logging is actually enabled.
    logger.debug(
        "Current handler state: %s readiness and %s all nodes",
        handler.is_ready,
        handler.get_all_nodes(),
    )

    # Add to processing stack to detect cycles
    processing_stack.add(current_symbol_name)

    try:
        field_table = {}  # maps each field name to its type

        # Get the class object from the module
        class_obj = getattr(imported_module, current_symbol_name)

        # Inspect the class fields
        if hasattr(class_obj, "_fields_"):
            # ctypes bit fields are 3-item entries (name, type, bits); the
            # starred target accepts them instead of failing to unpack.
            for field_name, field_type, *_ in class_obj._fields_:
                field_table[field_name] = field_type
        elif hasattr(class_obj, "__annotations__"):
            for field_name, field_type in class_obj.__annotations__.items():
                field_table[field_name] = field_type
        else:
            raise TypeError("Could not get required class and definition")

        logger.debug("Extracted fields for %s: %s", current_symbol_name, field_table)

        # Create or get the node
        if handler.has_node(current_symbol_name):
            new_dep_node = handler.get_node(current_symbol_name)
        else:
            new_dep_node = DependencyNode(name=current_symbol_name)
            handler.add_node(new_dep_node)

        # Process each field
        for elem_name, elem_type in field_table.items():
            module_name = getattr(elem_type, "__module__", None)

            if module_name == ctypes.__name__:
                # Simple ctypes - mark as ready immediately
                new_dep_node.add_field(elem_name, elem_type, ready=True)

            elif module_name == "vmlinux":
                # Complex vmlinux type - needs recursive processing
                new_dep_node.add_field(elem_name, elem_type, ready=False)
                logger.debug(
                    "Processing vmlinux field: %s, type: %s", elem_name, elem_type
                )

                identify_ctypes_type(elem_name, elem_type, new_dep_node)

                # Determine the actual symbol to process
                symbol_name = (
                    elem_type.__name__
                    if hasattr(elem_type, "__name__")
                    else str(elem_type)
                )
                vmlinux_symbol = None

                # Handle pointers/arrays to other types
                if hasattr(elem_type, "_type_"):
                    inner_type = elem_type._type_
                    containing_module_name = getattr(inner_type, "__module__", None)
                    if containing_module_name == ctypes.__name__:
                        # Pointer/Array to ctypes - mark as ready
                        new_dep_node.set_field_ready(elem_name, True)
                        continue
                    elif containing_module_name == "vmlinux":
                        # Pointer/Array to vmlinux type
                        symbol_name = (
                            inner_type.__name__
                            if hasattr(inner_type, "__name__")
                            else str(inner_type)
                        )

                        # Self-referential check
                        if symbol_name == current_symbol_name:
                            logger.debug(
                                "Self-referential field %s in %s",
                                elem_name,
                                current_symbol_name,
                            )
                            # For pointers to self, we can mark as ready since
                            # the type is being defined
                            new_dep_node.set_field_ready(elem_name, True)
                            continue

                        vmlinux_symbol = getattr(imported_module, symbol_name)
                else:
                    # Direct vmlinux type (not pointer/array)
                    vmlinux_symbol = getattr(imported_module, symbol_name)

                if vmlinux_symbol is None:
                    # The element type lives in neither ctypes nor vmlinux, so
                    # there is nothing to recurse into; record that the field
                    # was left untouched instead of skipping it silently.
                    logger.debug(
                        "Field %s of %s has an element type outside ctypes and "
                        "vmlinux; not resolved",
                        elem_name,
                        current_symbol_name,
                    )
                    continue

                # Recursively process the dependency
                if process_vmlinux_post_ast(
                    vmlinux_symbol, llvm_module, handler, processing_stack
                ):
                    new_dep_node.set_field_ready(elem_name, True)
            else:
                raise ValueError(
                    f"{elem_name} with type {elem_type} not supported in recursive resolver"
                )

        logger.info("Successfully processed node: %s", current_symbol_name)
        return True

    finally:
        # Remove from processing stack when done
        processing_stack.discard(current_symbol_name)
|
||||||
|
|
||||||
|
|
||||||
|
def identify_ctypes_type(elem_name, elem_type, new_dep_node: "DependencyNode"):
    """Record array/pointer metadata of ``elem_type`` on a node's field.

    For a ``ctypes.Array`` subclass the field's type is set to ``ctypes.Array``
    and its element type and length are stored. For a ``ctypes._Pointer``
    subclass the field's type is set to ``ctypes._Pointer`` and the pointed-to
    type is stored. Any other class leaves the field untouched.

    The setters are called with ``mark_ready=False``: this function only
    records metadata, and whether the field is ready is decided by the caller
    once the referenced type has been resolved.

    Args:
        elem_name: Name of the field on ``new_dep_node``.
        elem_type: The field's type; must be a class, not an instance.
        new_dep_node: Node that already contains a field named ``elem_name``.

    Raises:
        TypeError: If ``elem_type`` is not a class.
    """
    if not isinstance(elem_type, type):
        raise TypeError("Instance sent instead of Class")

    if issubclass(elem_type, ctypes.Array):
        new_dep_node.set_field_type(elem_name, ctypes.Array, mark_ready=False)
        new_dep_node.set_field_containing_type(
            elem_name, elem_type._type_, mark_ready=False
        )
        new_dep_node.set_field_type_size(
            elem_name, elem_type._length_, mark_ready=False
        )
    elif issubclass(elem_type, ctypes._Pointer):
        new_dep_node.set_field_type(elem_name, ctypes._Pointer, mark_ready=False)
        new_dep_node.set_field_containing_type(
            elem_name, elem_type._type_, mark_ready=False
        )
|
||||||
@ -1,12 +1,15 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Dict, Any, Optional
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
# TODO: Fix the type naming convention.
|
||||||
@dataclass
|
@dataclass
|
||||||
class Field:
|
class Field:
|
||||||
"""Represents a field in a dependency node with its type and readiness state."""
|
"""Represents a field in a dependency node with its type and readiness state."""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
type: type
|
type: type
|
||||||
|
containing_type: Optional[Any]
|
||||||
|
type_size: Optional[int]
|
||||||
value: Any = None
|
value: Any = None
|
||||||
ready: bool = False
|
ready: bool = False
|
||||||
|
|
||||||
@ -20,6 +23,26 @@ class Field:
|
|||||||
if mark_ready:
|
if mark_ready:
|
||||||
self.ready = True
|
self.ready = True
|
||||||
|
|
||||||
|
def set_type(self, given_type, mark_ready: bool = True) -> None:
    """Set this field's type and optionally mark the field as ready.

    Args:
        given_type: New value stored in ``self.type``.
        mark_ready: When true (the default), ``self.ready`` is set to True;
            otherwise ``self.ready`` is left unchanged.
    """
    self.type = given_type
    if mark_ready:
        self.ready = True
|
||||||
|
|
||||||
|
def set_containing_type(
    self, containing_type: Optional[Any], mark_ready: bool = True
) -> None:
    """Set the containing_type of this field and optionally mark it as ready.

    Args:
        containing_type: New value stored in ``self.containing_type``.
        mark_ready: When true (the default), ``self.ready`` is set to True;
            otherwise ``self.ready`` is left unchanged.
    """
    self.containing_type = containing_type
    if mark_ready:
        self.ready = True
|
||||||
|
|
||||||
|
def set_type_size(self, type_size: Any, mark_ready: bool = True) -> None:
    """Set the type_size of this field and optionally mark it as ready.

    Args:
        type_size: New value stored in ``self.type_size``.
        mark_ready: When true (the default), ``self.ready`` is set to True;
            otherwise ``self.ready`` is left unchanged.
    """
    self.type_size = type_size
    if mark_ready:
        self.ready = True
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DependencyNode:
|
class DependencyNode:
|
||||||
@ -64,13 +87,29 @@ class DependencyNode:
|
|||||||
ready_fields = somestruct.get_ready_fields()
|
ready_fields = somestruct.get_ready_fields()
|
||||||
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
|
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
fields: Dict[str, Field] = field(default_factory=dict)
|
fields: Dict[str, Field] = field(default_factory=dict)
|
||||||
_ready_cache: Optional[bool] = field(default=None, repr=False)
|
_ready_cache: Optional[bool] = field(default=None, repr=False)
|
||||||
|
|
||||||
def add_field(self, name: str, field_type: type, initial_value: Any = None, ready: bool = False) -> None:
|
def add_field(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
field_type: type,
|
||||||
|
initial_value: Any = None,
|
||||||
|
containing_type: Optional[Any] = None,
|
||||||
|
type_size: Optional[int] = None,
|
||||||
|
ready: bool = False,
|
||||||
|
) -> None:
|
||||||
"""Add a field to the node with an optional initial value and readiness state."""
|
"""Add a field to the node with an optional initial value and readiness state."""
|
||||||
self.fields[name] = Field(name=name, type=field_type, value=initial_value, ready=ready)
|
self.fields[name] = Field(
|
||||||
|
name=name,
|
||||||
|
type=field_type,
|
||||||
|
value=initial_value,
|
||||||
|
ready=ready,
|
||||||
|
containing_type=containing_type,
|
||||||
|
type_size=type_size,
|
||||||
|
)
|
||||||
# Invalidate readiness cache
|
# Invalidate readiness cache
|
||||||
self._ready_cache = None
|
self._ready_cache = None
|
||||||
|
|
||||||
@ -87,6 +126,37 @@ class DependencyNode:
|
|||||||
# Invalidate readiness cache
|
# Invalidate readiness cache
|
||||||
self._ready_cache = None
|
self._ready_cache = None
|
||||||
|
|
||||||
|
def set_field_type(self, name: str, type: Any, mark_ready: bool = True) -> None:
    """Set a field's type and optionally mark it as ready.

    Args:
        name: Key of the field in ``self.fields``.
        type: Value forwarded to the field's ``set_type``.
        mark_ready: Forwarded to the field's ``set_type``.

    Raises:
        KeyError: If ``name`` is not a key of ``self.fields``.
    """
    if name not in self.fields:
        raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")

    self.fields[name].set_type(type, mark_ready)
    # Invalidate readiness cache
    self._ready_cache = None
|
||||||
|
|
||||||
|
def set_field_containing_type(
    self, name: str, containing_type: Any, mark_ready: bool = True
) -> None:
    """Set a field's containing_type and optionally mark it as ready.

    Args:
        name: Key of the field in ``self.fields``.
        containing_type: Value forwarded to the field's ``set_containing_type``.
        mark_ready: Forwarded to the field's ``set_containing_type``.

    Raises:
        KeyError: If ``name`` is not a key of ``self.fields``.
    """
    if name not in self.fields:
        raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")

    self.fields[name].set_containing_type(containing_type, mark_ready)
    # Invalidate readiness cache
    self._ready_cache = None
|
||||||
|
|
||||||
|
def set_field_type_size(
    self, name: str, type_size: Any, mark_ready: bool = True
) -> None:
    """Set a field's type_size and optionally mark it as ready.

    Args:
        name: Key of the field in ``self.fields``.
        type_size: Value forwarded to the field's ``set_type_size``.
        mark_ready: Forwarded to the field's ``set_type_size``.

    Raises:
        KeyError: If ``name`` is not a key of ``self.fields``.
    """
    if name not in self.fields:
        raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")

    self.fields[name].set_type_size(type_size, mark_ready)
    # Invalidate readiness cache
    self._ready_cache = None
|
||||||
|
|
||||||
def set_field_ready(self, name: str, is_ready: bool = True) -> None:
|
def set_field_ready(self, name: str, is_ready: bool = True) -> None:
|
||||||
"""Mark a field as ready or not ready."""
|
"""Mark a field as ready or not ready."""
|
||||||
if name not in self.fields:
|
if name not in self.fields:
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import inspect
|
|||||||
|
|
||||||
from .dependency_handler import DependencyHandler
|
from .dependency_handler import DependencyHandler
|
||||||
from .ir_generation import IRGenerator
|
from .ir_generation import IRGenerator
|
||||||
from .vmlinux_class_handler import process_vmlinux_class
|
from .class_handler import process_vmlinux_class
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -58,8 +58,8 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
|
|||||||
# Valid single import
|
# Valid single import
|
||||||
for alias in node.names:
|
for alias in node.names:
|
||||||
import_name = alias.name
|
import_name = alias.name
|
||||||
# Use alias if provided, otherwise use the original name
|
# Use alias if provided, otherwise use the original name (commented)
|
||||||
as_name = alias.asname if alias.asname else alias.name
|
# as_name = alias.asname if alias.asname else alias.name
|
||||||
vmlinux_imports.append(("vmlinux", node))
|
vmlinux_imports.append(("vmlinux", node))
|
||||||
logger.info(f"Found vmlinux import: {import_name}")
|
logger.info(f"Found vmlinux import: {import_name}")
|
||||||
|
|
||||||
@ -68,13 +68,14 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
|
|||||||
for alias in node.names:
|
for alias in node.names:
|
||||||
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
|
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
|
||||||
raise SyntaxError(
|
raise SyntaxError(
|
||||||
f"Direct import of vmlinux module is not supported. "
|
"Direct import of vmlinux module is not supported. "
|
||||||
f"Use 'from vmlinux import <type>' instead."
|
"Use 'from vmlinux import <type>' instead."
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
|
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
|
||||||
return vmlinux_imports
|
return vmlinux_imports
|
||||||
|
|
||||||
|
|
||||||
def vmlinux_proc(tree: ast.AST, module):
|
def vmlinux_proc(tree: ast.AST, module):
|
||||||
import_statements = detect_import_statement(tree)
|
import_statements = detect_import_statement(tree)
|
||||||
|
|
||||||
@ -107,7 +108,10 @@ def vmlinux_proc(tree: ast.AST, module):
|
|||||||
imported_name = alias.name
|
imported_name = alias.name
|
||||||
found = False
|
found = False
|
||||||
for mod_node in mod_ast.body:
|
for mod_node in mod_ast.body:
|
||||||
if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
|
if (
|
||||||
|
isinstance(mod_node, ast.ClassDef)
|
||||||
|
and mod_node.name == imported_name
|
||||||
|
):
|
||||||
process_vmlinux_class(mod_node, module, handler)
|
process_vmlinux_class(mod_node, module, handler)
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
@ -120,9 +124,12 @@ def vmlinux_proc(tree: ast.AST, module):
|
|||||||
if found:
|
if found:
|
||||||
break
|
break
|
||||||
if not found:
|
if not found:
|
||||||
logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")
|
logger.info(
|
||||||
|
f"{imported_name} not found as ClassDef or Assign in vmlinux"
|
||||||
|
)
|
||||||
|
|
||||||
IRGenerator(module, handler)
|
IRGenerator(module, handler)
|
||||||
|
|
||||||
|
|
||||||
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
|
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
|
||||||
raise NotImplementedError("Assignment handling has not been implemented yet")
|
raise NotImplementedError("Assignment handling has not been implemented yet")
|
||||||
|
|||||||
@ -1,96 +0,0 @@
|
|||||||
import ast
|
|
||||||
import logging
|
|
||||||
from functools import lru_cache
|
|
||||||
import importlib
|
|
||||||
from .dependency_handler import DependencyHandler
|
|
||||||
from .dependency_node import DependencyNode
|
|
||||||
import ctypes
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
|
||||||
def get_module_symbols(module_name: str):
|
|
||||||
imported_module = importlib.import_module(module_name)
|
|
||||||
return [name for name in dir(imported_module)], imported_module
|
|
||||||
|
|
||||||
|
|
||||||
# Recursive function that gets all the dependent classes and adds them to handler
|
|
||||||
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
|
|
||||||
symbols_in_module, imported_module = get_module_symbols("vmlinux")
|
|
||||||
|
|
||||||
# Handle both node objects and type objects
|
|
||||||
if hasattr(node, 'name'):
|
|
||||||
current_symbol_name = node.name
|
|
||||||
elif hasattr(node, '__name__'):
|
|
||||||
current_symbol_name = node.__name__
|
|
||||||
else:
|
|
||||||
current_symbol_name = str(node)
|
|
||||||
|
|
||||||
if current_symbol_name not in symbols_in_module:
|
|
||||||
raise ImportError(f"{current_symbol_name} not present in module vmlinux")
|
|
||||||
logger.info(f"Resolving vmlinux class {current_symbol_name}")
|
|
||||||
logger.debug(f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes")
|
|
||||||
field_table = {} # should contain the field and it's type.
|
|
||||||
|
|
||||||
# Get the class object from the module
|
|
||||||
class_obj = getattr(imported_module, current_symbol_name)
|
|
||||||
|
|
||||||
# Below, I've written a general structure that gets class-info
|
|
||||||
# everytime, no matter the format in which it is present
|
|
||||||
|
|
||||||
# Inspect the class fields
|
|
||||||
# Assuming class_obj has fields stored in some standard way
|
|
||||||
# If it's a ctypes-like structure with _fields_
|
|
||||||
if hasattr(class_obj, '_fields_'):
|
|
||||||
for field_name, field_type in class_obj._fields_:
|
|
||||||
field_table[field_name] = field_type
|
|
||||||
|
|
||||||
# If it's using __annotations__
|
|
||||||
elif hasattr(class_obj, '__annotations__'):
|
|
||||||
for field_name, field_type in class_obj.__annotations__.items():
|
|
||||||
field_table[field_name] = field_type
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise TypeError("Could not get required class and definition")
|
|
||||||
|
|
||||||
logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}")
|
|
||||||
if handler.has_node(current_symbol_name):
|
|
||||||
logger.info("Extraction pruned due to already available field")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
new_dep_node = DependencyNode(name=current_symbol_name)
|
|
||||||
for elem_name, elem_type in field_table.items():
|
|
||||||
module_name = getattr(elem_type, "__module__", None)
|
|
||||||
if module_name == ctypes.__name__:
|
|
||||||
new_dep_node.add_field(elem_name, elem_type, ready=True)
|
|
||||||
elif module_name == "vmlinux":
|
|
||||||
new_dep_node.add_field(elem_name, elem_type, ready=False)
|
|
||||||
print("elem_name:", elem_name, "elem_type:", elem_type)
|
|
||||||
# currently fails when a non-normal type appears which is basically everytime
|
|
||||||
identify_ctypes_type(elem_type)
|
|
||||||
symbol_name = elem_type.__name__ if hasattr(elem_type, '__name__') else str(elem_type)
|
|
||||||
vmlinux_symbol = getattr(imported_module, symbol_name)
|
|
||||||
if process_vmlinux_class(vmlinux_symbol, llvm_module, handler):
|
|
||||||
new_dep_node.set_field_ready(elem_name, True)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"{elem_name} with type {elem_type} not supported in recursive resolver")
|
|
||||||
handler.add_node(new_dep_node)
|
|
||||||
logger.info(f"added node: {current_symbol_name}")
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def identify_ctypes_type(t):
|
|
||||||
if isinstance(t, type): # t is a type/class
|
|
||||||
if issubclass(t, ctypes.Array):
|
|
||||||
print("Array type")
|
|
||||||
print("Element type:", t._type_)
|
|
||||||
print("Length:", t._length_)
|
|
||||||
elif issubclass(t, ctypes._Pointer):
|
|
||||||
print("Pointer type")
|
|
||||||
print("Points to:", t._type_)
|
|
||||||
elif issubclass(t, ctypes._SimpleCData):
|
|
||||||
print("Scalar type")
|
|
||||||
print("Base type:", t)
|
|
||||||
else:
|
|
||||||
raise TypeError("Instance sent instead of Class")
|
|
||||||
@ -1,9 +1,9 @@
|
|||||||
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
|
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
|
||||||
from pythonbpf.maps import HashMap
|
from pythonbpf.maps import HashMap
|
||||||
from pythonbpf.helper import XDP_PASS
|
from pythonbpf.helper import XDP_PASS
|
||||||
from vmlinux import struct_xdp_md
|
|
||||||
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
|
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
|
||||||
from vmlinux import struct_xdp_buff # noqa: F401
|
from vmlinux import struct_xdp_buff # noqa: F401
|
||||||
|
from vmlinux import struct_xdp_md
|
||||||
from ctypes import c_int64
|
from ctypes import c_int64
|
||||||
|
|
||||||
# Instructions to how to run this program
|
# Instructions to how to run this program
|
||||||
|
|||||||
@ -26,8 +26,13 @@ import tempfile
|
|||||||
|
|
||||||
|
|
||||||
class BTFConverter:
|
class BTFConverter:
|
||||||
def __init__(self, btf_source="/sys/kernel/btf/vmlinux", output_file="vmlinux.py",
|
def __init__(
|
||||||
keep_intermediate=False, verbose=False):
|
self,
|
||||||
|
btf_source="/sys/kernel/btf/vmlinux",
|
||||||
|
output_file="vmlinux.py",
|
||||||
|
keep_intermediate=False,
|
||||||
|
verbose=False,
|
||||||
|
):
|
||||||
self.btf_source = btf_source
|
self.btf_source = btf_source
|
||||||
self.output_file = output_file
|
self.output_file = output_file
|
||||||
self.keep_intermediate = keep_intermediate
|
self.keep_intermediate = keep_intermediate
|
||||||
@ -44,11 +49,7 @@ class BTFConverter:
|
|||||||
self.log(f"{description}...")
|
self.log(f"{description}...")
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
cmd,
|
cmd, shell=True, check=True, capture_output=True, text=True
|
||||||
shell=True,
|
|
||||||
check=True,
|
|
||||||
capture_output=True,
|
|
||||||
text=True
|
|
||||||
)
|
)
|
||||||
if self.verbose and result.stdout:
|
if self.verbose and result.stdout:
|
||||||
print(result.stdout)
|
print(result.stdout)
|
||||||
@ -69,51 +70,55 @@ class BTFConverter:
|
|||||||
"""Step 1.5: Preprocess enum definitions."""
|
"""Step 1.5: Preprocess enum definitions."""
|
||||||
self.log("Preprocessing enum definitions...")
|
self.log("Preprocessing enum definitions...")
|
||||||
|
|
||||||
with open(input_file, 'r') as f:
|
with open(input_file, "r") as f:
|
||||||
original_code = f.read()
|
original_code = f.read()
|
||||||
|
|
||||||
# Extract anonymous enums
|
# Extract anonymous enums
|
||||||
enums = re.findall(
|
enums = re.findall(
|
||||||
r'(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;',
|
r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
|
||||||
original_code
|
original_code,
|
||||||
)
|
)
|
||||||
enum_defs = [enum_block + ';' for enum_block, _ in enums]
|
enum_defs = [enum_block + ";" for enum_block, _ in enums]
|
||||||
|
|
||||||
# Replace anonymous enums with int declarations
|
# Replace anonymous enums with int declarations
|
||||||
processed_code = re.sub(
|
processed_code = re.sub(
|
||||||
r'(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;',
|
r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
|
||||||
r'int \1;',
|
r"int \1;",
|
||||||
original_code
|
original_code,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepend enum definitions
|
# Prepend enum definitions
|
||||||
if enum_defs:
|
if enum_defs:
|
||||||
enum_text = '\n'.join(enum_defs) + '\n\n'
|
enum_text = "\n".join(enum_defs) + "\n\n"
|
||||||
processed_code = enum_text + processed_code
|
processed_code = enum_text + processed_code
|
||||||
|
|
||||||
output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
|
output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
|
||||||
with open(output_file, 'w') as f:
|
with open(output_file, "w") as f:
|
||||||
f.write(processed_code)
|
f.write(processed_code)
|
||||||
|
|
||||||
return output_file
|
return output_file
|
||||||
|
|
||||||
def step2_5_process_kioctx(self, input_file):
|
def step2_5_process_kioctx(self, input_file):
|
||||||
#TODO: this is a very bad bug and design decision. A single struct has an issue mostly.
|
# TODO: this is a very bad bug and design decision. A single struct has an issue mostly.
|
||||||
"""Step 2.5: Process struct kioctx to extract nested anonymous structs."""
|
"""Step 2.5: Process struct kioctx to extract nested anonymous structs."""
|
||||||
self.log("Processing struct kioctx nested structs...")
|
self.log("Processing struct kioctx nested structs...")
|
||||||
|
|
||||||
with open(input_file, 'r') as f:
|
with open(input_file, "r") as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
# Pattern to match struct kioctx with its full body (handles multiple nesting levels)
|
# Pattern to match struct kioctx with its full body (handles multiple nesting levels)
|
||||||
kioctx_pattern = r'struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;'
|
kioctx_pattern = (
|
||||||
|
r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
|
||||||
|
)
|
||||||
|
|
||||||
def process_kioctx_replacement(match):
|
def process_kioctx_replacement(match):
|
||||||
full_struct = match.group(0)
|
full_struct = match.group(0)
|
||||||
self.log(f"Found struct kioctx, length: {len(full_struct)} chars")
|
self.log(f"Found struct kioctx, length: {len(full_struct)} chars")
|
||||||
|
|
||||||
# Extract the struct body (everything between outermost { and })
|
# Extract the struct body (everything between outermost { and })
|
||||||
body_match = re.search(r'struct\s+kioctx\s*\{(.*)\}\s*;', full_struct, re.DOTALL)
|
body_match = re.search(
|
||||||
|
r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
|
||||||
|
)
|
||||||
if not body_match:
|
if not body_match:
|
||||||
return full_struct
|
return full_struct
|
||||||
|
|
||||||
@ -121,7 +126,7 @@ class BTFConverter:
|
|||||||
|
|
||||||
# Find all anonymous structs within the body
|
# Find all anonymous structs within the body
|
||||||
# Pattern: struct { ... } followed by ; (not a member name)
|
# Pattern: struct { ... } followed by ; (not a member name)
|
||||||
anon_struct_pattern = r'struct\s*\{[^}]*\}'
|
# anon_struct_pattern = r"struct\s*\{[^}]*\}"
|
||||||
|
|
||||||
anon_structs = []
|
anon_structs = []
|
||||||
anon_counter = 4 # Start from 4, counting down to 1
|
anon_counter = 4 # Start from 4, counting down to 1
|
||||||
@ -131,7 +136,9 @@ class BTFConverter:
|
|||||||
anon_struct_content = m.group(0)
|
anon_struct_content = m.group(0)
|
||||||
|
|
||||||
# Extract the body of the anonymous struct
|
# Extract the body of the anonymous struct
|
||||||
anon_body_match = re.search(r'struct\s*\{(.*)\}', anon_struct_content, re.DOTALL)
|
anon_body_match = re.search(
|
||||||
|
r"struct\s*\{(.*)\}", anon_struct_content, re.DOTALL
|
||||||
|
)
|
||||||
if not anon_body_match:
|
if not anon_body_match:
|
||||||
return anon_struct_content
|
return anon_struct_content
|
||||||
|
|
||||||
@ -154,7 +161,7 @@ class BTFConverter:
|
|||||||
processed_body = body
|
processed_body = body
|
||||||
|
|
||||||
# Find all occurrences and process them
|
# Find all occurrences and process them
|
||||||
pattern_with_semicolon = r'struct\s*\{([^}]*)\}\s*;'
|
pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
|
||||||
matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
|
matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
|
||||||
|
|
||||||
if not matches:
|
if not matches:
|
||||||
@ -178,14 +185,16 @@ class BTFConverter:
|
|||||||
|
|
||||||
# Replace in the body
|
# Replace in the body
|
||||||
replacement = f"struct {anon_name} {member_name};"
|
replacement = f"struct {anon_name} {member_name};"
|
||||||
processed_body = processed_body[:start_pos] + replacement + processed_body[end_pos:]
|
processed_body = (
|
||||||
|
processed_body[:start_pos] + replacement + processed_body[end_pos:]
|
||||||
|
)
|
||||||
|
|
||||||
anon_counter -= 1
|
anon_counter -= 1
|
||||||
|
|
||||||
# Rebuild the complete definition
|
# Rebuild the complete definition
|
||||||
if anon_structs:
|
if anon_structs:
|
||||||
# Prepend the anonymous struct definitions
|
# Prepend the anonymous struct definitions
|
||||||
anon_definitions = '\n'.join(anon_structs) + '\n\n'
|
anon_definitions = "\n".join(anon_structs) + "\n\n"
|
||||||
new_struct = f"struct kioctx {{{processed_body}}};"
|
new_struct = f"struct kioctx {{{processed_body}}};"
|
||||||
return anon_definitions + new_struct
|
return anon_definitions + new_struct
|
||||||
else:
|
else:
|
||||||
@ -193,14 +202,11 @@ class BTFConverter:
|
|||||||
|
|
||||||
# Apply the transformation
|
# Apply the transformation
|
||||||
processed_content = re.sub(
|
processed_content = re.sub(
|
||||||
kioctx_pattern,
|
kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
|
||||||
process_kioctx_replacement,
|
|
||||||
content,
|
|
||||||
flags=re.DOTALL
|
|
||||||
)
|
)
|
||||||
|
|
||||||
output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
|
output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
|
||||||
with open(output_file, 'w') as f:
|
with open(output_file, "w") as f:
|
||||||
f.write(processed_content)
|
f.write(processed_content)
|
||||||
|
|
||||||
self.log(f"Saved kioctx-processed output to {output_file}")
|
self.log(f"Saved kioctx-processed output to {output_file}")
|
||||||
@ -218,7 +224,7 @@ class BTFConverter:
|
|||||||
output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
|
output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
|
||||||
cmd = (
|
cmd = (
|
||||||
f"clang2py {input_file} -o {output_file} "
|
f"clang2py {input_file} -o {output_file} "
|
||||||
f"--clang-args=\"-fno-ms-extensions -I/usr/include -I/usr/include/linux\""
|
f'--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
|
||||||
)
|
)
|
||||||
self.run_command(cmd, "Converting to Python ctypes")
|
self.run_command(cmd, "Converting to Python ctypes")
|
||||||
return output_file
|
return output_file
|
||||||
@ -234,25 +240,21 @@ class BTFConverter:
|
|||||||
data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)
|
data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)
|
||||||
|
|
||||||
# Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
|
# Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
|
||||||
data = re.sub(r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data)
|
data = re.sub(
|
||||||
|
r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
|
||||||
|
)
|
||||||
|
|
||||||
# Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
|
# Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
|
||||||
data = re.sub(
|
data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)
|
||||||
r"(ctypes\.c_char)(\s*,\s*\d+\))",
|
|
||||||
r"ctypes.c_uint8\2",
|
|
||||||
data
|
|
||||||
)
|
|
||||||
|
|
||||||
# below to replace those c_bool with bitfield greater than 8
|
# below to replace those c_bool with bitfield greater than 8
|
||||||
def repl(m):
|
def repl(m):
|
||||||
name, bits = m.groups()
|
name, bits = m.groups()
|
||||||
return f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
|
return (
|
||||||
|
f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
|
||||||
|
)
|
||||||
|
|
||||||
data = re.sub(
|
data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)
|
||||||
r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)",
|
|
||||||
repl,
|
|
||||||
data
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove ctypes. prefix from invalid entries
|
# Remove ctypes. prefix from invalid entries
|
||||||
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
|
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
|
||||||
@ -269,6 +271,7 @@ class BTFConverter:
|
|||||||
if not self.keep_intermediate and self.temp_dir != ".":
|
if not self.keep_intermediate and self.temp_dir != ".":
|
||||||
self.log(f"Cleaning up temporary directory: {self.temp_dir}")
|
self.log(f"Cleaning up temporary directory: {self.temp_dir}")
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
def convert(self):
|
def convert(self):
|
||||||
@ -292,6 +295,7 @@ class BTFConverter:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
|
print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
finally:
|
finally:
|
||||||
@ -304,18 +308,13 @@ class BTFConverter:
|
|||||||
dependencies = {
|
dependencies = {
|
||||||
"bpftool": "bpftool --version",
|
"bpftool": "bpftool --version",
|
||||||
"clang": "clang --version",
|
"clang": "clang --version",
|
||||||
"clang2py": "clang2py --version"
|
"clang2py": "clang2py --version",
|
||||||
}
|
}
|
||||||
|
|
||||||
missing = []
|
missing = []
|
||||||
for tool, cmd in dependencies.items():
|
for tool, cmd in dependencies.items():
|
||||||
try:
|
try:
|
||||||
subprocess.run(
|
subprocess.run(cmd, shell=True, check=True, capture_output=True)
|
||||||
cmd,
|
|
||||||
shell=True,
|
|
||||||
check=True,
|
|
||||||
capture_output=True
|
|
||||||
)
|
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
missing.append(tool)
|
missing.append(tool)
|
||||||
|
|
||||||
@ -337,31 +336,31 @@ Examples:
|
|||||||
%(prog)s
|
%(prog)s
|
||||||
%(prog)s -o kernel_types.py
|
%(prog)s -o kernel_types.py
|
||||||
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
|
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
|
||||||
"""
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--btf-source",
|
"--btf-source",
|
||||||
default="/sys/kernel/btf/vmlinux",
|
default="/sys/kernel/btf/vmlinux",
|
||||||
help="Path to BTF source (default: /sys/kernel/btf/vmlinux)"
|
help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-o", "--output",
|
"-o",
|
||||||
|
"--output",
|
||||||
default="vmlinux.py",
|
default="vmlinux.py",
|
||||||
help="Output Python file (default: vmlinux.py)"
|
help="Output Python file (default: vmlinux.py)",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-k", "--keep-intermediate",
|
"-k",
|
||||||
|
"--keep-intermediate",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)"
|
help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-v", "--verbose",
|
"-v", "--verbose", action="store_true", help="Enable verbose output"
|
||||||
action="store_true",
|
|
||||||
help="Enable verbose output"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -370,7 +369,7 @@ Examples:
|
|||||||
btf_source=args.btf_source,
|
btf_source=args.btf_source,
|
||||||
output_file=args.output,
|
output_file=args.output,
|
||||||
keep_intermediate=args.keep_intermediate,
|
keep_intermediate=args.keep_intermediate,
|
||||||
verbose=args.verbose
|
verbose=args.verbose,
|
||||||
)
|
)
|
||||||
|
|
||||||
converter.convert()
|
converter.convert()
|
||||||
|
|||||||
Reference in New Issue
Block a user