Compare commits

...

14 Commits

16 changed files with 527 additions and 11 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ __pycache__/
*.o
.ipynb_checkpoints/
vmlinux.py
~*

View File

@ -41,7 +41,7 @@ repos:
- id: ruff
args: ["--fix", "--show-fixes"]
- id: ruff-format
exclude: ^(docs)|^(tests)|^(examples)
# exclude: ^(docs)|^(tests)|^(examples)
# Checking static types
- repo: https://github.com/pre-commit/mirrors-mypy

View File

@ -308,6 +308,7 @@
"def hist() -> HashMap:\n",
" return HashMap(key=c_int32, value=c_uint64, max_entries=4096)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello(ctx: c_void_p) -> c_int64:\n",
@ -329,6 +330,7 @@
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"b = BPF()"
]
},
@ -357,7 +359,6 @@
}
],
"source": [
"\n",
"b.load_and_attach()\n",
"hist = BpfMap(b, hist)\n",
"print(\"Recording\")\n",

View File

@ -8,12 +8,14 @@ def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@section("kprobe/do_unlinkat")
def hello_world2(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:

View File

@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32:
dataobj.pid = pid()
dataobj.ts = ktime()
# dataobj.comm = strobj
print(f"clone called at {dataobj.ts} by pid" f"{dataobj.pid}, comm {strobj}")
print(f"clone called at {dataobj.ts} by pid{dataobj.pid}, comm {strobj}")
events.output(dataobj)
return c_int32(0)

View File

@ -40,5 +40,6 @@ def hello_world(ctx: c_void_p) -> c_int64:
def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
compile()

View File

@ -0,0 +1,149 @@
from typing import Optional, Dict, List, Iterator
from .dependency_node import DependencyNode
class DependencyHandler:
"""
Manages a collection of DependencyNode objects with no duplicates.
Ensures that no two nodes with the same name can be added and provides
methods to check readiness and retrieve specific nodes.
Example usage:
# Create a handler
handler = DependencyHandler()
# Create some dependency nodes
node1 = DependencyNode(name="node1")
node1.add_field("field1", str)
node1.set_field_value("field1", "value1")
node2 = DependencyNode(name="node2")
node2.add_field("field1", int)
# Add nodes to the handler
handler.add_node(node1)
handler.add_node(node2)
# Check if a specific node exists
print(handler.has_node("node1")) # True
# Get a reference to a node and modify it
node = handler.get_node("node2")
node.set_field_value("field1", 42)
# Check if all nodes are ready
print(handler.is_ready) # False (node2 is ready, but node1 isn't)
"""
def __init__(self):
# Using a dictionary with node names as keys ensures name uniqueness
# and provides efficient lookups
self._nodes: Dict[str, DependencyNode] = {}
def add_node(self, node: DependencyNode) -> bool:
"""
Add a dependency node to the handler.
Args:
node: The DependencyNode to add
Returns:
bool: True if the node was added, False if a node with the same name already exists
Raises:
TypeError: If the provided object is not a DependencyNode
"""
if not isinstance(node, DependencyNode):
raise TypeError(f"Expected DependencyNode, got {type(node).__name__}")
# Check if a node with this name already exists
if node.name in self._nodes:
return False
self._nodes[node.name] = node
return True
@property
def is_ready(self) -> bool:
"""
Check if all nodes are ready.
Returns:
bool: True if all nodes are ready (or if there are no nodes), False otherwise
"""
if not self._nodes:
return True
return all(node.is_ready for node in self._nodes.values())
def has_node(self, name: str) -> bool:
"""
Check if a node with the given name exists.
Args:
name: The name to check
Returns:
bool: True if a node with the given name exists, False otherwise
"""
return name in self._nodes
def get_node(self, name: str) -> Optional[DependencyNode]:
"""
Get a node by name for manipulation.
Args:
name: The name of the node to retrieve
Returns:
Optional[DependencyNode]: The node with the given name, or None if not found
"""
return self._nodes.get(name)
def remove_node(self, node_or_name) -> bool:
"""
Remove a node by name or reference.
Args:
node_or_name: The node to remove or its name
Returns:
bool: True if the node was removed, False if not found
"""
if isinstance(node_or_name, DependencyNode):
name = node_or_name.name
else:
name = node_or_name
if name in self._nodes:
del self._nodes[name]
return True
return False
def get_all_nodes(self) -> List[DependencyNode]:
"""
Get all nodes stored in the handler.
Returns:
List[DependencyNode]: List of all nodes
"""
return list(self._nodes.values())
def __iter__(self) -> Iterator[DependencyNode]:
"""
Iterate over all nodes.
Returns:
Iterator[DependencyNode]: Iterator over all nodes
"""
return iter(self._nodes.values())
def __len__(self) -> int:
"""
Get the number of nodes in the handler.
Returns:
int: The number of nodes
"""
return len(self._nodes)

View File

@ -0,0 +1,120 @@
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
@dataclass
class Field:
"""Represents a field in a dependency node with its type and readiness state."""
name: str
type: type
value: Any = None
ready: bool = False
def set_ready(self, is_ready: bool = True) -> None:
"""Set the readiness state of this field."""
self.ready = is_ready
def set_value(self, value: Any, mark_ready: bool = True) -> None:
"""Set the value of this field and optionally mark it as ready."""
self.value = value
if mark_ready:
self.ready = True
@dataclass
class DependencyNode:
"""
A node with typed fields and readiness tracking.
Example usage:
# Create a dependency node for a Person
somestruct = DependencyNode(name="struct_1")
# Add fields with their types
somestruct.add_field("field_1", str)
somestruct.add_field("field_2", int)
somestruct.add_field("field_3", str)
# Check if the node is ready (should be False initially)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set some field values
somestruct.set_field_value("field_1", "someproperty")
somestruct.set_field_value("field_2", 30)
# Check if the node is ready (still False because email is not ready)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set the last field and make the node ready
somestruct.set_field_value("field_3", "anotherproperty")
# Now the node should be ready
print(f"Is node ready? {somestruct.is_ready}") # True
# You can also mark a field as not ready
somestruct.set_field_ready("field_3", False)
# Now the node is not ready again
print(f"Is node ready? {somestruct.is_ready}") # False
# Get all field values
print(somestruct.get_field_values()) # {'field_1': 'someproperty', 'field_2': 30, 'field_3': 'anotherproperty'}
# Get only ready fields
ready_fields = somestruct.get_ready_fields()
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
"""
name: str
fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False)
def add_field(self, name: str, field_type: type, initial_value: Any = None, ready: bool = False) -> None:
"""Add a field to the node with an optional initial value and readiness state."""
self.fields[name] = Field(name=name, type=field_type, value=initial_value, ready=ready)
# Invalidate readiness cache
self._ready_cache = None
def get_field(self, name: str) -> Field:
"""Get a field by name."""
return self.fields[name]
def set_field_value(self, name: str, value: Any, mark_ready: bool = True) -> None:
"""Set a field's value and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_value(value, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_ready(self, name: str, is_ready: bool = True) -> None:
"""Mark a field as ready or not ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ready(is_ready)
# Invalidate readiness cache
self._ready_cache = None
@property
def is_ready(self) -> bool:
"""Check if the node is ready (all fields are ready)."""
# Use cached value if available
if self._ready_cache is not None:
return self._ready_cache
# Calculate readiness only when needed
if not self.fields:
self._ready_cache = False
return False
self._ready_cache = all(elem.ready for elem in self.fields.values())
return self._ready_cache
def get_field_values(self) -> Dict[str, Any]:
"""Get a dictionary of field names to their values."""
return {name: elem.value for name, elem in self.fields.items()}
def get_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as ready."""
return {name: elem for name, elem in self.fields.items() if elem.ready}

View File

@ -1,5 +1,128 @@
import llvmlite.ir as ir
import ast
import logging
from typing import List, Tuple, Dict
import importlib
import inspect
def vmlinux_proc(tree, module):
pass
from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator
from .vmlinux_class_handler import process_vmlinux_class
logger = logging.getLogger(__name__)
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
"""
Parse AST and detect import statements from vmlinux.
Returns a list of tuples (module_name, imported_item) for vmlinux imports.
Raises SyntaxError for invalid import patterns.
Args:
tree: The AST to parse
Returns:
List of tuples containing (module_name, imported_item) for each vmlinux import
Raises:
SyntaxError: If multiple imports from vmlinux are attempted or import * is used
"""
vmlinux_imports = []
for node in ast.walk(tree):
# Handle "from vmlinux import ..." statements
if isinstance(node, ast.ImportFrom):
if node.module == "vmlinux":
# Check for wildcard import: from vmlinux import *
if any(alias.name == "*" for alias in node.names):
raise SyntaxError(
"Wildcard imports from vmlinux are not supported. "
"Please import specific types explicitly."
)
# Check for multiple imports: from vmlinux import A, B, C
if len(node.names) > 1:
imported_names = [alias.name for alias in node.names]
raise SyntaxError(
f"Multiple imports from vmlinux are not supported. "
f"Found: {', '.join(imported_names)}. "
f"Please use separate import statements for each type."
)
# Check if no specific import is specified (should not happen with valid Python)
if len(node.names) == 0:
raise SyntaxError(
"Import from vmlinux must specify at least one type."
)
# Valid single import
for alias in node.names:
import_name = alias.name
# Use alias if provided, otherwise use the original name
as_name = alias.asname if alias.asname else alias.name
vmlinux_imports.append(("vmlinux", node))
logger.info(f"Found vmlinux import: {import_name}")
# Handle "import vmlinux" statements (not typical but should be rejected)
elif isinstance(node, ast.Import):
for alias in node.names:
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
raise SyntaxError(
f"Direct import of vmlinux module is not supported. "
f"Use 'from vmlinux import <type>' instead."
)
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
return vmlinux_imports
def vmlinux_proc(tree: ast.AST, module):
import_statements = detect_import_statement(tree)
# initialise dependency handler
handler = DependencyHandler()
# initialise assignment dictionary of name to type
assignments: Dict[str, type] = {}
if not import_statements:
logger.info("No vmlinux imports found")
return
# Import vmlinux module directly
try:
vmlinux_mod = importlib.import_module("vmlinux")
except ImportError:
logger.warning("Could not import vmlinux module")
return
source_file = inspect.getsourcefile(vmlinux_mod)
if source_file is None:
logger.warning("Cannot find source for vmlinux module")
return
with open(source_file, "r") as f:
mod_ast = ast.parse(f.read(), filename=source_file)
for import_mod, import_node in import_statements:
for alias in import_node.names:
imported_name = alias.name
found = False
for mod_node in mod_ast.body:
if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
process_vmlinux_class(mod_node, module, handler)
found = True
break
if isinstance(mod_node, ast.Assign):
for target in mod_node.targets:
if isinstance(target, ast.Name) and target.id == imported_name:
process_vmlinux_assign(mod_node, module, assignments)
found = True
break
if found:
break
if not found:
logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")
IRGenerator(module, handler)
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
raise NotImplementedError("Assignment handling has not been implemented yet")

View File

@ -0,0 +1,8 @@
# here, we will iterate through the dependencies and generate IR once dependencies are resolved fully
from .dependency_handler import DependencyHandler
class IRGenerator:
def __init__(self, module, handler):
self.module = module
self.handler: DependencyHandler = handler

View File

@ -0,0 +1,96 @@
import ast
import logging
from functools import lru_cache
import importlib
from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode
import ctypes
logger = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def get_module_symbols(module_name: str):
imported_module = importlib.import_module(module_name)
return [name for name in dir(imported_module)], imported_module
# Recursive function that gets all the dependent classes and adds them to handler
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
symbols_in_module, imported_module = get_module_symbols("vmlinux")
# Handle both node objects and type objects
if hasattr(node, 'name'):
current_symbol_name = node.name
elif hasattr(node, '__name__'):
current_symbol_name = node.__name__
else:
current_symbol_name = str(node)
if current_symbol_name not in symbols_in_module:
raise ImportError(f"{current_symbol_name} not present in module vmlinux")
logger.info(f"Resolving vmlinux class {current_symbol_name}")
logger.debug(f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes")
field_table = {} # should contain the field and it's type.
# Get the class object from the module
class_obj = getattr(imported_module, current_symbol_name)
# Below, I've written a general structure that gets class-info
# everytime, no matter the format in which it is present
# Inspect the class fields
# Assuming class_obj has fields stored in some standard way
# If it's a ctypes-like structure with _fields_
if hasattr(class_obj, '_fields_'):
for field_name, field_type in class_obj._fields_:
field_table[field_name] = field_type
# If it's using __annotations__
elif hasattr(class_obj, '__annotations__'):
for field_name, field_type in class_obj.__annotations__.items():
field_table[field_name] = field_type
else:
raise TypeError("Could not get required class and definition")
logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}")
if handler.has_node(current_symbol_name):
logger.info("Extraction pruned due to already available field")
return True
else:
new_dep_node = DependencyNode(name=current_symbol_name)
for elem_name, elem_type in field_table.items():
module_name = getattr(elem_type, "__module__", None)
if module_name == ctypes.__name__:
new_dep_node.add_field(elem_name, elem_type, ready=True)
elif module_name == "vmlinux":
new_dep_node.add_field(elem_name, elem_type, ready=False)
print("elem_name:", elem_name, "elem_type:", elem_type)
# currently fails when a non-normal type appears which is basically everytime
identify_ctypes_type(elem_type)
symbol_name = elem_type.__name__ if hasattr(elem_type, '__name__') else str(elem_type)
vmlinux_symbol = getattr(imported_module, symbol_name)
if process_vmlinux_class(vmlinux_symbol, llvm_module, handler):
new_dep_node.set_field_ready(elem_name, True)
else:
raise ValueError(f"{elem_name} with type {elem_type} not supported in recursive resolver")
handler.add_node(new_dep_node)
logger.info(f"added node: {current_symbol_name}")
return True
def identify_ctypes_type(t):
if isinstance(t, type): # t is a type/class
if issubclass(t, ctypes.Array):
print("Array type")
print("Element type:", t._type_)
print("Length:", t._length_)
elif issubclass(t, ctypes._Pointer):
print("Pointer type")
print("Points to:", t._type_)
elif issubclass(t, ctypes._SimpleCData):
print("Scalar type")
print("Base type:", t)
else:
raise TypeError("Instance sent instead of Class")

View File

@ -3,16 +3,19 @@ import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
@ -21,12 +24,14 @@ def somevalue1() -> c_int32:
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
@ -62,15 +67,17 @@ def g2() -> c_int64:
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
#TODO: this is not working. should it work ?
# TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
#TODO: fails with dataclass
# TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
@ -91,6 +98,7 @@ def sometag(ctx: c_void_p) -> c_int64:
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:

View File

@ -11,6 +11,7 @@ from ctypes import c_void_p, c_int64
# We cannot allocate space for the intermediate type now.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:

View File

@ -3,6 +3,7 @@ import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
@ -11,6 +12,7 @@ def sometag(ctx: c_void_p) -> c_int64:
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:

View File

@ -1,6 +1,9 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
from pythonbpf.maps import HashMap
from vmlinux import struct_xdp_md, XDP_PASS
from pythonbpf.helper import XDP_PASS
from vmlinux import struct_xdp_md
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
from vmlinux import struct_xdp_buff # noqa: F401
from ctypes import c_int64
# Instructions to how to run this program
@ -39,5 +42,6 @@ def hello_world(ctx: struct_xdp_md) -> c_int64:
def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
compile()

View File

@ -6,8 +6,8 @@ from ctypes import c_void_p, c_int32
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
a = 1 # int64
return c_int32(a) # typecast to int32
a = 1 # int64
return c_int32(a) # typecast to int32
@bpf