format chore

This commit is contained in:
2025-10-11 22:00:25 +05:30
parent abbf17748d
commit 75d3ad4fe2
6 changed files with 104 additions and 78 deletions

View File

@ -12,7 +12,7 @@
# #
# See https://github.com/pre-commit/pre-commit # See https://github.com/pre-commit/pre-commit
exclude: 'vmlinux.*\.py$' exclude: 'vmlinux.py'
ci: ci:
autoupdate_commit_msg: "chore: update pre-commit hooks" autoupdate_commit_msg: "chore: update pre-commit hooks"

View File

@ -1 +1,3 @@
from .import_detector import vmlinux_proc from .import_detector import vmlinux_proc
__all__ = ["vmlinux_proc"]

View File

@ -1,4 +1,3 @@
import ast
import logging import logging
from functools import lru_cache from functools import lru_cache
import importlib import importlib
@ -20,9 +19,9 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
symbols_in_module, imported_module = get_module_symbols("vmlinux") symbols_in_module, imported_module = get_module_symbols("vmlinux")
# Handle both node objects and type objects # Handle both node objects and type objects
if hasattr(node, 'name'): if hasattr(node, "name"):
current_symbol_name = node.name current_symbol_name = node.name
elif hasattr(node, '__name__'): elif hasattr(node, "__name__"):
current_symbol_name = node.__name__ current_symbol_name = node.__name__
else: else:
current_symbol_name = str(node) current_symbol_name = str(node)
@ -30,7 +29,9 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
if current_symbol_name not in symbols_in_module: if current_symbol_name not in symbols_in_module:
raise ImportError(f"{current_symbol_name} not present in module vmlinux") raise ImportError(f"{current_symbol_name} not present in module vmlinux")
logger.info(f"Resolving vmlinux class {current_symbol_name}") logger.info(f"Resolving vmlinux class {current_symbol_name}")
logger.debug(f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes") logger.debug(
f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes"
)
field_table = {} # should contain the field and its type. field_table = {} # should contain the field and its type.
# Get the class object from the module # Get the class object from the module
@ -42,12 +43,12 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
# Inspect the class fields # Inspect the class fields
# Assuming class_obj has fields stored in some standard way # Assuming class_obj has fields stored in some standard way
# If it's a ctypes-like structure with _fields_ # If it's a ctypes-like structure with _fields_
if hasattr(class_obj, '_fields_'): if hasattr(class_obj, "_fields_"):
for field_name, field_type in class_obj._fields_: for field_name, field_type in class_obj._fields_:
field_table[field_name] = field_type field_table[field_name] = field_type
# If it's using __annotations__ # If it's using __annotations__
elif hasattr(class_obj, '__annotations__'): elif hasattr(class_obj, "__annotations__"):
for field_name, field_type in class_obj.__annotations__.items(): for field_name, field_type in class_obj.__annotations__.items():
field_table[field_name] = field_type field_table[field_name] = field_type
@ -69,17 +70,24 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
print("elem_name:", elem_name, "elem_type:", elem_type) print("elem_name:", elem_name, "elem_type:", elem_type)
# currently fails when a non-normal type appears, which is basically every time # currently fails when a non-normal type appears, which is basically every time
identify_ctypes_type(elem_type) identify_ctypes_type(elem_type)
symbol_name = elem_type.__name__ if hasattr(elem_type, '__name__') else str(elem_type) symbol_name = (
elem_type.__name__
if hasattr(elem_type, "__name__")
else str(elem_type)
)
vmlinux_symbol = getattr(imported_module, symbol_name) vmlinux_symbol = getattr(imported_module, symbol_name)
if process_vmlinux_class(vmlinux_symbol, llvm_module, handler): if process_vmlinux_class(vmlinux_symbol, llvm_module, handler):
new_dep_node.set_field_ready(elem_name, True) new_dep_node.set_field_ready(elem_name, True)
else: else:
raise ValueError(f"{elem_name} with type {elem_type} not supported in recursive resolver") raise ValueError(
f"{elem_name} with type {elem_type} not supported in recursive resolver"
)
handler.add_node(new_dep_node) handler.add_node(new_dep_node)
logger.info(f"added node: {current_symbol_name}") logger.info(f"added node: {current_symbol_name}")
return True return True
def identify_ctypes_type(t): def identify_ctypes_type(t):
if isinstance(t, type): # t is a type/class if isinstance(t, type): # t is a type/class
if issubclass(t, ctypes.Array): if issubclass(t, ctypes.Array):

View File

@ -5,6 +5,7 @@ from typing import Dict, Any, Optional
@dataclass @dataclass
class Field: class Field:
"""Represents a field in a dependency node with its type and readiness state.""" """Represents a field in a dependency node with its type and readiness state."""
name: str name: str
type: type type: type
value: Any = None value: Any = None
@ -64,13 +65,22 @@ class DependencyNode:
ready_fields = somestruct.get_ready_fields() ready_fields = somestruct.get_ready_fields()
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2'] print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
""" """
name: str name: str
fields: Dict[str, Field] = field(default_factory=dict) fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False) _ready_cache: Optional[bool] = field(default=None, repr=False)
def add_field(self, name: str, field_type: type, initial_value: Any = None, ready: bool = False) -> None: def add_field(
self,
name: str,
field_type: type,
initial_value: Any = None,
ready: bool = False,
) -> None:
"""Add a field to the node with an optional initial value and readiness state.""" """Add a field to the node with an optional initial value and readiness state."""
self.fields[name] = Field(name=name, type=field_type, value=initial_value, ready=ready) self.fields[name] = Field(
name=name, type=field_type, value=initial_value, ready=ready
)
# Invalidate readiness cache # Invalidate readiness cache
self._ready_cache = None self._ready_cache = None

View File

@ -6,7 +6,7 @@ import inspect
from .dependency_handler import DependencyHandler from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator from .ir_generation import IRGenerator
from .vmlinux_class_handler import process_vmlinux_class from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -58,8 +58,8 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
# Valid single import # Valid single import
for alias in node.names: for alias in node.names:
import_name = alias.name import_name = alias.name
# Use alias if provided, otherwise use the original name # Use alias if provided, otherwise use the original name (commented)
as_name = alias.asname if alias.asname else alias.name # as_name = alias.asname if alias.asname else alias.name
vmlinux_imports.append(("vmlinux", node)) vmlinux_imports.append(("vmlinux", node))
logger.info(f"Found vmlinux import: {import_name}") logger.info(f"Found vmlinux import: {import_name}")
@ -68,13 +68,14 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
for alias in node.names: for alias in node.names:
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."): if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
raise SyntaxError( raise SyntaxError(
f"Direct import of vmlinux module is not supported. " "Direct import of vmlinux module is not supported. "
f"Use 'from vmlinux import <type>' instead." "Use 'from vmlinux import <type>' instead."
) )
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}") logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
return vmlinux_imports return vmlinux_imports
def vmlinux_proc(tree: ast.AST, module): def vmlinux_proc(tree: ast.AST, module):
import_statements = detect_import_statement(tree) import_statements = detect_import_statement(tree)
@ -107,7 +108,10 @@ def vmlinux_proc(tree: ast.AST, module):
imported_name = alias.name imported_name = alias.name
found = False found = False
for mod_node in mod_ast.body: for mod_node in mod_ast.body:
if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name: if (
isinstance(mod_node, ast.ClassDef)
and mod_node.name == imported_name
):
process_vmlinux_class(mod_node, module, handler) process_vmlinux_class(mod_node, module, handler)
found = True found = True
break break
@ -120,9 +124,12 @@ def vmlinux_proc(tree: ast.AST, module):
if found: if found:
break break
if not found: if not found:
logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux") logger.info(
f"{imported_name} not found as ClassDef or Assign in vmlinux"
)
IRGenerator(module, handler) IRGenerator(module, handler)
def process_vmlinux_assign(node, module, assignments: Dict[str, type]): def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
raise NotImplementedError("Assignment handling has not been implemented yet") raise NotImplementedError("Assignment handling has not been implemented yet")

View File

@ -26,8 +26,13 @@ import tempfile
class BTFConverter: class BTFConverter:
def __init__(self, btf_source="/sys/kernel/btf/vmlinux", output_file="vmlinux.py", def __init__(
keep_intermediate=False, verbose=False): self,
btf_source="/sys/kernel/btf/vmlinux",
output_file="vmlinux.py",
keep_intermediate=False,
verbose=False,
):
self.btf_source = btf_source self.btf_source = btf_source
self.output_file = output_file self.output_file = output_file
self.keep_intermediate = keep_intermediate self.keep_intermediate = keep_intermediate
@ -44,11 +49,7 @@ class BTFConverter:
self.log(f"{description}...") self.log(f"{description}...")
try: try:
result = subprocess.run( result = subprocess.run(
cmd, cmd, shell=True, check=True, capture_output=True, text=True
shell=True,
check=True,
capture_output=True,
text=True
) )
if self.verbose and result.stdout: if self.verbose and result.stdout:
print(result.stdout) print(result.stdout)
@ -69,51 +70,55 @@ class BTFConverter:
"""Step 1.5: Preprocess enum definitions.""" """Step 1.5: Preprocess enum definitions."""
self.log("Preprocessing enum definitions...") self.log("Preprocessing enum definitions...")
with open(input_file, 'r') as f: with open(input_file, "r") as f:
original_code = f.read() original_code = f.read()
# Extract anonymous enums # Extract anonymous enums
enums = re.findall( enums = re.findall(
r'(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;', r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
original_code original_code,
) )
enum_defs = [enum_block + ';' for enum_block, _ in enums] enum_defs = [enum_block + ";" for enum_block, _ in enums]
# Replace anonymous enums with int declarations # Replace anonymous enums with int declarations
processed_code = re.sub( processed_code = re.sub(
r'(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;', r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
r'int \1;', r"int \1;",
original_code original_code,
) )
# Prepend enum definitions # Prepend enum definitions
if enum_defs: if enum_defs:
enum_text = '\n'.join(enum_defs) + '\n\n' enum_text = "\n".join(enum_defs) + "\n\n"
processed_code = enum_text + processed_code processed_code = enum_text + processed_code
output_file = os.path.join(self.temp_dir, "vmlinux_processed.h") output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
with open(output_file, 'w') as f: with open(output_file, "w") as f:
f.write(processed_code) f.write(processed_code)
return output_file return output_file
def step2_5_process_kioctx(self, input_file): def step2_5_process_kioctx(self, input_file):
#TODO: this is a very bad bug and design decision. A single struct has an issue mostly. # TODO: this is a very bad bug and design decision. A single struct has an issue mostly.
"""Step 2.5: Process struct kioctx to extract nested anonymous structs.""" """Step 2.5: Process struct kioctx to extract nested anonymous structs."""
self.log("Processing struct kioctx nested structs...") self.log("Processing struct kioctx nested structs...")
with open(input_file, 'r') as f: with open(input_file, "r") as f:
content = f.read() content = f.read()
# Pattern to match struct kioctx with its full body (handles multiple nesting levels) # Pattern to match struct kioctx with its full body (handles multiple nesting levels)
kioctx_pattern = r'struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;' kioctx_pattern = (
r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
)
def process_kioctx_replacement(match): def process_kioctx_replacement(match):
full_struct = match.group(0) full_struct = match.group(0)
self.log(f"Found struct kioctx, length: {len(full_struct)} chars") self.log(f"Found struct kioctx, length: {len(full_struct)} chars")
# Extract the struct body (everything between outermost { and }) # Extract the struct body (everything between outermost { and })
body_match = re.search(r'struct\s+kioctx\s*\{(.*)\}\s*;', full_struct, re.DOTALL) body_match = re.search(
r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
)
if not body_match: if not body_match:
return full_struct return full_struct
@ -121,7 +126,7 @@ class BTFConverter:
# Find all anonymous structs within the body # Find all anonymous structs within the body
# Pattern: struct { ... } followed by ; (not a member name) # Pattern: struct { ... } followed by ; (not a member name)
anon_struct_pattern = r'struct\s*\{[^}]*\}' # anon_struct_pattern = r"struct\s*\{[^}]*\}"
anon_structs = [] anon_structs = []
anon_counter = 4 # Start from 4, counting down to 1 anon_counter = 4 # Start from 4, counting down to 1
@ -131,7 +136,9 @@ class BTFConverter:
anon_struct_content = m.group(0) anon_struct_content = m.group(0)
# Extract the body of the anonymous struct # Extract the body of the anonymous struct
anon_body_match = re.search(r'struct\s*\{(.*)\}', anon_struct_content, re.DOTALL) anon_body_match = re.search(
r"struct\s*\{(.*)\}", anon_struct_content, re.DOTALL
)
if not anon_body_match: if not anon_body_match:
return anon_struct_content return anon_struct_content
@ -154,7 +161,7 @@ class BTFConverter:
processed_body = body processed_body = body
# Find all occurrences and process them # Find all occurrences and process them
pattern_with_semicolon = r'struct\s*\{([^}]*)\}\s*;' pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL)) matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
if not matches: if not matches:
@ -178,14 +185,16 @@ class BTFConverter:
# Replace in the body # Replace in the body
replacement = f"struct {anon_name} {member_name};" replacement = f"struct {anon_name} {member_name};"
processed_body = processed_body[:start_pos] + replacement + processed_body[end_pos:] processed_body = (
processed_body[:start_pos] + replacement + processed_body[end_pos:]
)
anon_counter -= 1 anon_counter -= 1
# Rebuild the complete definition # Rebuild the complete definition
if anon_structs: if anon_structs:
# Prepend the anonymous struct definitions # Prepend the anonymous struct definitions
anon_definitions = '\n'.join(anon_structs) + '\n\n' anon_definitions = "\n".join(anon_structs) + "\n\n"
new_struct = f"struct kioctx {{{processed_body}}};" new_struct = f"struct kioctx {{{processed_body}}};"
return anon_definitions + new_struct return anon_definitions + new_struct
else: else:
@ -193,14 +202,11 @@ class BTFConverter:
# Apply the transformation # Apply the transformation
processed_content = re.sub( processed_content = re.sub(
kioctx_pattern, kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
process_kioctx_replacement,
content,
flags=re.DOTALL
) )
output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h") output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
with open(output_file, 'w') as f: with open(output_file, "w") as f:
f.write(processed_content) f.write(processed_content)
self.log(f"Saved kioctx-processed output to {output_file}") self.log(f"Saved kioctx-processed output to {output_file}")
@ -218,7 +224,7 @@ class BTFConverter:
output_file = os.path.join(self.temp_dir, "vmlinux_raw.py") output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
cmd = ( cmd = (
f"clang2py {input_file} -o {output_file} " f"clang2py {input_file} -o {output_file} "
f"--clang-args=\"-fno-ms-extensions -I/usr/include -I/usr/include/linux\"" f'--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
) )
self.run_command(cmd, "Converting to Python ctypes") self.run_command(cmd, "Converting to Python ctypes")
return output_file return output_file
@ -234,25 +240,21 @@ class BTFConverter:
data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data) data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)
# Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64) # Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
data = re.sub(r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data) data = re.sub(
r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
)
# Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8) # Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
data = re.sub( data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)
r"(ctypes\.c_char)(\s*,\s*\d+\))",
r"ctypes.c_uint8\2",
data
)
# below to replace those c_bool with bitfield greater than 8 # below to replace those c_bool with bitfield greater than 8
def repl(m): def repl(m):
name, bits = m.groups() name, bits = m.groups()
return f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0) return (
f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
)
data = re.sub( data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)
r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)",
repl,
data
)
# Remove ctypes. prefix from invalid entries # Remove ctypes. prefix from invalid entries
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"] invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
@ -269,6 +271,7 @@ class BTFConverter:
if not self.keep_intermediate and self.temp_dir != ".": if not self.keep_intermediate and self.temp_dir != ".":
self.log(f"Cleaning up temporary directory: {self.temp_dir}") self.log(f"Cleaning up temporary directory: {self.temp_dir}")
import shutil import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True) shutil.rmtree(self.temp_dir, ignore_errors=True)
def convert(self): def convert(self):
@ -292,6 +295,7 @@ class BTFConverter:
except Exception as e: except Exception as e:
print(f"\n✗ Error during conversion: {e}", file=sys.stderr) print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
import traceback import traceback
traceback.print_exc() traceback.print_exc()
sys.exit(1) sys.exit(1)
finally: finally:
@ -304,18 +308,13 @@ class BTFConverter:
dependencies = { dependencies = {
"bpftool": "bpftool --version", "bpftool": "bpftool --version",
"clang": "clang --version", "clang": "clang --version",
"clang2py": "clang2py --version" "clang2py": "clang2py --version",
} }
missing = [] missing = []
for tool, cmd in dependencies.items(): for tool, cmd in dependencies.items():
try: try:
subprocess.run( subprocess.run(cmd, shell=True, check=True, capture_output=True)
cmd,
shell=True,
check=True,
capture_output=True
)
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
missing.append(tool) missing.append(tool)
@ -337,31 +336,31 @@ Examples:
%(prog)s %(prog)s
%(prog)s -o kernel_types.py %(prog)s -o kernel_types.py
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v %(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
""" """,
) )
parser.add_argument( parser.add_argument(
"--btf-source", "--btf-source",
default="/sys/kernel/btf/vmlinux", default="/sys/kernel/btf/vmlinux",
help="Path to BTF source (default: /sys/kernel/btf/vmlinux)" help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
) )
parser.add_argument( parser.add_argument(
"-o", "--output", "-o",
"--output",
default="vmlinux.py", default="vmlinux.py",
help="Output Python file (default: vmlinux.py)" help="Output Python file (default: vmlinux.py)",
) )
parser.add_argument( parser.add_argument(
"-k", "--keep-intermediate", "-k",
"--keep-intermediate",
action="store_true", action="store_true",
help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)" help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
) )
parser.add_argument( parser.add_argument(
"-v", "--verbose", "-v", "--verbose", action="store_true", help="Enable verbose output"
action="store_true",
help="Enable verbose output"
) )
args = parser.parse_args() args = parser.parse_args()
@ -370,7 +369,7 @@ Examples:
btf_source=args.btf_source, btf_source=args.btf_source,
output_file=args.output, output_file=args.output,
keep_intermediate=args.keep_intermediate, keep_intermediate=args.keep_intermediate,
verbose=args.verbose verbose=args.verbose,
) )
converter.convert() converter.convert()