format chore

2026-04-02 20:01:27 +00:00 · 2025-10-11 22:00:25 +05:30
parent abbf17748d
commit 75d3ad4fe2
6 changed files with 104 additions and 78 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -12,7 +12,7 @@
 #
 # See https://github.com/pre-commit/pre-commit

-exclude: 'vmlinux.*\.py$'
+exclude: 'vmlinux.py'

 ci:
  autoupdate_commit_msg: "chore: update pre-commit hooks"
--- a/pythonbpf/vmlinux_parser/init.py
+++ b/pythonbpf/vmlinux_parser/init.py
@ -1 +1,3 @@
 from .import_detector import vmlinux_proc
+
+__all__ = ["vmlinux_proc"]
--- a/pythonbpf/vmlinux_parser/vmlinux_class_handler.py
+++ b/pythonbpf/vmlinux_parser/vmlinux_class_handler.py
@ -1,4 +1,3 @@
-import ast
 import logging
 from functools import lru_cache
 import importlib
@ -20,9 +19,9 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
    symbols_in_module, imported_module = get_module_symbols("vmlinux")

    # Handle both node objects and type objects
-    if hasattr(node, 'name'):
+    if hasattr(node, "name"):
        current_symbol_name = node.name
-    elif hasattr(node, '__name__'):
+    elif hasattr(node, "__name__"):
        current_symbol_name = node.__name__
    else:
        current_symbol_name = str(node)
@ -30,7 +29,9 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
    if current_symbol_name not in symbols_in_module:
        raise ImportError(f"{current_symbol_name} not present in module vmlinux")
    logger.info(f"Resolving vmlinux class {current_symbol_name}")
-    logger.debug(f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes")
+    logger.debug(
+        f"Current handler state: {handler.is_ready} readiness and {handler.get_all_nodes()} all nodes"
+    )
    field_table = {}  # should contain the field and its type.

    # Get the class object from the module
@ -42,12 +43,12 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
    # Inspect the class fields
    # Assuming class_obj has fields stored in some standard way
    # If it's a ctypes-like structure with _fields_
-    if hasattr(class_obj, '_fields_'):
+    if hasattr(class_obj, "_fields_"):
        for field_name, field_type in class_obj._fields_:
            field_table[field_name] = field_type

    # If it's using __annotations__
-    elif hasattr(class_obj, '__annotations__'):
+    elif hasattr(class_obj, "__annotations__"):
        for field_name, field_type in class_obj.__annotations__.items():
            field_table[field_name] = field_type

@ -69,17 +70,24 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
                print("elem_name:", elem_name, "elem_type:", elem_type)
                # currently fails when a non-normal type appears which is basically every time
                identify_ctypes_type(elem_type)
-                symbol_name = elem_type.__name__ if hasattr(elem_type, '__name__') else str(elem_type)
+                symbol_name = (
+                    elem_type.__name__
+                    if hasattr(elem_type, "__name__")
+                    else str(elem_type)
+                )
                vmlinux_symbol = getattr(imported_module, symbol_name)
                if process_vmlinux_class(vmlinux_symbol, llvm_module, handler):
                    new_dep_node.set_field_ready(elem_name, True)
            else:
-                raise ValueError(f"{elem_name} with type {elem_type} not supported in recursive resolver")
+                raise ValueError(
+                    f"{elem_name} with type {elem_type} not supported in recursive resolver"
+                )
        handler.add_node(new_dep_node)
        logger.info(f"added node: {current_symbol_name}")

    return True

+
 def identify_ctypes_type(t):
    if isinstance(t, type):  # t is a type/class
        if issubclass(t, ctypes.Array):
--- a/pythonbpf/vmlinux_parser/dependency_node.py
+++ b/pythonbpf/vmlinux_parser/dependency_node.py
@ -5,6 +5,7 @@ from typing import Dict, Any, Optional
@dataclass
 class Field:
    """Represents a field in a dependency node with its type and readiness state."""
+
    name: str
    type: type
    value: Any = None
@ -64,13 +65,22 @@ class DependencyNode:
        ready_fields = somestruct.get_ready_fields()
        print(f"Ready fields: {[field.name for field in ready_fields.values()]}")  # ['field_1', 'field_2']
    """
+
    name: str
    fields: Dict[str, Field] = field(default_factory=dict)
    _ready_cache: Optional[bool] = field(default=None, repr=False)

-    def add_field(self, name: str, field_type: type, initial_value: Any = None, ready: bool = False) -> None:
+    def add_field(
+        self,
+        name: str,
+        field_type: type,
+        initial_value: Any = None,
+        ready: bool = False,
+    ) -> None:
        """Add a field to the node with an optional initial value and readiness state."""
-        self.fields[name] = Field(name=name, type=field_type, value=initial_value, ready=ready)
+        self.fields[name] = Field(
+            name=name, type=field_type, value=initial_value, ready=ready
+        )
        # Invalidate readiness cache
        self._ready_cache = None

--- a/pythonbpf/vmlinux_parser/import_detector.py
+++ b/pythonbpf/vmlinux_parser/import_detector.py
@ -6,7 +6,7 @@ import inspect

 from .dependency_handler import DependencyHandler
 from .ir_generation import IRGenerator
-from .vmlinux_class_handler import process_vmlinux_class
+from .class_handler import process_vmlinux_class

 logger = logging.getLogger(__name__)

@ -58,8 +58,8 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
                # Valid single import
                for alias in node.names:
                    import_name = alias.name
-                    # Use alias if provided, otherwise use the original name
-                    as_name = alias.asname if alias.asname else alias.name
+                    # Use alias if provided, otherwise use the original name (commented)
+                    # as_name = alias.asname if alias.asname else alias.name
                    vmlinux_imports.append(("vmlinux", node))
                    logger.info(f"Found vmlinux import: {import_name}")

@ -68,13 +68,14 @@ def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
            for alias in node.names:
                if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
                    raise SyntaxError(
-                        f"Direct import of vmlinux module is not supported. "
-                        f"Use 'from vmlinux import <type>' instead."
+                        "Direct import of vmlinux module is not supported. "
+                        "Use 'from vmlinux import <type>' instead."
                    )

    logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
    return vmlinux_imports

+
 def vmlinux_proc(tree: ast.AST, module):
    import_statements = detect_import_statement(tree)

@ -107,7 +108,10 @@ def vmlinux_proc(tree: ast.AST, module):
            imported_name = alias.name
            found = False
            for mod_node in mod_ast.body:
-                if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
+                if (
+                    isinstance(mod_node, ast.ClassDef)
+                    and mod_node.name == imported_name
+                ):
                    process_vmlinux_class(mod_node, module, handler)
                    found = True
                    break
@ -120,9 +124,12 @@ def vmlinux_proc(tree: ast.AST, module):
                if found:
                    break
            if not found:
-                logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")
+                logger.info(
+                    f"{imported_name} not found as ClassDef or Assign in vmlinux"
+                )

    IRGenerator(module, handler)

+
 def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
    raise NotImplementedError("Assignment handling has not been implemented yet")
--- a/tools/vmlinux-gen.py
+++ b/tools/vmlinux-gen.py
@ -26,8 +26,13 @@ import tempfile


 class BTFConverter:
-    def __init__(self, btf_source="/sys/kernel/btf/vmlinux", output_file="vmlinux.py",
-                 keep_intermediate=False, verbose=False):
+    def __init__(
+        self,
+        btf_source="/sys/kernel/btf/vmlinux",
+        output_file="vmlinux.py",
+        keep_intermediate=False,
+        verbose=False,
+    ):
        self.btf_source = btf_source
        self.output_file = output_file
        self.keep_intermediate = keep_intermediate
@ -44,11 +49,7 @@ class BTFConverter:
        self.log(f"{description}...")
        try:
            result = subprocess.run(
-                cmd,
-                shell=True,
-                check=True,
-                capture_output=True,
-                text=True
+                cmd, shell=True, check=True, capture_output=True, text=True
            )
            if self.verbose and result.stdout:
                print(result.stdout)
@ -69,30 +70,30 @@ class BTFConverter:
        """Step 1.5: Preprocess enum definitions."""
        self.log("Preprocessing enum definitions...")

-        with open(input_file, 'r') as f:
+        with open(input_file, "r") as f:
            original_code = f.read()

        # Extract anonymous enums
        enums = re.findall(
-            r'(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;',
-            original_code
+            r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
+            original_code,
        )
-        enum_defs = [enum_block + ';' for enum_block, _ in enums]
+        enum_defs = [enum_block + ";" for enum_block, _ in enums]

        # Replace anonymous enums with int declarations
        processed_code = re.sub(
-            r'(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;',
-            r'int \1;',
-            original_code
+            r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
+            r"int \1;",
+            original_code,
        )

        # Prepend enum definitions
        if enum_defs:
-            enum_text = '\n'.join(enum_defs) + '\n\n'
+            enum_text = "\n".join(enum_defs) + "\n\n"
            processed_code = enum_text + processed_code

        output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
-        with open(output_file, 'w') as f:
+        with open(output_file, "w") as f:
            f.write(processed_code)

        return output_file
@ -102,18 +103,22 @@ class BTFConverter:
        """Step 2.5: Process struct kioctx to extract nested anonymous structs."""
        self.log("Processing struct kioctx nested structs...")

-        with open(input_file, 'r') as f:
+        with open(input_file, "r") as f:
            content = f.read()

        # Pattern to match struct kioctx with its full body (handles multiple nesting levels)
-        kioctx_pattern = r'struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;'
+        kioctx_pattern = (
+            r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
+        )

        def process_kioctx_replacement(match):
            full_struct = match.group(0)
            self.log(f"Found struct kioctx, length: {len(full_struct)} chars")

            # Extract the struct body (everything between outermost { and })
-            body_match = re.search(r'struct\s+kioctx\s*\{(.*)\}\s*;', full_struct, re.DOTALL)
+            body_match = re.search(
+                r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
+            )
            if not body_match:
                return full_struct

@ -121,7 +126,7 @@ class BTFConverter:

            # Find all anonymous structs within the body
            # Pattern: struct { ... } followed by ; (not a member name)
-            anon_struct_pattern = r'struct\s*\{[^}]*\}'
+            # anon_struct_pattern = r"struct\s*\{[^}]*\}"

            anon_structs = []
            anon_counter = 4  # Start from 4, counting down to 1
@ -131,7 +136,9 @@ class BTFConverter:
                anon_struct_content = m.group(0)

                # Extract the body of the anonymous struct
-                anon_body_match = re.search(r'struct\s*\{(.*)\}', anon_struct_content, re.DOTALL)
+                anon_body_match = re.search(
+                    r"struct\s*\{(.*)\}", anon_struct_content, re.DOTALL
+                )
                if not anon_body_match:
                    return anon_struct_content

@ -154,7 +161,7 @@ class BTFConverter:
            processed_body = body

            # Find all occurrences and process them
-            pattern_with_semicolon = r'struct\s*\{([^}]*)\}\s*;'
+            pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
            matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))

            if not matches:
@ -178,14 +185,16 @@ class BTFConverter:

                # Replace in the body
                replacement = f"struct {anon_name} {member_name};"
-                processed_body = processed_body[:start_pos] + replacement + processed_body[end_pos:]
+                processed_body = (
+                    processed_body[:start_pos] + replacement + processed_body[end_pos:]
+                )

                anon_counter -= 1

            # Rebuild the complete definition
            if anon_structs:
                # Prepend the anonymous struct definitions
-                anon_definitions = '\n'.join(anon_structs) + '\n\n'
+                anon_definitions = "\n".join(anon_structs) + "\n\n"
                new_struct = f"struct kioctx {{{processed_body}}};"
                return anon_definitions + new_struct
            else:
@ -193,14 +202,11 @@ class BTFConverter:

        # Apply the transformation
        processed_content = re.sub(
-            kioctx_pattern,
-            process_kioctx_replacement,
-            content,
-            flags=re.DOTALL
+            kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
        )

        output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
-        with open(output_file, 'w') as f:
+        with open(output_file, "w") as f:
            f.write(processed_content)

        self.log(f"Saved kioctx-processed output to {output_file}")
@ -218,7 +224,7 @@ class BTFConverter:
        output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
        cmd = (
            f"clang2py {input_file} -o {output_file} "
-            f"--clang-args=\"-fno-ms-extensions -I/usr/include -I/usr/include/linux\""
+            f'--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
        )
        self.run_command(cmd, "Converting to Python ctypes")
        return output_file
@ -234,26 +240,22 @@ class BTFConverter:
        data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)

        # Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
-        data = re.sub(r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data)
+        data = re.sub(
+            r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
+        )

        # Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
-        data = re.sub(
-            r"(ctypes\.c_char)(\s*,\s*\d+\))",
-            r"ctypes.c_uint8\2",
-            data
-        )
+        data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)

        # below to replace those c_bool with bitfield greater than 8
        def repl(m):
            name, bits = m.groups()
-            return f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
-
-        data = re.sub(
-            r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)",
-            repl,
-            data
+            return (
+                f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
            )

+        data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)
+
        # Remove ctypes. prefix from invalid entries
        invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
        for name in invalid_ctypes:
@ -269,6 +271,7 @@ class BTFConverter:
        if not self.keep_intermediate and self.temp_dir != ".":
            self.log(f"Cleaning up temporary directory: {self.temp_dir}")
            import shutil
+
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def convert(self):
@ -292,6 +295,7 @@ class BTFConverter:
        except Exception as e:
            print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
            import traceback
+
            traceback.print_exc()
            sys.exit(1)
        finally:
@ -304,18 +308,13 @@ class BTFConverter:
        dependencies = {
            "bpftool": "bpftool --version",
            "clang": "clang --version",
-            "clang2py": "clang2py --version"
+            "clang2py": "clang2py --version",
        }

        missing = []
        for tool, cmd in dependencies.items():
            try:
-                subprocess.run(
-                    cmd,
-                    shell=True,
-                    check=True,
-                    capture_output=True
-                )
+                subprocess.run(cmd, shell=True, check=True, capture_output=True)
            except subprocess.CalledProcessError:
                missing.append(tool)

@ -337,31 +336,31 @@ Examples:
  %(prog)s
  %(prog)s -o kernel_types.py
  %(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
-        """
+        """,
    )

    parser.add_argument(
        "--btf-source",
        default="/sys/kernel/btf/vmlinux",
-        help="Path to BTF source (default: /sys/kernel/btf/vmlinux)"
+        help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
    )

    parser.add_argument(
-        "-o", "--output",
+        "-o",
+        "--output",
        default="vmlinux.py",
-        help="Output Python file (default: vmlinux.py)"
+        help="Output Python file (default: vmlinux.py)",
    )

    parser.add_argument(
-        "-k", "--keep-intermediate",
+        "-k",
+        "--keep-intermediate",
        action="store_true",
-        help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)"
+        help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
    )

    parser.add_argument(
-        "-v", "--verbose",
-        action="store_true",
-        help="Enable verbose output"
+        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )

    args = parser.parse_args()
@ -370,7 +369,7 @@ Examples:
        btf_source=args.btf_source,
        output_file=args.output,
        keep_intermediate=args.keep_intermediate,
-        verbose=args.verbose
+        verbose=args.verbose,
    )

    converter.convert()