From 8372111616ce5a117acfa2571577e4ac31fe977a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 21:25:53 +0530 Subject: [PATCH] add basic IR gen strategy --- pythonbpf/codegen.py | 14 +++++- .../vmlinux_parser/ir_gen/debug_info_gen.py | 15 ++++++ .../vmlinux_parser/ir_gen/ir_generation.py | 50 +++++++++++++++---- 3 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 5db9f88..8d25644 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -19,12 +19,22 @@ from pylibbpf import BpfProgram import tempfile from logging import Logger import logging +import re logger: Logger = logging.getLogger(__name__) VERSION = "v0.1.4" +def finalize_module(original_str): + """After all IR generation is complete, we monkey patch btf_ama attribute""" + + # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses. + pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)' + replacement = r'\1 "btf_ama"' + return re.sub(pattern, replacement, original_str) + + def find_bpf_chunks(tree): """Find all functions decorated with @bpf in the AST.""" bpf_functions = [] @@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) + module_string = finalize_module(str(module)) + logger.info(f"IR written to {output}") with open(output, "w") as f: f.write(f'source_filename = "{filename}"\n') - f.write(str(module)) + f.write(module_string) f.write("\n") return output diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py new file mode 100644 index 0000000..0b38cd6 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -0,0 +1,15 @@ +from pythonbpf.debuginfo import DebugInfoGenerator + + +def debug_info_generation(struct, llvm_module): + generator = DebugInfoGenerator(llvm_module) + # this is sample debug info generation + # i64type = generator.get_uint64_type() + + struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True) + + global_var = generator.create_global_var_debug_info( + struct.name, struct_type, is_local=False + ) + + return global_var diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index d500cf0..01e55da 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,12 +1,16 @@ import logging -from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler +from ..dependency_handler import DependencyHandler +from .debug_info_gen import debug_info_generation +from ..dependency_node import DependencyNode +import llvmlite.ir as ir logger = logging.getLogger(__name__) class IRGenerator: - def __init__(self, module, handler: DependencyHandler): - self.module = module + # get the assignments dict and add this stuff to it. + def __init__(self, llvm_module, handler: DependencyHandler, assignment=None): + self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] if not handler.is_ready: @@ -15,22 +19,48 @@ class IRGenerator: ) for struct in handler: self.struct_processor(struct) - print() def struct_processor(self, struct): if struct.name not in self.generated: print(f"IR generating for {struct.name}") - print(f"Struct is {struct}") for dependency in struct.depends_on: if dependency not in self.generated: dep_node_from_dependency = self.handler[dependency] self.struct_processor(dep_node_from_dependency) self.generated.append(dependency) - # write actual processor logic here after assuming all dependencies are resolved + # actual processor logic here after assuming all dependencies are resolved # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. + self.gen_ir(struct) self.generated.append(struct.name) - def struct_name_generator( - self, - ) -> None: - pass + def gen_ir(self, struct): + # currently we generate all possible field accesses for CO-RE and put into the assignment table + debug_info = debug_info_generation(struct, self.llvm_module) + field_index = 0 + for field_name, field in struct.fields.items(): + # does not take arrays and similar types into consideration yet. + field_co_re_name = self._struct_name_generator(struct, field, field_index) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + print() + + def _struct_name_generator( + self, struct: DependencyNode, field, field_index: int + ) -> str: + if struct.name.startswith("struct_"): + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset}" + + "$" + + f"0:{field_index}" + ) + return name + else: + raise TypeError( + "Name generation cannot occur due to type name not starting with struct" + )