From 3626a9ac94aee180aae1e1ed779107fcbe8a6bae Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Tue, 29 Jul 2025 17:36:22 +0100 Subject: [PATCH] [OMPIRBuilder] Avoid invalid debug location. This fixes #147063. I tried to fix this issue in more general way in https://github.com/llvm/llvm-project/pull/147091 but the reviewer suggested to fix the locations which are causing this issue. So this is a more targeted approach. The problem is that InsertPoint class does not hold the debug location. It is obtained from the instruction at the current position. But if the current position is at the end of the block, we just leave the debug location as is (see SetInsertPoint). We have 2 scenarios that we need to handle: In first situation, we have the location before hand and we can save the correct debug location. For example, in the following code, even if the line 3 does not end up setting the debug location, we can save it before line 1 and then restore it. This can be done either manually or using the llvm::InsertPointGuard as shown below. 1. auto curPos = builder.saveIP(); 2. builder.restoreIP(/* some new pos */); 3. builder.restoreIP(curPos); { llvm::InsertPointGuard IPG(builder); builder.restoreIP(/* some new pos */); } For the 2nd scenario, look at the code below. 1. void fn(InsertPointTy allocIP, InsertPointTy codegenIP) { 2. builder.setInsertPoint(allocIP); 3. // generate some alloca 4. builder.setInsertPoint(codegenIP); 5. } The fn can be called from anywhere and we can't assume the debug location of the builder is at the valid location. So if line 4 does not update the debug location because the codegenIP points at the end of the block, the rest of the code can end up using the debug location of the allocaIP. The solution here is to use the locaiton of the last instruction in that block for such case. I have added a wrapper function over restoreIP that could be called for such cases. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 18 ++++++-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 + .../Target/LLVMIR/omptarget-debug-147063.mlir | 45 +++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 3aa4f7ae04c33..6e266912dc59c 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -92,6 +92,18 @@ static bool isConflictIP(IRBuilder<>::InsertPoint IP1, return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint(); } +/// This is wrapper over IRBuilderBase::restoreIP that also restores the current +/// debug location to the last instruction in the specified basic block if the +/// insert point points to the end of the block. +static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, + llvm::IRBuilderBase::InsertPoint IP) { + Builder.restoreIP(IP); + llvm::BasicBlock *BB = Builder.GetInsertBlock(); + llvm::BasicBlock::iterator I = Builder.GetInsertPoint(); + if (!BB->empty() && I == BB->end()) + Builder.SetCurrentDebugLocation(BB->back().getStableDebugLoc()); +} + static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) { // Valid ordered/unordered and base algorithm combinations. switch (SchedType & ~OMPScheduleType::MonotonicityMask) { @@ -8586,7 +8598,7 @@ Error OpenMPIRBuilder::emitOffloadingArrays( ArrayType *SizeArrayType = ArrayType::get(Int64Ty, Info.NumberOfPtrs); Info.RTArgs.SizesArray = Builder.CreateAlloca( SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes"); - Builder.restoreIP(CodeGenIP); + restoreIPandDebugLoc(Builder, CodeGenIP); } else { auto *SizesArrayInit = ConstantArray::get( ArrayType::get(Int64Ty, ConstSizes.size()), ConstSizes); @@ -8605,7 +8617,7 @@ Error OpenMPIRBuilder::emitOffloadingArrays( AllocaInst *Buffer = Builder.CreateAlloca( SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes"); Buffer->setAlignment(OffloadSizeAlign); - Builder.restoreIP(CodeGenIP); + restoreIPandDebugLoc(Builder, CodeGenIP); Builder.CreateMemCpy( Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()), SizesArrayGbl, OffloadSizeAlign, @@ -8615,7 +8627,7 @@ Error OpenMPIRBuilder::emitOffloadingArrays( Info.RTArgs.SizesArray = Buffer; } - Builder.restoreIP(CodeGenIP); + restoreIPandDebugLoc(Builder, CodeGenIP); } // The map types are always constant so we don't need to generate code to diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 49e1e55c686a6..c74632f69569a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4326,9 +4326,11 @@ createAlteredByCaptureMap(MapInfoData &mapData, if (!isPtrTy) { auto curInsert = builder.saveIP(); + llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation(); builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); auto *memTempAlloc = builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted"); + builder.SetCurrentDebugLocation(DbgLoc); builder.restoreIP(curInsert); builder.CreateStore(newV, memTempAlloc); diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir new file mode 100644 index 0000000000000..12d389adbb388 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir @@ -0,0 +1,45 @@ +// RUN: mlir-translate -mlir-to-llvmir %s + +module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { + omp.private {type = private} @_QFFfnEv_private_i32 : i32 loc(#loc1) + llvm.func internal @_QFPfn() { + %0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1) + %1 = llvm.alloca %0 x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr loc(#loc1) + %2 = llvm.mlir.constant(1 : i32) : i32 + omp.parallel private(@_QFFfnEv_private_i32 %1 -> %arg0 : !llvm.ptr) { + llvm.store %2, %arg0 : i32, !llvm.ptr loc(#loc2) + %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "v"} loc(#loc2) + omp.target map_entries(%4 -> %arg1 : !llvm.ptr) { + %5 = llvm.mlir.constant(1 : i32) : i32 + %6 = llvm.load %arg1 : !llvm.ptr -> i32 loc(#loc3) + %7 = llvm.add %6, %5 : i32 loc(#loc3) + llvm.store %7, %arg1 : i32, !llvm.ptr loc(#loc3) + omp.terminator loc(#loc3) + } loc(#loc7) + omp.terminator + } loc(#loc4) + llvm.return + } loc(#loc6) +} + +#di_file = #llvm.di_file<"target.f90" in ""> +#di_null_type = #llvm.di_null_type +#di_compile_unit = #llvm.di_compile_unit, + sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", + isOptimized = false, emissionKind = LineTablesOnly> +#di_subroutine_type = #llvm.di_subroutine_type< + callingConvention = DW_CC_program, types = #di_null_type> +#di_subprogram = #llvm.di_subprogram, + compileUnit = #di_compile_unit, scope = #di_file, name = "main", + file = #di_file, subprogramFlags = "Definition|MainSubprogram", + type = #di_subroutine_type> +#di_subprogram1 = #llvm.di_subprogram + +#loc1 = loc("test.f90":7:15) +#loc2 = loc("test.f90":1:7) +#loc3 = loc("test.f90":3:7) +#loc4 = loc("test.f90":16:7) +#loc6 = loc(fused<#di_subprogram>[#loc1]) +#loc7 = loc(fused<#di_subprogram1>[#loc3])