From dd83334f19956f15d7caf04c041517258eed9c9c Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Mon, 30 Jun 2025 16:23:55 -0500 Subject: [PATCH 1/6] [AMDGPU] Ensure non-reserved CSR spilled regs are live-in Fixes: ``` *** Bad machine code: Using an undefined physical register *** - function: widget - basic block: %bb.0 bb (0x564092cbe140) - instruction: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec - operand 1: killed $agpr13 LLVM ERROR: Found 1 machine code errors. ``` The detailed sequence of events that led to this change: 1. MachineVerifier fails because $agpr13 is not defined on line 19 below: ``` 1: bb.0.bb: 2: successors: %bb.1(0x80000000); %bb.1(100.00%) 3: liveins: $agpr14, $agpr15, $sgpr12, $sgpr13, $sgpr14, \ 4: $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, \ 5: $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr48, \ 6: $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, \ 7: $sgpr54, $sgpr55, $sgpr64, $sgpr65, $sgpr66, \ 8: $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, \ 9: $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, \ 10: $sgpr85, $sgpr86, $sgpr87, $sgpr96, $sgpr97, \ 11: $sgpr98, $sgpr99, $vgpr0, $vgpr31, $vgpr40, $vgpr41, \ 12: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, \ 13: $sgpr10_sgpr11 14: $sgpr16 = COPY $sgpr33 15: $sgpr33 = frame-setup COPY $sgpr32 16: $sgpr18_sgpr19 = S_XOR_SAVEEXEC_B64 -1, \ 17: implicit-def $exec, implicit-def dead $scc, \ 18: implicit $exec 19: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, \ 20: implicit $exec 21: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, \ 22: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, \ 23: implicit $exec :: (store (s32) into %stack.38, \ 24: addrspace 5) 25: ... 26: $vgpr43 = IMPLICIT_DEF 27: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, \ 28: killed $vgpr43(tied-def 0) 29: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, \ 30: killed $vgpr43(tied-def 0) 31: $sgpr100_sgpr101 = S_OR_SAVEEXEC_B64 -1, \ 32: implicit-def $exec, implicit-def dead $scc, \ 33: implicit $exec 34: renamable $agpr13 = COPY killed $vgpr43, implicit $exec ``` 2. That instruction is created by emitCSRSpillStores (called by SIFrameLowering::emitPrologue) because $agpr13 is in WWMSpills. See lines 982, 998, and 993 below. ``` 977: // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch 978: // registers. However, save all lanes of callee-saved VGPRs. Due to this, we 979: // might end up flipping the EXEC bits twice. 980: Register ScratchExecCopy; 981: SmallVector, 2> WWMCalleeSavedRegs, WWMScratchRegs; 982: FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs); 983: if (!WWMScratchRegs.empty()) 984: ScratchExecCopy = 985: buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, 986: /*IsProlog*/ true, /*EnableInactiveLanes*/ true); 987: 988: auto StoreWWMRegisters = 989: [&](SmallVectorImpl> &WWMRegs) { 990: for (const auto &Reg : WWMRegs) { 991: Register VGPR = Reg.first; 992: int FI = Reg.second; 993: buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL, 994: VGPR, FI, FrameReg); 995: } 996: }; 997: 998: StoreWWMRegisters(WWMScratchRegs); ``` 3. $agpr13 got added to WWMSpills by SILowerWWMCopies::runOnMachineFunction as it processed the WWM_COPY on line 11 below (corresponds to line 34 above in point #1): ``` 1: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, %45:vgpr_32(tied-def 0) 2: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, %45:vgpr_32(tied-def 0) 3: %44:av_32 = WWM_COPY %45:vgpr_32 ``` --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 7 + .../CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 126 ++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 11552b3a9a438..1978995a90230 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -983,6 +983,7 @@ void SIFrameLowering::emitCSRSpillStores( const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch // registers. However, save all lanes of callee-saved VGPRs. Due to this, we @@ -1005,6 +1006,12 @@ void SIFrameLowering::emitCSRSpillStores( } }; + for (const auto &Reg : WWMScratchRegs) { + if (!MRI.isReserved(Reg.first)) { + MRI.addLiveIn(Reg.first); + MBB.addLiveIn(Reg.first); + } + } StoreWWMRegisters(WWMScratchRegs); auto EnableAllLanes = [&]() { diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll new file mode 100644 index 0000000000000..ef2ae5ca5169b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll @@ -0,0 +1,126 @@ +; Just ensure that llc -O1 does not error out +; RUN: llc -O1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs %s -o - &>/dev/null + +define fastcc void @widget(i1 %arg) { +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + br i1 %arg, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + ret void + +bb3: ; preds = %bb1 + %call = call fastcc i1 @baz(i1 false, float 0.000000e+00, i1 false, float 0.000000e+00, i1 false, i1 false, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, ptr addrspace(5) null, i1 false, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null) + br label %bb1 +} + +define fastcc i1 @baz(i1 %arg, float %arg1, i1 %arg2, float %arg3, i1 %arg4, i1 %arg5, float %arg6, float %arg7, float %arg8, float %arg9, ptr addrspace(5) %arg10, i1 %arg11, ptr %arg12, ptr %arg13, ptr %arg14, ptr %arg15, ptr %arg16, ptr addrspace(5) %arg17, ptr %arg18, ptr %arg19, ptr %arg20, ptr %arg21, ptr %arg22, ptr %arg23, ptr %arg24, ptr addrspace(5) %arg25) #0 { +bb: + br i1 %arg, label %bb26, label %bb27 + +bb26: ; preds = %bb + ret i1 false + +bb27: ; preds = %bb + br i1 %arg, label %bb29, label %bb28 + +bb28: ; preds = %bb27 + unreachable + +bb29: ; preds = %bb49, %bb47, %bb46, %bb39, %bb36, %bb27 + br i1 %arg4, label %bb55, label %bb30 + +bb30: ; preds = %bb29 + br i1 %arg5, label %bb31, label %bb32 + +bb31: ; preds = %bb30 + store i1 false, ptr addrspace(5) null, align 2147483648 + br label %bb55 + +bb32: ; preds = %bb30 + store float %arg3, ptr addrspace(5) null, align 2147483648 + store float %arg7, ptr addrspace(5) %arg10, align 2147483648 + br i1 %arg2, label %bb34, label %bb33 + +bb33: ; preds = %bb32 + %fcmp = fcmp ogt float %arg6, 0.000000e+00 + br i1 %fcmp, label %bb34, label %bb35 + +bb34: ; preds = %bb33, %bb32 + br i1 %arg11, label %bb37, label %bb36 + +bb35: ; preds = %bb33 + store float 0.000000e+00, ptr addrspace(5) null, align 2147483648 + ret i1 false + +bb36: ; preds = %bb34 + store i32 1, ptr addrspace(5) null, align 2147483648 + br label %bb29 + +bb37: ; preds = %bb34 + %load = load i8, ptr %arg12, align 2 + %trunc = trunc i8 %load to i1 + br i1 %trunc, label %bb38, label %bb54 + +bb38: ; preds = %bb37 + br i1 %arg4, label %bb39, label %bb53 + +bb39: ; preds = %bb38 + store float %arg1, ptr addrspace(5) null, align 2147483648 + %load40 = load float, ptr %arg15, align 8 + call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg25, ptr %arg24, i64 12, i1 false) + %load41 = load float, ptr %arg16, align 4 + call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg17, ptr null, i64 36, i1 false) + %load42 = load float, ptr %arg18, align 4 + %load43 = load float, ptr %arg19, align 4 + store float 0x7FF8000000000000, ptr addrspace(5) null, align 2147483648 + %load44 = load float, ptr %arg14, align 16 + store float %load44, ptr addrspace(5) null, align 2147483648 + %fcmp45 = fcmp ole float %arg9, 0.000000e+00 + br i1 %fcmp45, label %bb29, label %bb46 + +bb46: ; preds = %bb39 + %fsub = fsub float %arg8, %load40 + store float %fsub, ptr addrspace(5) null, align 2147483648 + %fadd = fadd float %load42, %load43 + br i1 %arg, label %bb29, label %bb47 + +bb47: ; preds = %bb46 + br i1 %arg, label %bb29, label %bb48 + +bb48: ; preds = %bb47 + br i1 %arg, label %bb49, label %bb52 + +bb49: ; preds = %bb48 + store float 0.000000e+00, ptr %arg23, align 4 + store float 0.000000e+00, ptr %arg22, align 8 + store float %fadd, ptr addrspace(5) null, align 2147483648 + %load50 = load float, ptr %arg20, align 4 + %fdiv = fdiv float %load41, %load50 + store float %fdiv, ptr addrspace(5) null, align 2147483648 + %load51 = load float, ptr %arg13, align 16 + store float %load51, ptr addrspace(5) null, align 2147483648 + store float 1.000000e+00, ptr %arg21, align 4 + br label %bb29 + +bb52: ; preds = %bb48 + unreachable + +bb53: ; preds = %bb38 + ret i1 false + +bb54: ; preds = %bb37 + ret i1 true + +bb55: ; preds = %bb31, %bb29 + %load56 = load i1, ptr addrspace(5) null, align 2147483648 + ret i1 %load56 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1 + +attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } From 9689f7f7e5397db38071db38d3ed9253c17ba48b Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Tue, 1 Jul 2025 06:17:20 -0500 Subject: [PATCH 2/6] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are live-in --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 8 +++--- .../CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 27 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 1978995a90230..1023c27bb3096 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1006,10 +1006,10 @@ void SIFrameLowering::emitCSRSpillStores( } }; - for (const auto &Reg : WWMScratchRegs) { - if (!MRI.isReserved(Reg.first)) { - MRI.addLiveIn(Reg.first); - MBB.addLiveIn(Reg.first); + for (const auto &Reg : make_first_range(WWMScratchRegs)) { + if (!MRI.isReserved(Reg)) { + MRI.addLiveIn(Reg); + MBB.addLiveIn(Reg); } } StoreWWMRegisters(WWMScratchRegs); diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll index ef2ae5ca5169b..e6ad4aaff88a6 100644 --- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll @@ -36,12 +36,12 @@ bb30: ; preds = %bb29 br i1 %arg5, label %bb31, label %bb32 bb31: ; preds = %bb30 - store i1 false, ptr addrspace(5) null, align 2147483648 + store i1 false, ptr addrspace(5) %arg17, align 8 br label %bb55 bb32: ; preds = %bb30 - store float %arg3, ptr addrspace(5) null, align 2147483648 - store float %arg7, ptr addrspace(5) %arg10, align 2147483648 + store float %arg3, ptr addrspace(5) %arg25, align 8 + store float %arg7, ptr addrspace(5) %arg10, align 8 br i1 %arg2, label %bb34, label %bb33 bb33: ; preds = %bb32 @@ -52,11 +52,11 @@ bb34: ; preds = %bb33, %bb32 br i1 %arg11, label %bb37, label %bb36 bb35: ; preds = %bb33 - store float 0.000000e+00, ptr addrspace(5) null, align 2147483648 + store float 0.000000e+00, ptr addrspace(5) %arg25, align 8 ret i1 false bb36: ; preds = %bb34 - store i32 1, ptr addrspace(5) null, align 2147483648 + store i32 1, ptr addrspace(5) %arg17, align 8 br label %bb29 bb37: ; preds = %bb34 @@ -68,22 +68,22 @@ bb38: ; preds = %bb37 br i1 %arg4, label %bb39, label %bb53 bb39: ; preds = %bb38 - store float %arg1, ptr addrspace(5) null, align 2147483648 + store float %arg1, ptr addrspace(5) %arg25, align 8 %load40 = load float, ptr %arg15, align 8 call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg25, ptr %arg24, i64 12, i1 false) %load41 = load float, ptr %arg16, align 4 call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg17, ptr null, i64 36, i1 false) %load42 = load float, ptr %arg18, align 4 %load43 = load float, ptr %arg19, align 4 - store float 0x7FF8000000000000, ptr addrspace(5) null, align 2147483648 + store float 0x7FF8000000000000, ptr addrspace(5) %arg25, align 8 %load44 = load float, ptr %arg14, align 16 - store float %load44, ptr addrspace(5) null, align 2147483648 + store float %load44, ptr addrspace(5) %arg25, align 8 %fcmp45 = fcmp ole float %arg9, 0.000000e+00 br i1 %fcmp45, label %bb29, label %bb46 bb46: ; preds = %bb39 %fsub = fsub float %arg8, %load40 - store float %fsub, ptr addrspace(5) null, align 2147483648 + store float %fsub, ptr addrspace(5) %arg25, align 8 %fadd = fadd float %load42, %load43 br i1 %arg, label %bb29, label %bb47 @@ -96,12 +96,12 @@ bb48: ; preds = %bb47 bb49: ; preds = %bb48 store float 0.000000e+00, ptr %arg23, align 4 store float 0.000000e+00, ptr %arg22, align 8 - store float %fadd, ptr addrspace(5) null, align 2147483648 + store float %fadd, ptr addrspace(5) %arg25, align 8 %load50 = load float, ptr %arg20, align 4 %fdiv = fdiv float %load41, %load50 - store float %fdiv, ptr addrspace(5) null, align 2147483648 + store float %fdiv, ptr addrspace(5) %arg25, align 8 %load51 = load float, ptr %arg13, align 16 - store float %load51, ptr addrspace(5) null, align 2147483648 + store float %load51, ptr addrspace(5) %arg25, align 8 store float 1.000000e+00, ptr %arg21, align 4 br label %bb29 @@ -115,11 +115,10 @@ bb54: ; preds = %bb37 ret i1 true bb55: ; preds = %bb31, %bb29 - %load56 = load i1, ptr addrspace(5) null, align 2147483648 + %load56 = load i1, ptr addrspace(5) null, align 8 ret i1 %load56 } -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1 attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } From 2f056b7895eecdf1a8d03663d9b33785f4a11c37 Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Tue, 1 Jul 2025 09:36:45 -0500 Subject: [PATCH 3/6] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are live-in --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 1023c27bb3096..390b3fe22b5cd 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1006,7 +1006,7 @@ void SIFrameLowering::emitCSRSpillStores( } }; - for (const auto &Reg : make_first_range(WWMScratchRegs)) { + for (const Register &Reg : make_first_range(WWMScratchRegs)) { if (!MRI.isReserved(Reg)) { MRI.addLiveIn(Reg); MBB.addLiveIn(Reg); From 3091872e31873bce47913cff080731a4ea2000ce Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Mon, 7 Jul 2025 08:44:25 -0500 Subject: [PATCH 4/6] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are live-in --- llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll index e6ad4aaff88a6..87d44803f7e19 100644 --- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll @@ -115,7 +115,7 @@ bb54: ; preds = %bb37 ret i1 true bb55: ; preds = %bb31, %bb29 - %load56 = load i1, ptr addrspace(5) null, align 8 + %load56 = load i1, ptr addrspace(5) %arg25, align 8 ret i1 %load56 } From 12ffdb5687b66949e31c645fde1c2a46823b66bf Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Thu, 17 Jul 2025 13:27:31 -0500 Subject: [PATCH 5/6] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are live-in --- .../CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 125 ------------------ .../CodeGen/AMDGPU/bug-undef-spilled-agpr.mir | 97 ++++++++++++++ 2 files changed, 97 insertions(+), 125 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll create mode 100644 llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll deleted file mode 100644 index 87d44803f7e19..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll +++ /dev/null @@ -1,125 +0,0 @@ -; Just ensure that llc -O1 does not error out -; RUN: llc -O1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs %s -o - &>/dev/null - -define fastcc void @widget(i1 %arg) { -bb: - br label %bb1 - -bb1: ; preds = %bb3, %bb - br i1 %arg, label %bb3, label %bb2 - -bb2: ; preds = %bb1 - ret void - -bb3: ; preds = %bb1 - %call = call fastcc i1 @baz(i1 false, float 0.000000e+00, i1 false, float 0.000000e+00, i1 false, i1 false, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, ptr addrspace(5) null, i1 false, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null) - br label %bb1 -} - -define fastcc i1 @baz(i1 %arg, float %arg1, i1 %arg2, float %arg3, i1 %arg4, i1 %arg5, float %arg6, float %arg7, float %arg8, float %arg9, ptr addrspace(5) %arg10, i1 %arg11, ptr %arg12, ptr %arg13, ptr %arg14, ptr %arg15, ptr %arg16, ptr addrspace(5) %arg17, ptr %arg18, ptr %arg19, ptr %arg20, ptr %arg21, ptr %arg22, ptr %arg23, ptr %arg24, ptr addrspace(5) %arg25) #0 { -bb: - br i1 %arg, label %bb26, label %bb27 - -bb26: ; preds = %bb - ret i1 false - -bb27: ; preds = %bb - br i1 %arg, label %bb29, label %bb28 - -bb28: ; preds = %bb27 - unreachable - -bb29: ; preds = %bb49, %bb47, %bb46, %bb39, %bb36, %bb27 - br i1 %arg4, label %bb55, label %bb30 - -bb30: ; preds = %bb29 - br i1 %arg5, label %bb31, label %bb32 - -bb31: ; preds = %bb30 - store i1 false, ptr addrspace(5) %arg17, align 8 - br label %bb55 - -bb32: ; preds = %bb30 - store float %arg3, ptr addrspace(5) %arg25, align 8 - store float %arg7, ptr addrspace(5) %arg10, align 8 - br i1 %arg2, label %bb34, label %bb33 - -bb33: ; preds = %bb32 - %fcmp = fcmp ogt float %arg6, 0.000000e+00 - br i1 %fcmp, label %bb34, label %bb35 - -bb34: ; preds = %bb33, %bb32 - br i1 %arg11, label %bb37, label %bb36 - -bb35: ; preds = %bb33 - store float 0.000000e+00, ptr addrspace(5) %arg25, align 8 - ret i1 false - -bb36: ; preds = %bb34 - store i32 1, ptr addrspace(5) %arg17, align 8 - br label %bb29 - -bb37: ; preds = %bb34 - %load = load i8, ptr %arg12, align 2 - %trunc = trunc i8 %load to i1 - br i1 %trunc, label %bb38, label %bb54 - -bb38: ; preds = %bb37 - br i1 %arg4, label %bb39, label %bb53 - -bb39: ; preds = %bb38 - store float %arg1, ptr addrspace(5) %arg25, align 8 - %load40 = load float, ptr %arg15, align 8 - call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg25, ptr %arg24, i64 12, i1 false) - %load41 = load float, ptr %arg16, align 4 - call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg17, ptr null, i64 36, i1 false) - %load42 = load float, ptr %arg18, align 4 - %load43 = load float, ptr %arg19, align 4 - store float 0x7FF8000000000000, ptr addrspace(5) %arg25, align 8 - %load44 = load float, ptr %arg14, align 16 - store float %load44, ptr addrspace(5) %arg25, align 8 - %fcmp45 = fcmp ole float %arg9, 0.000000e+00 - br i1 %fcmp45, label %bb29, label %bb46 - -bb46: ; preds = %bb39 - %fsub = fsub float %arg8, %load40 - store float %fsub, ptr addrspace(5) %arg25, align 8 - %fadd = fadd float %load42, %load43 - br i1 %arg, label %bb29, label %bb47 - -bb47: ; preds = %bb46 - br i1 %arg, label %bb29, label %bb48 - -bb48: ; preds = %bb47 - br i1 %arg, label %bb49, label %bb52 - -bb49: ; preds = %bb48 - store float 0.000000e+00, ptr %arg23, align 4 - store float 0.000000e+00, ptr %arg22, align 8 - store float %fadd, ptr addrspace(5) %arg25, align 8 - %load50 = load float, ptr %arg20, align 4 - %fdiv = fdiv float %load41, %load50 - store float %fdiv, ptr addrspace(5) %arg25, align 8 - %load51 = load float, ptr %arg13, align 16 - store float %load51, ptr addrspace(5) %arg25, align 8 - store float 1.000000e+00, ptr %arg21, align 4 - br label %bb29 - -bb52: ; preds = %bb48 - unreachable - -bb53: ; preds = %bb38 - ret i1 false - -bb54: ; preds = %bb37 - ret i1 true - -bb55: ; preds = %bb31, %bb29 - %load56 = load i1, ptr addrspace(5) %arg25, align 8 - ret i1 %load56 -} - -declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1 - -attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } -attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir new file mode 100644 index 0000000000000..7336a54ae42db --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: widget +tracksRegLiveness: true +frameInfo: + adjustsStack: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } + - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: widget + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 -1 + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62 + ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $agpr0 = COPY killed renamable $vgpr62 + ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg + ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF + ; GCN-NEXT: dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: liveins: $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; GCN-NEXT: liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.4 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $vgpr62 = COPY renamable $agpr0 + ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg + ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1 + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GCN-NEXT: $exec = S_MOV_B64 -1 + ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: SI_RETURN + bb.0: + liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15 + + %45:vgpr_32 = IMPLICIT_DEF + SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + %16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec + + bb.1: + successors: %bb.3, %bb.2 + + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + successors: %bb.4(0x04000000), %bb.1(0x7c000000) + liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99 + + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.4 + + bb.3: + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5) + ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_BRANCH %bb.2 + + bb.4: + SI_RETURN + +... From 515d5887e308d218f08ba2682ce57298b55f22bd Mon Sep 17 00:00:00 2001 From: Matthew Curtis Date: Mon, 28 Jul 2025 13:37:37 -0500 Subject: [PATCH 6/6] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are live-in --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 390b3fe22b5cd..9b348d46fec4f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1006,7 +1006,7 @@ void SIFrameLowering::emitCSRSpillStores( } }; - for (const Register &Reg : make_first_range(WWMScratchRegs)) { + for (const Register Reg : make_first_range(WWMScratchRegs)) { if (!MRI.isReserved(Reg)) { MRI.addLiveIn(Reg); MBB.addLiveIn(Reg);