From bf50bed5b55043f5fb7dfc16f3d59192c84b9d81 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang@intel.com>
Date: Wed, 30 Jul 2025 21:12:43 +0800
Subject: [PATCH 1/2] [X86][APX] Do optimizeMemoryInst for v1X masked
 load/store

Fix redundant LEA: https://godbolt.org/z/hrP1eox4Y
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 23 +++++++++++++++++++++++
 llvm/test/CodeGen/X86/apx/cf.ll     | 19 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 416c56d5a36f8..f16283be1b996 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       return optimizeGatherScatterInst(II, II->getArgOperand(0));
     case Intrinsic::masked_scatter:
       return optimizeGatherScatterInst(II, II->getArgOperand(1));
+    case Intrinsic::masked_load:
+      // Treat v1X masked load as load X type.
+      if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(0);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
+    case Intrinsic::masked_store:
+      // Treat v1X masked store as store X type.
+      if (auto *VT =
+              dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(1);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
     }
 
     SmallVector<Value *, 2> PtrOps;
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index b111ae542d93a..8c9869207f775 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -194,3 +194,22 @@ entry:
   call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
   ret void
 }
+
+define void @sink_gep(ptr %p, i1 %cond) {
+; CHECK-LABEL: sink_gep:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    cfcmovnel %eax, 112(%rdi)
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %0 = getelementptr i8, ptr %p, i64 112
+  br label %next
+
+next:
+  %1 = bitcast i1 %cond to <1 x i1>
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
+  store i32 0, ptr %p, align 4
+  ret void
+}

From f51c6bdb95785819ea41409c9ff062411b91fc70 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang@intel.com>
Date: Thu, 31 Jul 2025 10:50:22 +0800
Subject: [PATCH 2/2] Add load

---
 llvm/test/CodeGen/X86/apx/cf.ll | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index 8c9869207f775..c97ec38aaff01 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -201,7 +201,8 @@ define void @sink_gep(ptr %p, i1 %cond) {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    cfcmovnel %eax, 112(%rdi)
-; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    cfcmovnel 112(%rdi), %eax
+; CHECK-NEXT:    movl %eax, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
   %0 = getelementptr i8, ptr %p, i64 112
@@ -210,6 +211,7 @@ entry:
 next:
   %1 = bitcast i1 %cond to <1 x i1>
   call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
-  store i32 0, ptr %p, align 4
+  %2 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %0, i32 1, <1 x i1> %1, <1 x i32> zeroinitializer)
+  store <1 x i32> %2, ptr %p, align 4
   ret void
 }