//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// The main goal of this pass is to remove unnecessary reinterpret
// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g.:
//
// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
// %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
//
// This pass also looks for ptest intrinsics and phi instructions whose
// operands are needlessly converted to and from svbool_t.
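//
// For example (illustrative IR only; value names are placeholders), a ptest
// whose operands have both been widened from the same predicate type:
//
// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
// %2 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
// %3 = @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1,
//                                          <vscale x 16 x i1> %2)
//
// can instead test the original operands directly:
//
// %3 = @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %a,
//                                         <vscale x 4 x i1> %b)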
//
//===----------------------------------------------------------------------===//

#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "sve-intrinsic-opts"

namespace llvm {
void initializeSVEIntrinsicOptsPass(PassRegistry &);
}

namespace {
struct SVEIntrinsicOpts : public ModulePass {
  static char ID; // Pass identification, replacement for typeid
  SVEIntrinsicOpts() : ModulePass(ID) {
    initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  static IntrinsicInst *isReinterpretFromSVBool(Value *V);
  static IntrinsicInst *isReinterpretToSVBool(Value *V);

  static bool optimizeIntrinsic(Instruction *I);

  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);

  static bool optimizeConvertFromSVBool(IntrinsicInst *I);
  static bool optimizePTest(IntrinsicInst *I);

  static bool processPhiNode(IntrinsicInst *I);
};
} // end anonymous namespace

void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.setPreservesCFG();
}

char SVEIntrinsicOpts::ID = 0;
static const char *name = "SVE intrinsics optimizations";
INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)

namespace llvm {
ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); }
} // namespace llvm

/// Returns V if it's a cast to <n x 16 x i1> (aka svbool_t), nullptr
/// otherwise.
IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) {
  IntrinsicInst *I = dyn_cast<IntrinsicInst>(V);
  if (!I)
    return nullptr;

  if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return nullptr;

  return I;
}

/// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr
/// otherwise.
IntrinsicInst *SVEIntrinsicOpts::isReinterpretFromSVBool(Value *V) {
  IntrinsicInst *I = dyn_cast<IntrinsicInst>(V);
  if (!I)
    return nullptr;

  if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_from_svbool)
    return nullptr;

  return I;
}

/// Removes redundant reinterpret casts that exist only to carry a predicate
/// value through control flow (i.e. across a phi node).
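///
/// For example (illustrative IR; value names are placeholders), where %p1 and
/// %p2 are convert.to.svbool casts of the <vscale x 4 x i1> values %a and %b:
///
///   %p = phi <vscale x 16 x i1> [ %p1, %bb1 ], [ %p2, %bb2 ]
///   %x = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %p)
///
/// becomes a phi over the original predicates:
///
///   %p = phi <vscale x 4 x i1> [ %a, %bb1 ], [ %b, %bb2 ]
///
/// and the now-unused casts are erased.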
bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {

  SmallVector<Instruction *, 32> Worklist;
  auto RequiredType = X->getType();

  auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  // Don't create a new Phi unless we can remove the old one.
  if (!PN->hasOneUse())
    return false;

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
    if (!Reinterpret ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return false;
  }

  // Create the new Phi
  LLVMContext &Ctx = PN->getContext();
  IRBuilder<> Builder(Ctx);
  Builder.SetInsertPoint(PN);
  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
  Worklist.push_back(PN);

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
    Worklist.push_back(Reinterpret);
  }

  // Cleanup Phi Node and reinterprets
  X->replaceAllUsesWith(NPN);
  X->eraseFromParent();

  for (auto &I : Worklist)
    if (I->use_empty())
      I->eraseFromParent();

  return true;
}

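/// Fold a ptest intrinsic (any/first/last) whose operands are both
/// convert.to.svbool casts from the same predicate type into a ptest on the
/// original, narrower operands; the casts are erased afterwards if they have
/// no other users. See the ptest example in the file header.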
bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
  IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
  IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));

  if (Op1 && Op2 &&
      Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {

    Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
    Type *Tys[] = {Op1->getArgOperand(0)->getType()};
    Module *M = I->getModule();

    auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
    auto CI = CallInst::Create(Fn, Ops, I->getName(), I);

    I->replaceAllUsesWith(CI);
    I->eraseFromParent();
    if (Op1->use_empty())
      Op1->eraseFromParent();
    if (Op2->use_empty())
      Op2->eraseFromParent();

    return true;
  }

  return false;
}

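/// Simplify a convert.from.svbool intrinsic: fold it across a phi of
/// convert.to.svbool casts (see processPhiNode above), or erase a
/// convert.from.svbool(convert.to.svbool(X)) round trip when X already has
/// the type this intrinsic would produce; the inner cast is also erased if it
/// has no other users.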
bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
  assert(isReinterpretFromSVBool(I));

  // If the reinterpret instruction operand is a PHI Node, try to combine the
  // reinterprets across the phi instead.
  if (isa<PHINode>(I->getArgOperand(0)))
    return processPhiNode(I);

  // If we have a reinterpret intrinsic I of type A which is converting from
  // another reinterpret Y of type B, and the source type of Y is A, then we can
  // elide away both reinterprets if there are no other users of Y.
  auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
  if (!Y)
    return false;

  Value *SourceVal = Y->getArgOperand(0);
  if (I->getType() != SourceVal->getType())
    return false;

  I->replaceAllUsesWith(SourceVal);
  I->eraseFromParent();
  if (Y->use_empty())
    Y->eraseFromParent();

  return true;
}

bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
  IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
  if (!IntrI)
    return false;

  switch (IntrI->getIntrinsicID()) {
  case Intrinsic::aarch64_sve_convert_from_svbool:
    return optimizeConvertFromSVBool(IntrI);
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
    return optimizePTest(IntrI);
  default:
    return false;
  }

  return true;
}

bool SVEIntrinsicOpts::optimizeFunctions(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;
  for (auto *F : Functions) {
    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();

    // Traverse the DT with an rpo walk so we see defs before uses, allowing
    // simplification to be done incrementally.
    BasicBlock *Root = DT->getRoot();
    ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
    for (auto *BB : RPOT)
      for (Instruction &I : make_early_inc_range(*BB))
        Changed |= optimizeIntrinsic(&I);
  }
  return Changed;
}

bool SVEIntrinsicOpts::runOnModule(Module &M) {
  bool Changed = false;
  SmallSetVector<Function *, 4> Functions;

  // Check for SVE intrinsic declarations first so that we only iterate over
  // relevant functions. Where an appropriate declaration is found, store the
  // function(s) where it is used so we can target these only.
  for (auto &F : M.getFunctionList()) {
    if (!F.isDeclaration())
      continue;

    switch (F.getIntrinsicID()) {
    case Intrinsic::aarch64_sve_convert_from_svbool:
    case Intrinsic::aarch64_sve_ptest_any:
    case Intrinsic::aarch64_sve_ptest_first:
    case Intrinsic::aarch64_sve_ptest_last:
      for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
        // Users of these intrinsic declarations should always be call
        // instructions, but guard the cast to avoid a null dereference.
        if (auto *Inst = dyn_cast<Instruction>(*I++))
          Functions.insert(Inst->getFunction());
      }
      break;
    default:
      break;
    }
  }

  if (!Functions.empty())
    Changed |= optimizeFunctions(Functions);

  return Changed;
}