diff --git a/opt.cpp b/opt.cpp
index c9cc2ebb..55da6c15 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -934,7 +934,7 @@ Optimize(llvm::Module *module, int optLevel) {
  */
 class IntrinsicsOpt : public llvm::BasicBlockPass {
 public:
-    IntrinsicsOpt();
+    IntrinsicsOpt() : BasicBlockPass(ID) {};
 
     const char *getPassName() const { return "Intrinsics Cleanup Optimization"; }
     bool runOnBasicBlock(llvm::BasicBlock &BB);
@@ -977,39 +977,6 @@ private:
 
 char IntrinsicsOpt::ID = 0;
 
-IntrinsicsOpt::IntrinsicsOpt()
-    : BasicBlockPass(ID) {
-
-    // All of the mask instructions we may encounter. Note that even if
-    // compiling for AVX, we may still encounter the regular 4-wide SSE
-    // MOVMSK instruction.
-    if (llvm::Function *ssei8Movmsk =
-        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse2_pmovmskb_128))) {
-        maskInstructions.push_back(ssei8Movmsk);
-    }
-    if (llvm::Function *sseFloatMovmsk =
-        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse_movmsk_ps))) {
-        maskInstructions.push_back(sseFloatMovmsk);
-    }
-    if (llvm::Function *__movmsk =
-        m->module->getFunction("__movmsk")) {
-        maskInstructions.push_back(__movmsk);
-    }
-    if (llvm::Function *avxFloatMovmsk =
-        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_movmsk_ps_256))) {
-        maskInstructions.push_back(avxFloatMovmsk);
-    }
-
-    // And all of the blend instructions
-    blendInstructions.push_back(BlendInstruction(
-        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse41_blendvps)),
-        0xf, 0, 1, 2));
-    blendInstructions.push_back(BlendInstruction(
-        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_blendv_ps_256)),
-        0xff, 0, 1, 2));
-}
-
-
 /** Given an llvm::Value, return true if we can determine that it's an
     undefined value.  This only makes a weak attempt at chasing this down,
     only detecting flat-out undef values, and bitcasts of undef values.
@@ -1036,6 +1003,37 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
 
     DEBUG_START_PASS("IntrinsicsOpt");
 
+    // We can't initialize the mask/blend function vectors during pass initialization,
+    // as the functions may be optimized out by the time the pass is invoked.
+
+    // All of the mask instructions we may encounter. Note that even if
+    // compiling for AVX, we may still encounter the regular 4-wide SSE
+    // MOVMSK instruction.
+    if (llvm::Function *ssei8Movmsk =
+        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse2_pmovmskb_128))) {
+        maskInstructions.push_back(ssei8Movmsk);
+    }
+    if (llvm::Function *sseFloatMovmsk =
+        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse_movmsk_ps))) {
+        maskInstructions.push_back(sseFloatMovmsk);
+    }
+    if (llvm::Function *__movmsk =
+        m->module->getFunction("__movmsk")) {
+        maskInstructions.push_back(__movmsk);
+    }
+    if (llvm::Function *avxFloatMovmsk =
+        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_movmsk_ps_256))) {
+        maskInstructions.push_back(avxFloatMovmsk);
+    }
+
+    // And all of the blend instructions
+    blendInstructions.push_back(BlendInstruction(
+        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse41_blendvps)),
+        0xf, 0, 1, 2));
+    blendInstructions.push_back(BlendInstruction(
+        m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_blendv_ps_256)),
+        0xff, 0, 1, 2));
+
     llvm::Function *avxMaskedLoad32 =
         m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskload_ps_256));
     llvm::Function *avxMaskedLoad64 =