diff --git a/cbackend.cpp b/cbackend.cpp index 042fcdd1..de3e7173 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -292,6 +292,15 @@ static void findUsedArrayTypes(const llvm::Module *m, std::vector(Ty); + if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) && + VTy->getElementType()->getPrimitiveSizeInBits() == 64) + return true; + return false; +} + namespace { class CBEMCAsmInfo : public llvm::MCAsmInfo { public: @@ -985,9 +994,8 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty, void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) { // vec16_i64 should be handled separately - llvm::VectorType *VTy = llvm::dyn_cast(CPA->getOperand(0)->getType()); - if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) && - VTy->getElementType()->getPrimitiveSizeInBits() == 64) { + + if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) { Out << "/* vec16_i64 should be loaded carefully on knc */"; Out << "\n#if defined(KNC)\n"; Out << "hilo2zmm"; @@ -1000,10 +1008,7 @@ void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) { for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { Out << ", "; - - llvm::VectorType *VTy = llvm::dyn_cast(CPA->getOperand(0)->getType()); - if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) && - VTy->getElementType()->getPrimitiveSizeInBits() == 64) { + if (is_vec16_i64_ty(CPA->getOperand(i)->getType())) { Out << "/* vec16_i64 should be loaded carefully on knc */"; Out << "\n#if defined(KNC) \n"; Out << "hilo2zmm"; @@ -1921,7 +1926,7 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) { if (NeedBoolTrunc) Out << "(("; - + visit(I); if (NeedBoolTrunc) @@ -2627,9 +2632,7 @@ bool CWriter::doInitialization(llvm::Module &M) { Out << " = " ; // vec16_i64 should be handled separately - llvm::VectorType *VTy = llvm::dyn_cast(I->getType()->getElementType()); - if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) && - 
VTy->getElementType()->getPrimitiveSizeInBits() == 64) { + if (is_vec16_i64_ty(I->getType()->getElementType())) { Out << "/* vec16_i64 should be loaded carefully on knc */\n"; Out << "\n#if defined(KNC) \n"; Out << "hilo2zmm"; @@ -4002,6 +4005,28 @@ void CWriter::visitCallInst(llvm::CallInst &I) { Callee->getName() == "_aligned_malloc") Out << "(uint8_t *)"; + if (Callee->getName() == "__masked_store_i64") { + llvm::CallSite CS(&I); + llvm::CallSite::arg_iterator AI = CS.arg_begin(); + + if (is_vec16_i64_ty(llvm::cast((*AI)->getType())->getElementType())) { + Out << "/* Replacing store of vec16_i64 val into &vec16_i64 pointer with a simple copy */\n"; + // If we are trying to get a pointer to from a vec16_i64 var + // It would be better to replace this instruction with a masked copy + if (llvm::isa(*AI)) { + writeOperandDeref(*AI); + Out << " = __select("; + writeOperand(*(AI+2)); + Out << ", "; + writeOperand(*(AI+1)); + Out << ", "; + writeOperandDeref(*AI); + Out << ")"; + return; + } + } + } + if (NeedsCast) { // Ok, just cast the pointer type. Out << "(("; diff --git a/examples/intrinsics/known_fails.txt b/examples/intrinsics/known_fails.txt deleted file mode 100644 index 95986887..00000000 --- a/examples/intrinsics/known_fails.txt +++ /dev/null @@ -1,32 +0,0 @@ -knc.h -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. pmulus_vi64.ispc (-O2) -The root of the broblem is in the code generator - it assumes __vec16_i64 stores its elements sequentually in memory, -not high and low parts separately as we do. 
Consequently, this construction works incorrectly - -``` -__vec16_i64 (((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t -*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t -*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t -*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t -*)(&tmp__2_))[0] -``` -where 'tmp__2_' is of __vec16_i64 data type. - -2. soa-18.ispc (-O0) -The same as (1). The generator uses the structure of a kind -``` -struct l_unnamed_0 { - __vec16_i64 field0; - __vec16_i32 field1; -} ptr_; -``` -and a function -``` - __masked_store_i64(((&ptr_.field0)), _slice_ptr7_slice_offset_extract_0_, internal_mask_26_function_mask9_); -``` - -where '_slice_ptr7_slice_offset_extract_0_' is of type __vec16_i64. The -problem is, we store 64 bit ints in memory sequentially and in vectors -separately (hi and lo parts). 
This way, the '.field0' has wrong layout diff --git a/fail_db.txt b/fail_db.txt index 8c0769cc..a2222b14 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -393,12 +393,6 @@ ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O2 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc15.0 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc13.1 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc15.0 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O0 * ./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 * ./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 * ./tests/exclusive-scan-add-2.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 * @@ -533,6 +527,3 @@ ./tests/exclusive-scan-add-7.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O0 * -./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O0 * -