fixed soa-18 test
This commit is contained in:
47
cbackend.cpp
47
cbackend.cpp
@@ -292,6 +292,15 @@ static void findUsedArrayTypes(const llvm::Module *m, std::vector<llvm::ArrayTyp
|
||||
TypeFinder(t).run(*m);
|
||||
}
|
||||
|
||||
|
||||
/// Reports whether Ty is a vector type whose element type is a 64-bit integer.
///
/// Only the element type is inspected; the number of lanes in the vector is
/// not checked by this helper.
///
/// @param Ty  Type to classify.
/// @return    true if Ty is a vector of 64-bit integers, false otherwise
///            (including when Ty is not a vector type at all).
static bool is_vec16_i64_ty(llvm::Type *Ty) {
    llvm::VectorType *vecTy = llvm::dyn_cast<llvm::VectorType>(Ty);
    // Not a vector: nothing further to examine.
    if (vecTy == NULL)
        return false;

    llvm::Type *elemTy = vecTy->getElementType();
    return elemTy->isIntegerTy() && elemTy->getPrimitiveSizeInBits() == 64;
}
|
||||
|
||||
namespace {
|
||||
class CBEMCAsmInfo : public llvm::MCAsmInfo {
|
||||
public:
|
||||
@@ -985,9 +994,8 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
|
||||
void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
||||
// vec16_i64 should be handled separately
|
||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(CPA->getOperand(0)->getType());
|
||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
||||
|
||||
if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) {
|
||||
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
||||
Out << "\n#if defined(KNC)\n";
|
||||
Out << "hilo2zmm";
|
||||
@@ -1000,10 +1008,7 @@ void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
||||
for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
|
||||
Out << ", ";
|
||||
|
||||
|
||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(CPA->getOperand(0)->getType());
|
||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
||||
if (is_vec16_i64_ty(CPA->getOperand(i)->getType())) {
|
||||
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
||||
Out << "\n#if defined(KNC) \n";
|
||||
Out << "hilo2zmm";
|
||||
@@ -1921,7 +1926,7 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) {
|
||||
|
||||
if (NeedBoolTrunc)
|
||||
Out << "((";
|
||||
|
||||
|
||||
visit(I);
|
||||
|
||||
if (NeedBoolTrunc)
|
||||
@@ -2627,9 +2632,7 @@ bool CWriter::doInitialization(llvm::Module &M) {
|
||||
Out << " = " ;
|
||||
|
||||
// vec16_i64 should be handled separately
|
||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(I->getType()->getElementType());
|
||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
||||
if (is_vec16_i64_ty(I->getType()->getElementType())) {
|
||||
Out << "/* vec16_i64 should be loaded carefully on knc */\n";
|
||||
Out << "\n#if defined(KNC) \n";
|
||||
Out << "hilo2zmm";
|
||||
@@ -4002,6 +4005,28 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
||||
Callee->getName() == "_aligned_malloc")
|
||||
Out << "(uint8_t *)";
|
||||
|
||||
if (Callee->getName() == "__masked_store_i64") {
|
||||
llvm::CallSite CS(&I);
|
||||
llvm::CallSite::arg_iterator AI = CS.arg_begin();
|
||||
|
||||
if (is_vec16_i64_ty(llvm::cast<llvm::PointerType>((*AI)->getType())->getElementType())) {
|
||||
Out << "/* Replacing store of vec16_i64 val into &vec16_i64 pointer with a simple copy */\n";
|
||||
// If we are trying to get a pointer to from a vec16_i64 var
|
||||
// It would be better to replace this instruction with a masked copy
|
||||
if (llvm::isa<llvm::GetElementPtrInst>(*AI)) {
|
||||
writeOperandDeref(*AI);
|
||||
Out << " = __select(";
|
||||
writeOperand(*(AI+2));
|
||||
Out << ", ";
|
||||
writeOperand(*(AI+1));
|
||||
Out << ", ";
|
||||
writeOperandDeref(*AI);
|
||||
Out << ")";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (NeedsCast) {
|
||||
// Ok, just cast the pointer type.
|
||||
Out << "((";
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
knc.h
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
1. pmulus_vi64.ispc (-O2)
|
||||
The root of the problem is in the code generator - it assumes __vec16_i64 stores its elements sequentially in memory,
|
||||
not high and low parts separately as we do. Consequently, this construction works incorrectly
|
||||
|
||||
```
|
||||
__vec16_i64 (((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0]
|
||||
```
|
||||
where 'tmp__2_' is of __vec16_i64 data type.
|
||||
|
||||
2. soa-18.ispc (-O0)
|
||||
The same as (1). The generator uses a structure of the following kind
|
||||
```
|
||||
struct l_unnamed_0 {
|
||||
__vec16_i64 field0;
|
||||
__vec16_i32 field1;
|
||||
} ptr_;
|
||||
```
|
||||
and a function
|
||||
```
|
||||
__masked_store_i64(((&ptr_.field0)), _slice_ptr7_slice_offset_extract_0_, internal_mask_26_function_mask9_);
|
||||
```
|
||||
|
||||
where '_slice_ptr7_slice_offset_extract_0_' is of type __vec16_i64. The
|
||||
problem is, we store 64-bit ints in memory sequentially and in vectors
|
||||
separately (hi and lo parts). This way, the '.field0' has wrong layout
|
||||
@@ -393,12 +393,6 @@
|
||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O2 *
|
||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O2 *
|
||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O2 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc15.0 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc13.1 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc15.0 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O0 *
|
||||
./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||
./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||
./tests/exclusive-scan-add-2.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||
@@ -533,6 +527,3 @@
|
||||
./tests/exclusive-scan-add-7.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O2 *
|
||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O0 *
|
||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O0 *
|
||||
|
||||
|
||||
Reference in New Issue
Block a user