47
cbackend.cpp
47
cbackend.cpp
@@ -292,6 +292,15 @@ static void findUsedArrayTypes(const llvm::Module *m, std::vector<llvm::ArrayTyp
|
|||||||
TypeFinder(t).run(*m);
|
TypeFinder(t).run(*m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reports whether Ty is a vector type whose element type is a 64-bit integer.
// Only the element type is inspected (integer, 64 primitive bits); the number
// of vector lanes is not checked here, despite the "vec16" in the name.
// A Ty that is not a VectorType (dyn_cast result is NULL) yields false.
static bool is_vec16_i64_ty(llvm::Type *Ty) {
|
||||||
|
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(Ty);
|
||||||
|
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||||
|
VTy->getElementType()->getPrimitiveSizeInBits() == 64)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class CBEMCAsmInfo : public llvm::MCAsmInfo {
|
class CBEMCAsmInfo : public llvm::MCAsmInfo {
|
||||||
public:
|
public:
|
||||||
@@ -985,9 +994,8 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
|||||||
|
|
||||||
void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
||||||
// vec16_i64 should be handled separately
|
// vec16_i64 should be handled separately
|
||||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(CPA->getOperand(0)->getType());
|
|
||||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) {
|
||||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
|
||||||
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
||||||
Out << "\n#if defined(KNC)\n";
|
Out << "\n#if defined(KNC)\n";
|
||||||
Out << "hilo2zmm";
|
Out << "hilo2zmm";
|
||||||
@@ -1000,10 +1008,7 @@ void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
|||||||
for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
|
for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
|
||||||
Out << ", ";
|
Out << ", ";
|
||||||
|
|
||||||
|
if (is_vec16_i64_ty(CPA->getOperand(i)->getType())) {
|
||||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(CPA->getOperand(0)->getType());
|
|
||||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
|
||||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
|
||||||
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
||||||
Out << "\n#if defined(KNC) \n";
|
Out << "\n#if defined(KNC) \n";
|
||||||
Out << "hilo2zmm";
|
Out << "hilo2zmm";
|
||||||
@@ -1921,7 +1926,7 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) {
|
|||||||
|
|
||||||
if (NeedBoolTrunc)
|
if (NeedBoolTrunc)
|
||||||
Out << "((";
|
Out << "((";
|
||||||
|
|
||||||
visit(I);
|
visit(I);
|
||||||
|
|
||||||
if (NeedBoolTrunc)
|
if (NeedBoolTrunc)
|
||||||
@@ -2627,9 +2632,7 @@ bool CWriter::doInitialization(llvm::Module &M) {
|
|||||||
Out << " = " ;
|
Out << " = " ;
|
||||||
|
|
||||||
// vec16_i64 should be handled separately
|
// vec16_i64 should be handled separately
|
||||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(I->getType()->getElementType());
|
if (is_vec16_i64_ty(I->getType()->getElementType())) {
|
||||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
|
||||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
|
||||||
Out << "/* vec16_i64 should be loaded carefully on knc */\n";
|
Out << "/* vec16_i64 should be loaded carefully on knc */\n";
|
||||||
Out << "\n#if defined(KNC) \n";
|
Out << "\n#if defined(KNC) \n";
|
||||||
Out << "hilo2zmm";
|
Out << "hilo2zmm";
|
||||||
@@ -4002,6 +4005,28 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
|||||||
Callee->getName() == "_aligned_malloc")
|
Callee->getName() == "_aligned_malloc")
|
||||||
Out << "(uint8_t *)";
|
Out << "(uint8_t *)";
|
||||||
|
|
||||||
|
if (Callee->getName() == "__masked_store_i64") {
|
||||||
|
llvm::CallSite CS(&I);
|
||||||
|
llvm::CallSite::arg_iterator AI = CS.arg_begin();
|
||||||
|
|
||||||
|
if (is_vec16_i64_ty(llvm::cast<llvm::PointerType>((*AI)->getType())->getElementType())) {
|
||||||
|
Out << "/* Replacing store of vec16_i64 val into &vec16_i64 pointer with a simple copy */\n";
|
||||||
|
// If we are trying to get a pointer from a vec16_i64 var
|
||||||
|
// It would be better to replace this instruction with a masked copy
|
||||||
|
if (llvm::isa<llvm::GetElementPtrInst>(*AI)) {
|
||||||
|
writeOperandDeref(*AI);
|
||||||
|
Out << " = __select(";
|
||||||
|
writeOperand(*(AI+2));
|
||||||
|
Out << ", ";
|
||||||
|
writeOperand(*(AI+1));
|
||||||
|
Out << ", ";
|
||||||
|
writeOperandDeref(*AI);
|
||||||
|
Out << ")";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (NeedsCast) {
|
if (NeedsCast) {
|
||||||
// Ok, just cast the pointer type.
|
// Ok, just cast the pointer type.
|
||||||
Out << "((";
|
Out << "((";
|
||||||
|
|||||||
@@ -1,32 +0,0 @@
|
|||||||
knc.h
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
1. pmulus_vi64.ispc (-O2)
|
|
||||||
The root of the problem is in the code generator - it assumes __vec16_i64 stores its elements sequentially in memory,
|
|
||||||
not high and low parts separately as we do. Consequently, this construction works incorrectly
|
|
||||||
|
|
||||||
```
|
|
||||||
__vec16_i64 (((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
|
||||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
|
||||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
|
||||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
|
||||||
*)(&tmp__2_))[0]
|
|
||||||
```
|
|
||||||
where 'tmp__2_' is of __vec16_i64 data type.
|
|
||||||
|
|
||||||
2. soa-18.ispc (-O0)
|
|
||||||
The same as (1). The generator uses a structure of the following kind
|
|
||||||
```
|
|
||||||
struct l_unnamed_0 {
|
|
||||||
__vec16_i64 field0;
|
|
||||||
__vec16_i32 field1;
|
|
||||||
} ptr_;
|
|
||||||
```
|
|
||||||
and a function
|
|
||||||
```
|
|
||||||
__masked_store_i64(((&ptr_.field0)), _slice_ptr7_slice_offset_extract_0_, internal_mask_26_function_mask9_);
|
|
||||||
```
|
|
||||||
|
|
||||||
where '_slice_ptr7_slice_offset_extract_0_' is of type __vec16_i64. The
|
|
||||||
problem is, we store 64-bit ints in memory sequentially and in vectors
|
|
||||||
separately (hi and lo parts). This way, the '.field0' has wrong layout
|
|
||||||
@@ -393,12 +393,6 @@
|
|||||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O2 *
|
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O2 *
|
||||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O2 *
|
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O2 *
|
||||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O2 *
|
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O2 *
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc15.0 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.4 icpc13.1 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc15.0 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.5 icpc13.1 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc15.0 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.6 icpc13.1 -O0 *
|
|
||||||
./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||||
./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||||
./tests/exclusive-scan-add-2.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
./tests/exclusive-scan-add-2.ispc compfail x86-64 generic-16 Linux LLVM 3.6 clang++3.4 -O2 *
|
||||||
@@ -533,6 +527,3 @@
|
|||||||
./tests/exclusive-scan-add-7.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
./tests/exclusive-scan-add-7.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
||||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O2 *
|
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O2 *
|
||||||
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O2 *
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc15.0 -O0 *
|
|
||||||
./tests/soa-18.ispc runfail x86-64 knc Linux LLVM 3.7 icpc13.1 -O0 *
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user