diff --git a/cbackend.cpp b/cbackend.cpp index 8f3aacfd..a9c4c525 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -412,7 +412,7 @@ namespace { // Output all vector constants so they can be accessed with single // vector loads printVectorConstants(F); - + printFunction(F); return false; } @@ -768,11 +768,8 @@ CWriter::printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned, return Out << (isSigned?"":"u") << "int32_t " << NameSoFar; else if (NumBits <= 64) return Out << (isSigned?"":"u") << "int64_t "<< NameSoFar; - else if (NumBits <= 128) - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; else return Out << "iN<" << NumBits << "> " << NameSoFar; - } case llvm::Type::FloatTyID: return Out << "float " << NameSoFar; case llvm::Type::DoubleTyID: return Out << "double " << NameSoFar; @@ -1904,8 +1901,6 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) { // If this is a non-trivial bool computation, make sure to truncate down to // a 1 bit value. This is important because we want "add i1 x, y" to return // "0" when x and y are true, not "2" for example. -// Out << "\n/* Tree\n" << I << "\n*/"; - bool NeedBoolTrunc = false; if (I.getType() == llvm::Type::getInt1Ty(I.getContext()) && !llvm::isa(I) && !llvm::isa(I)) @@ -2349,8 +2344,8 @@ bool CWriter::doInitialization(llvm::Module &M) { Out << "unsigned int putchar(unsigned int);\n"; Out << "int fflush(void *);\n"; Out << "int printf(const unsigned char *, ...);\n"; -// Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n"; -// Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n"; + Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n"; + Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n"; Out << "void memset_pattern16(void *, const void *, uint64_t );\n"; Out << "}\n\n"; @@ -2802,7 +2797,7 @@ void CWriter::printModuleTypes() { for (unsigned i = 0, e = IntegerTypes.size(); i != e; ++i) { llvm::IntegerType *IT = IntegerTypes[i]; - if (IT->getIntegerBitWidth() <= 128 || Alignment[i] == 0) + if (IT->getIntegerBitWidth() <= 64 || Alignment[i] == 0) continue; Out << "typedef struct __attribute__ ((packed, aligned(" << Alignment[i] << "))) {\n "; @@ -4354,7 +4349,7 @@ void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType, Out << '*'; if (IsVolatile || IsUnaligned) { Out << "(("; - if (IsUnaligned && ITy && (ITy->getBitWidth() > 128)) + if (IsUnaligned && ITy && (ITy->getBitWidth() > 64)) Out << "iN_" << ITy->getBitWidth() << "_align_" << Alignment << " *)"; else { if (IsUnaligned) @@ -4368,7 +4363,7 @@ void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType, Out << ")"; } } - + writeOperand(Operand); if (IsVolatile || IsUnaligned) { @@ -4379,7 +4374,6 @@ void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType, } void CWriter::visitLoadInst(llvm::LoadInst &I) { -// Out << "\n/* Tree\n" << I << "\n*/"; llvm::VectorType *VT = llvm::dyn_cast(I.getType()); if (VT != NULL) { Out << "__load<" << I.getAlignment() << ">("; diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index f44c581e..f5bacbb2 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -159,6 +159,51 @@ PRE_ALIGN(128) struct __vec16_i64 : public vec16 { v8, v9, v10, v11, v12, v13, v14, v15) { } } POST_ALIGN(128); +template +struct iN { + int num[num_bits / (sizeof (int) * 8)]; + + iN () {} + + iN (const char *val) { + if (val == NULL) + return; + int length = num_bits / (sizeof (int) * 8); + int val_len = 0; + for (val_len = 0; val[val_len]; (val_len)++); + for (int i = 0; (i < val_len && i < num_bits); i++) + num[i / (sizeof (int) * 8)] = (num[i / (sizeof (int) * 8)] << 1) | (val[i] - '0'); + } + + ~iN () {} + + iN operator >> (const iN rhs) { + iN res; + int length = num_bits / (sizeof (int) * 8); + int cells = rhs.num[0] / (sizeof(int) * 8); + for (int i = 0; i < (length - cells); i++) + res.num[i] = this->num[cells + i]; + return res; + } + + iN operator & (iN rhs) { + iN res; + int length = num_bits / (sizeof (int) * 8); + for (int i = 0; i < length; i++) + res.num[i] = (this->num[i]) & (rhs.num[i]); + return res; + } + + operator uint32_t() { return this->num[0]; } +}; + +template +T __cast_bits (T to, __vec16_i32 from) { + for (int i = 0; i < 16; i++) + to.num[i] = ((uint32_t*)(&from))[i] ; + return to; +} + /////////////////////////////////////////////////////////////////////////// // macros... diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index eb5af424..907d966d 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -32,9 +32,7 @@ */ #include // INT_MIN -#include #include -#include #include #include #include @@ -82,9 +80,9 @@ extern "C" { int puts(unsigned char *); unsigned int putchar(unsigned int); int fflush(void *); -// uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t); -// uint8_t *memset(uint8_t *, uint8_t, uint64_t); -// void memset_pattern16(void *, const void *, uint64_t); + uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t); + uint8_t *memset(uint8_t *, uint8_t, uint64_t); + void memset_pattern16(void *, const void *, uint64_t); } typedef float __vec1_f; @@ -101,40 +99,19 @@ struct iN { int num[num_bits / (sizeof (int) * 8)]; iN () {} - - iN (int val) { - num [0] = val; - } - - template - iN (T *val) { - int length = num_bits / (sizeof (int) * 8); - for (int i = 0; i < length; i++) - num[i] = val[i]; - } - + iN (const char *val) { + if (val == NULL) + return; int length = num_bits / (sizeof (int) * 8); - for (int i = 0; (i < strlen(val) && i < num_bits); i++) + int val_len = 0; + for (val_len = 0; val[val_len]; (val_len)++); + for (int i = 0; (i < val_len && i < num_bits); i++) num[i / (sizeof (int) * 8)] = (num[i / (sizeof (int) * 8)] << 1) | (val[i] - '0'); } -/* - iN (const iN &a) { - int length = num_bits / (sizeof (int) * 8); - for (int i = 0; i < length; i++) - num[i] = a.num[i]; - } -*/ + ~iN () {} -/* - iN operator >> (const int rhs) { - iN res; - int cells = rhs / (sizeof(int) * 8); - for (int i = 0; i < (this->length - cells); i++) - res.num[i] = this->num[cells + i]; - return res; - } -*/ + iN operator >> (const iN rhs) { iN res; int length = num_bits / (sizeof (int) * 8); @@ -152,18 +129,6 @@ struct iN { return res; } -/* - iN& operator = (iN rhs) { - iN swap; - int length = num_bits / (sizeof (int) * 8); - for (int i = 0; i < length; i++) { - swap.num[i] = this->num[i]; - this->num[i] = rhs.num[i]; - rhs.num[i] = swap.num[i]; - } - return *this; - } -*/ operator uint32_t() { return this->num[0]; } }; diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 765a931f..56483bf3 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -179,6 +179,50 @@ FORCEINLINE __vec4_i64::__vec4_i64(__vec4_d vd) { v[1] = _mm_castpd_si128(vd.v[1]); } +template +struct iN { + int num[num_bits / (sizeof (int) * 8)]; + + iN () {} + + iN (const char *val) { + if (val == NULL) + return; + int length = num_bits / (sizeof (int) * 8); + int val_len = 0; + for (val_len = 0; val[val_len]; (val_len)++); + for (int i = 0; (i < val_len && i < num_bits); i++) + num[i / (sizeof (int) * 8)] = (num[i / (sizeof (int) * 8)] << 1) | (val[i] - '0'); + } + + ~iN () {} + + iN operator >> (const iN rhs) { + iN res; + int length = num_bits / (sizeof (int) * 8); + int cells = rhs.num[0] / (sizeof(int) * 8); + for (int i = 0; i < (length - cells); i++) + res.num[i] = this->num[cells + i]; + return res; + } + + iN operator & (iN rhs) { + iN res; + int length = num_bits / (sizeof (int) * 8); + for (int i = 0; i < length; i++) + res.num[i] = (this->num[i]) & (rhs.num[i]); + return res; + } + + operator uint32_t() { return this->num[0]; } +}; + +template +T __cast_bits (T to, __vec4_i32 from) { + for (int i = 0; i < 16; i++) + to.num[i] = ((uint32_t*)(&from))[i] ; + return to; +} /////////////////////////////////////////////////////////////////////////// // SSE helpers / utility functions