diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll
index e4c70aa4..6d4d2db6 100644
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -39,12 +39,12 @@ reduce_equal(WIDTH)
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; broadcast/rotate/shuffle

-declare <WIDTH x float> @__smear_float(float) nounwind readnone
-declare <WIDTH x double> @__smear_double(double) nounwind readnone
-declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
-declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
-declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
-declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
+declare <WIDTH x float> @__smear_float(<WIDTH x float>, float) nounwind readnone
+declare <WIDTH x double> @__smear_double(<WIDTH x double>, double) nounwind readnone
+declare <WIDTH x i8> @__smear_i8(<WIDTH x i8>, i8) nounwind readnone
+declare <WIDTH x i16> @__smear_i16(<WIDTH x i16>, i16) nounwind readnone
+declare <WIDTH x i32> @__smear_i32(<WIDTH x i32>, i32) nounwind readnone
+declare <WIDTH x i64> @__smear_i64(<WIDTH x i64>, i64) nounwind readnone

 declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
 declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
diff --git a/cbackend.cpp b/cbackend.cpp
index e932dff2..d43880dc 100644
--- a/cbackend.cpp
+++ b/cbackend.cpp
@@ -1464,6 +1464,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       Constant *CZ = Constant::getNullValue(VT->getElementType());
       Out << smearFunc << "(";
+      printType(Out, VT);
+      Out << "(), ";
       printConstant(CZ, Static);
       Out << ")";
     }
@@ -1471,6 +1473,8 @@
       llvm::Constant *splatValue = CV->getSplatValue();
       if (splatValue != NULL && smearFunc != NULL) {
         Out << smearFunc << "(";
+        printType(Out, VT);
+        Out << "(), ";
         printConstant(splatValue, Static);
         Out << ")";
       }
@@ -1486,6 +1490,8 @@
       llvm::Constant *splatValue = CDV->getSplatValue();
       if (splatValue != NULL && smearFunc != NULL) {
         Out << smearFunc << "(";
+        printType(Out, VT);
+        Out << "(), ";
         printConstant(splatValue, Static);
         Out << ")";
       }
@@ -4392,16 +4398,17 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
         if (smearFunc == NULL) {
             Constant *sf =
                 module->getOrInsertFunction(smearFuncName, iter->getType(),
-                                            matchType, NULL);
+                                            iter->getType(), matchType, NULL);
             smearFunc = dyn_cast<Function>(sf);
             assert(smearFunc != NULL);
             smearFunc->setDoesNotThrow(true);
             smearFunc->setDoesNotAccessMemory(true);
         }
-
+
+        llvm::Value *undefResult = llvm::UndefValue::get(vt);
         assert(smearFunc != NULL);
-        Value *args[1] = { toMatch };
-        ArrayRef<Value *> argArray(&args[0], &args[1]);
+        Value *args[2] = { undefResult, toMatch };
+        ArrayRef<Value *> argArray(&args[0], &args[2]);
         Instruction *smearCall =
             CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"),
                              (Instruction *)NULL);
diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h
index 384a9ece..4c696975 100644
--- a/examples/intrinsics/generic-16.h
+++ b/examples/intrinsics/generic-16.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
@@ -259,13 +259,13 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) {   \
     return ret;                                  \
 }

-#define SMEAR(VTYPE, NAME, STYPE)                \
-static FORCEINLINE VTYPE __smear_##NAME(STYPE v) {   \
-    VTYPE ret;                                   \
-    for (int i = 0; i < 16; ++i)                 \
-        ret.v[i] = v;                            \
-    return ret;                                  \
-}                                                \
+#define SMEAR(VTYPE, NAME, STYPE)                \
+static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) {   \
+    VTYPE ret;                                   \
+    for (int i = 0; i < 16; ++i)                 \
+        ret.v[i] = v;                            \
+    return ret;                                  \
+}

 #define BROADCAST(VTYPE, NAME, STYPE)                \
 static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) {    \
@@ -374,7 +374,7 @@ static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v, int align) {
     *ptr = v.v;
 }

-static FORCEINLINE __vec16_i1 __smear_i1(int v) {
+static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int v) {
     return __vec16_i1(v, v, v, v, v, v, v, v,
                       v, v, v, v, v, v, v, v);
 }
diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h
index 48a67719..5fe22b78 100644
--- a/examples/intrinsics/sse4.h
+++ b/examples/intrinsics/sse4.h
@@ -266,7 +266,7 @@ static FORCEINLINE void __store(__vec4_i1 *p, __vec4_i1 value, int align) {
     _mm_storeu_ps((float *)(&p->v), value.v);
 }

-static FORCEINLINE __vec4_i1 __smear_i1(int v) {
+static FORCEINLINE __vec4_i1 __smear_i1(__vec4_i1, int v) {
     return __vec4_i1(v, v, v, v);
 }

@@ -493,7 +493,7 @@ static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) {
     ((int8_t *)v)[index] = val;
 }

-static FORCEINLINE __vec4_i8 __smear_i8(int8_t v) {
+static FORCEINLINE __vec4_i8 __smear_i8(__vec4_i8, int8_t v) {
     return _mm_set1_epi8(v);
 }

@@ -752,7 +752,7 @@ static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val)
     ((int16_t *)v)[index] = val;
 }

-static FORCEINLINE __vec4_i16 __smear_i16(int16_t v) {
+static FORCEINLINE __vec4_i16 __smear_i16(__vec4_i16, int16_t v) {
     return _mm_set1_epi16(v);
 }

@@ -989,7 +989,7 @@ static FORCEINLINE __vec4_i32 __select(__vec4_i1 mask, __vec4_i32 a, __vec4_i32
                                            _mm_castsi128_ps(a.v), mask.v));
 }

-static FORCEINLINE __vec4_i32 __smear_i32(int32_t v) {
+static FORCEINLINE __vec4_i32 __smear_i32(__vec4_i32, int32_t v) {
     return _mm_set1_epi32(v);
 }

@@ -1250,7 +1250,7 @@ static FORCEINLINE __vec4_i64 __select(__vec4_i1 mask, __vec4_i64 a, __vec4_i64
     return __vec4_i64(_mm_castpd_si128(r0), _mm_castpd_si128(r1));
 }

-static FORCEINLINE __vec4_i64 __smear_i64(int64_t v) {
+static FORCEINLINE __vec4_i64 __smear_i64(__vec4_i64, int64_t v) {
     return __vec4_i64(v, v, v, v);
 }

@@ -1354,7 +1354,7 @@ static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) {
     return _mm_blendv_ps(b.v, a.v, mask.v);
 }

-static FORCEINLINE __vec4_f __smear_float(float v) {
+static FORCEINLINE __vec4_f __smear_float(__vec4_f, float v) {
     return _mm_set1_ps(v);
 }

@@ -1486,7 +1486,7 @@ static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) {
     return __vec4_d(r0, r1);
 }

-static FORCEINLINE __vec4_d __smear_double(double v) {
+static FORCEINLINE __vec4_d __smear_double(__vec4_d, double v) {
     return __vec4_d(_mm_set1_pd(v), _mm_set1_pd(v));
 }

@@ -1586,11 +1586,13 @@ static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i8 val) {
 }

 static FORCEINLINE __vec4_i8 __cast_sext(__vec4_i8, __vec4_i1 v) {
-    return __select(v, __smear_i8(0xff), __smear_i8(0));
+    return __select(v, __smear_i8(__vec4_i8(), 0xff),
+                    __smear_i8(__vec4_i8(), 0));
 }

 static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i1 v) {
-    return __select(v, __smear_i16(0xffff), __smear_i16(0));
+    return __select(v, __smear_i16(__vec4_i16(), 0xffff),
+                    __smear_i16(__vec4_i16(), 0));
 }

 static FORCEINLINE __vec4_i32 __cast_sext(__vec4_i32, __vec4_i1 v) {
@@ -1650,11 +1652,12 @@ static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i8 val) {
 }

 static FORCEINLINE __vec4_i8 __cast_zext(__vec4_i8, __vec4_i1 v) {
-    return __select(v, __smear_i8(1), __smear_i8(0));
+    return __select(v, __smear_i8(__vec4_i8(), 1), __smear_i8(__vec4_i8(), 0));
 }

 static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i1 v) {
-    return __select(v, __smear_i16(1), __smear_i16(0));
+    return __select(v, __smear_i16(__vec4_i16(), 1),
+                    __smear_i16(__vec4_i16(), 0));
 }

 static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
@@ -1662,7 +1665,7 @@ static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
 }

 static FORCEINLINE __vec4_i64 __cast_zext(__vec4_i64, __vec4_i1 v) {
-    return __select(v, __smear_i64(1), __smear_i64(0));
+    return __select(v, __smear_i64(__vec4_i64(), 1), __smear_i64(__vec4_i64(), 0));
 }

 // truncations
@@ -1822,11 +1825,11 @@ static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i64 val) {
 }

 static FORCEINLINE __vec4_f __cast_uitofp(__vec4_f, __vec4_i1 v) {
-    return __select(v, __smear_float(1.), __smear_float(0.));
+    return __select(v, __smear_float(__vec4_f(), 1.), __smear_float(__vec4_f(), 0.));
 }

 static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i1 v) {
-    return __select(v, __smear_double(1.), __smear_double(0.));
+    return __select(v, __smear_double(__vec4_d(), 1.), __smear_double(__vec4_d(), 0.));
 }

 // float/double to signed int
@@ -2617,8 +2620,8 @@ lGatherBaseOffsets32(RetVec, RetScalar, unsigned char *p, __vec4_i32 offsets,
     RetScalar r[4];
 #if 1
     // "Fast gather" trick...
-    offsets = __select(mask, offsets, __smear_i32(0));
-    constOffset = __select(mask, constOffset, __smear_i32(0));
+    offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
+    constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
     int offset = scale * _mm_extract_epi32(offsets.v, 0) +
         _mm_extract_epi32(constOffset.v, 0);
     RetScalar *ptr = (RetScalar *)(p + offset);
@@ -2675,8 +2678,8 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets,
     RetScalar r[4];
 #if 1
     // "Fast gather" trick...
-    offsets = __select(mask, offsets, __smear_i64(0));
-    constOffset = __select(mask, constOffset, __smear_i64(0));
+    offsets = __select(mask, offsets, __smear_i64(__vec4_i64(), 0));
+    constOffset = __select(mask, constOffset, __smear_i64(__vec4_i64(), 0));
     int64_t offset = scale * _mm_extract_epi64(offsets.v[0], 0) +
         _mm_extract_epi64(constOffset.v[0], 0);
     RetScalar *ptr = (RetScalar *)(p + offset);
@@ -2760,8 +2763,8 @@ __gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
     __m128i r = _mm_set_epi32(0, 0, 0, 0);
 #if 1
     // "Fast gather"...
-    offsets = __select(mask, offsets, __smear_i32(0));
-    constOffset = __select(mask, constOffset, __smear_i32(0));
+    offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
+    constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
     int offset = scale * _mm_extract_epi32(offsets.v, 0) +
         _mm_extract_epi32(constOffset.v, 0);
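
The patch above works around a C++ limitation: the C backend turns each __smear_* call into an ordinary function call, and C++ cannot resolve an overload on return type alone. The result type is therefore threaded through as a dummy first argument. On the LLVM side, SmearCleanupPass passes an undef value of the vector type (llvm::UndefValue::get(vt)); on the C side, the writer prints a default-constructed temporary via printType(Out, VT); Out << "(), ";, which emits e.g. __vec4_i32(). Below is a minimal, self-contained sketch of the idiom; the struct bodies are simplified stand-ins for the real SSE-backed types in examples/intrinsics/sse4.h, not the actual implementations.

    // Sketch of the overload-selection idiom, with plain structs standing
    // in for the SSE-backed vector types.
    #include <cstdint>

    struct __vec4_i32 { int32_t v[4]; };
    struct __vec4_f   { float   v[4]; };

    // Each __smear_* takes an unused first parameter whose type matches
    // the return type. Its value is never read; it exists only to drive
    // overload resolution.
    static inline __vec4_i32 __smear_i32(__vec4_i32, int32_t x) {
        __vec4_i32 ret;
        for (int i = 0; i < 4; ++i)
            ret.v[i] = x;
        return ret;
    }

    static inline __vec4_f __smear_float(__vec4_f, float x) {
        __vec4_f ret;
        for (int i = 0; i < 4; ++i)
            ret.v[i] = x;
        return ret;
    }

    int main() {
        // A default-constructed temporary selects the overload, mirroring
        // the "VT()" the C writer emits; only the second argument carries data.
        __vec4_i32 a = __smear_i32(__vec4_i32(), 7);
        __vec4_f   b = __smear_float(__vec4_f(), 2.0f);
        return (a.v[3] == 7 && b.v[0] == 2.0f) ? 0 : 1;
    }

Because the selector argument is dead, it costs nothing after optimization: the temporary is optimized away in the C path, and the undef operand generates no code in the LLVM path.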