Merge pull request #1010 from Vsevolod-Livinskij/fix_for_nvptx

Typo fix in util-nvptx.m4
This commit is contained in:
Dmitry Babokin
2015-04-16 13:37:40 +03:00
2 changed files with 22 additions and 9 deletions

View File

@@ -49,6 +49,17 @@ define(`MASK_HIGH_BIT_ON',
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
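;; LLVM 3.7 changed the IR syntax of load and getelementptr: the pointee type
;; is written explicitly before the pointer type, so emit both for that version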
define(`PTR_OP_ARGS',
ifelse(LLVM_VERSION, LLVM_3_7,
``$1 , $1 *'',
``$1 *''
)
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector deconstruction utilities
;; split 8-wide vector into 2 4-wide vectors
;;
@@ -2759,7 +2770,7 @@ entry:
if.then: ; preds = %entry
%idxprom = ashr i64 %call, 32
%arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') startptr, i64 %idxprom
%arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') %startptr, i64 %idxprom
%val = load PTR_OP_ARGS(`i32') %arrayidx, align 4
%valvec = insertelement <1 x i32> undef, i32 %val, i32 0
store <1 x i32> %valvec, <1 x i32>* %val_ptr, align 4
@@ -2780,7 +2791,7 @@ entry:
if.then: ; preds = %entry
%idxprom = ashr i64 %call, 32
%arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') startptr, i64 %idxprom
%arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') %startptr, i64 %idxprom
%val = extractelement <1 x i32> %vals, i32 0
store i32 %val, i32* %arrayidx, align 4
br label %if.end

View File

@@ -1010,12 +1010,6 @@ Target::SupportedArchs() {
const char *
Target::SupportedTargets() {
return
#ifdef ISPC_ARM_ENABLED
"neon-i8x16, neon-i16x8, neon-i32x4, "
#endif
#ifdef ISPC_NVPTX_ENABLED
"nvptx, "
#endif
"sse2-i32x4, sse2-i32x8, "
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
"avx1-i32x4, "
@@ -1023,7 +1017,15 @@ Target::SupportedTargets() {
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64";
"generic-x32, generic-x64"
#ifdef ISPC_ARM_ENABLED
", neon-i8x16, neon-i16x8, neon-i32x4"
#endif
#ifdef ISPC_NVPTX_ENABLED
", nvptx"
#endif
;
}