Compare commits
52 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1b8e5b1bf | ||
|
|
e7a70b05af | ||
|
|
cf73286938 | ||
|
|
e6f80c0adc | ||
|
|
5e31d7b6d0 | ||
|
|
649f2ad7b7 | ||
|
|
fade1cdf1d | ||
|
|
d261105a86 | ||
|
|
b3d3e8987b | ||
|
|
4e91f3777a | ||
|
|
5584240c7f | ||
|
|
7126a39092 | ||
|
|
8ad28a3f6f | ||
|
|
9921b8e530 | ||
|
|
9052d4b10b | ||
|
|
2405dae8e6 | ||
|
|
3607f3e045 | ||
|
|
de84acfa5d | ||
|
|
a501ab1aa6 | ||
|
|
cdc850f98c | ||
|
|
ca87579f23 | ||
|
|
38fc13d1ab | ||
|
|
cf9d9f717e | ||
|
|
173632f446 | ||
|
|
1dedd88132 | ||
|
|
0848c2cc19 | ||
|
|
e2a88d491f | ||
|
|
30f9dcd4f5 | ||
|
|
0c344b6755 | ||
|
|
6734021520 | ||
|
|
dd153d3c5c | ||
|
|
9ca7541d52 | ||
|
|
0c20483853 | ||
|
|
9d4ff1bc06 | ||
|
|
83f22f1939 | ||
|
|
6375ed9224 | ||
|
|
cf23cf9ef4 | ||
|
|
1147b53dcd | ||
|
|
4cf831a651 | ||
|
|
785d8a29d3 | ||
|
|
46d2bad231 | ||
|
|
32da8e11b4 | ||
|
|
5dedb6f836 | ||
|
|
2ea6d249d5 | ||
|
|
c86128e8ee | ||
|
|
375f1cb8e8 | ||
|
|
3ca7b6b078 | ||
|
|
effe901890 | ||
|
|
4f451bd041 | ||
|
|
c76ef7b174 | ||
|
|
743d82e935 | ||
|
|
18546e9c6d |
@@ -15,8 +15,8 @@ code.
|
|||||||
|
|
||||||
ispc is an open source compiler under the BSD license; see the file
|
ispc is an open source compiler under the BSD license; see the file
|
||||||
LICENSE.txt. ispc supports Windows, Mac, and Linux, with both x86 and
|
LICENSE.txt. ispc supports Windows, Mac, and Linux, with both x86 and
|
||||||
x86-64 targets. It currently supports the SSE2 and SSE4 instruction sets,
|
x86-64 targets. It currently supports the SSE2, SSE4, and AVX instruction
|
||||||
though support for AVX should be available soon.
|
sets.
|
||||||
|
|
||||||
For more information and examples, as well as a wiki and the bug database,
|
For more information and examples, as well as a wiki and the bug database,
|
||||||
see the ispc distribution site, http://ispc.github.com.
|
see the ispc distribution site, http://ispc.github.com.
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import sys
|
|||||||
import string
|
import string
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import platform
|
||||||
|
import os
|
||||||
|
|
||||||
length=0
|
length=0
|
||||||
|
|
||||||
@@ -14,8 +16,12 @@ target = re.sub("\.ll$", "", target)
|
|||||||
target = re.sub("\.c$", "", target)
|
target = re.sub("\.c$", "", target)
|
||||||
target = re.sub("-", "_", target)
|
target = re.sub("-", "_", target)
|
||||||
|
|
||||||
|
llvm_as="llvm-as"
|
||||||
|
if platform.system() == 'Windows' or string.find(platform.system(), "CYGWIN_NT") != -1:
|
||||||
|
llvm_as = os.getenv("LLVM_INSTALL_DIR").replace("\\", "/") + "/bin/" + llvm_as
|
||||||
|
|
||||||
try:
|
try:
|
||||||
as_out=subprocess.Popen([ "llvm-as", "-", "-o", "-"], stdout=subprocess.PIPE)
|
as_out=subprocess.Popen([llvm_as, "-", "-o", "-"], stdout=subprocess.PIPE)
|
||||||
except IOError:
|
except IOError:
|
||||||
print >> sys.stderr, "Couldn't open " + src
|
print >> sys.stderr, "Couldn't open " + src
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -232,8 +232,8 @@ define internal float @__reduce_add_float(<16 x float>) nounwind readonly always
|
|||||||
%v1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %va, <8 x float> %vb)
|
%v1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %va, <8 x float> %vb)
|
||||||
%v2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v1, <8 x float> %v1)
|
%v2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v1, <8 x float> %v1)
|
||||||
%v3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v2, <8 x float> %v2)
|
%v3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v2, <8 x float> %v2)
|
||||||
%scalar1 = extractelement <8 x float> %v2, i32 0
|
%scalar1 = extractelement <8 x float> %v3, i32 0
|
||||||
%scalar2 = extractelement <8 x float> %v2, i32 4
|
%scalar2 = extractelement <8 x float> %v3, i32 4
|
||||||
%sum = fadd float %scalar1, %scalar2
|
%sum = fadd float %scalar1, %scalar2
|
||||||
ret float %sum
|
ret float %sum
|
||||||
}
|
}
|
||||||
@@ -316,7 +316,9 @@ define internal double @__reduce_add_double(<16 x double>) nounwind readonly alw
|
|||||||
|
|
||||||
%sum0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %vab, <4 x double> %vcd)
|
%sum0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %vab, <4 x double> %vcd)
|
||||||
%sum1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum0, <4 x double> %sum0)
|
%sum1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum0, <4 x double> %sum0)
|
||||||
%sum = extractelement <4 x double> %sum1, i32 0
|
%final0 = extractelement <4 x double> %sum1, i32 0
|
||||||
|
%final1 = extractelement <4 x double> %sum1, i32 2
|
||||||
|
%sum = fadd double %final0, %final1
|
||||||
ret double %sum
|
ret double %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -521,35 +523,104 @@ define void @__masked_store_64(<16 x i64>* nocapture, <16 x i64>,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
;; FIXME: various code elsewhere in the builtins implementations makes
|
masked_store_blend_8_16_by_16()
|
||||||
;; calls to the 32/64 bit versions of these, basically assuming that doing
|
|
||||||
;; so is faster than doing a full call to an actual masked store, which
|
|
||||||
;; isn't likely to be the case on AVX. So here we provide those functions
|
|
||||||
;; but then don't actually do what the caller asked for...
|
|
||||||
|
|
||||||
declare void @llvm.trap()
|
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
|
||||||
|
<8 x float>) nounwind readnone
|
||||||
define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
call void @llvm.trap()
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
call void @llvm.trap()
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__masked_store_blend_32(<16 x i32>* nocapture, <16 x i32>,
|
define void @__masked_store_blend_32(<16 x i32>* nocapture, <16 x i32>,
|
||||||
<16 x i32>) nounwind alwaysinline {
|
<16 x i32>) nounwind alwaysinline {
|
||||||
call void @__masked_store_32(<16 x i32> * %0, <16 x i32> %1, <16 x i32> %2)
|
%maskAsFloat = bitcast <16 x i32> %2 to <16 x float>
|
||||||
|
%oldValue = load <16 x i32>* %0, align 4
|
||||||
|
%oldAsFloat = bitcast <16 x i32> %oldValue to <16 x float>
|
||||||
|
%newAsFloat = bitcast <16 x i32> %1 to <16 x float>
|
||||||
|
|
||||||
|
%old0 = shufflevector <16 x float> %oldAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%old1 = shufflevector <16 x float> %oldAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%new0 = shufflevector <16 x float> %newAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%new1 = shufflevector <16 x float> %newAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%mask0 = shufflevector <16 x float> %maskAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%mask1 = shufflevector <16 x float> %maskAsFloat, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
|
||||||
|
%blend0 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %old0,
|
||||||
|
<8 x float> %new0,
|
||||||
|
<8 x float> %mask0)
|
||||||
|
%blend1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %old1,
|
||||||
|
<8 x float> %new1,
|
||||||
|
<8 x float> %mask1)
|
||||||
|
%blend = shufflevector <8 x float> %blend0, <8 x float> %blend1,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%blendAsInt = bitcast <16 x float> %blend to <16 x i32>
|
||||||
|
store <16 x i32> %blendAsInt, <16 x i32>* %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @__masked_store_blend_64(<16 x i64>* nocapture, <16 x i64>,
|
|
||||||
<16 x i32>) nounwind alwaysinline {
|
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>,
|
||||||
call void @__masked_store_64(<16 x i64> * %0, <16 x i64> %1, <16 x i32> %2)
|
<4 x double>) nounwind readnone
|
||||||
|
|
||||||
|
define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %newi64,
|
||||||
|
<16 x i32> %mask) nounwind alwaysinline {
|
||||||
|
%oldValue = load <16 x i64>* %ptr, align 8
|
||||||
|
%old = bitcast <16 x i64> %oldValue to <16 x double>
|
||||||
|
%old0d = shufflevector <16 x double> %old, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
%old1d = shufflevector <16 x double> %old, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%old2d = shufflevector <16 x double> %old, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 8, i32 9, i32 10, i32 11>
|
||||||
|
%old3d = shufflevector <16 x double> %old, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||||
|
|
||||||
|
%new = bitcast <16 x i64> %newi64 to <16 x double>
|
||||||
|
%new0d = shufflevector <16 x double> %new, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
%new1d = shufflevector <16 x double> %new, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%new2d = shufflevector <16 x double> %new, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 8, i32 9, i32 10, i32 11>
|
||||||
|
%new3d = shufflevector <16 x double> %new, <16 x double> undef,
|
||||||
|
<4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||||
|
|
||||||
|
%mask0 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||||
|
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||||
|
%mask1 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||||
|
<8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
|
||||||
|
%mask2 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||||
|
<8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
|
||||||
|
%mask3 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||||
|
<8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
|
||||||
|
%mask0d = bitcast <8 x i32> %mask0 to <4 x double>
|
||||||
|
%mask1d = bitcast <8 x i32> %mask1 to <4 x double>
|
||||||
|
%mask2d = bitcast <8 x i32> %mask2 to <4 x double>
|
||||||
|
%mask3d = bitcast <8 x i32> %mask3 to <4 x double>
|
||||||
|
|
||||||
|
%result0d = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %old0d,
|
||||||
|
<4 x double> %new0d, <4 x double> %mask0d)
|
||||||
|
%result1d = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %old1d,
|
||||||
|
<4 x double> %new1d, <4 x double> %mask1d)
|
||||||
|
%result2d = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %old2d,
|
||||||
|
<4 x double> %new2d, <4 x double> %mask2d)
|
||||||
|
%result3d = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %old3d,
|
||||||
|
<4 x double> %new3d, <4 x double> %mask3d)
|
||||||
|
|
||||||
|
%result01 = shufflevector <4 x double> %result0d, <4 x double> %result1d,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%result23 = shufflevector <4 x double> %result2d, <4 x double> %result3d,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
|
||||||
|
%result = shufflevector <8 x double> %result01, <8 x double> %result23,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%result64 = bitcast <16 x double> %result to <16 x i64>
|
||||||
|
store <16 x i64> %result64, <16 x i64> * %ptr
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -294,10 +294,12 @@ define internal double @__reduce_add_double(<8 x double>) nounwind readonly alwa
|
|||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%v1 = shufflevector <8 x double> %0, <8 x double> undef,
|
%v1 = shufflevector <8 x double> %0, <8 x double> undef,
|
||||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
%sum01 = fadd <4 x double> %v0, %v1
|
%sum0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %v0, <4 x double> %v1)
|
||||||
%red0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum01, <4 x double> %sum01)
|
%sum1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum0, <4 x double> %sum0)
|
||||||
%red1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %red0, <4 x double> %red0)
|
%final0 = extractelement <4 x double> %sum1, i32 0
|
||||||
%sum = extractelement <4 x double> %red1, i32 0
|
%final1 = extractelement <4 x double> %sum1, i32 2
|
||||||
|
%sum = fadd double %final0, %final1
|
||||||
|
|
||||||
ret double %sum
|
ret double %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -448,38 +450,74 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
;; FIXME: various code elsewhere in the builtins implementations makes
|
|
||||||
;; calls to the 32/64 bit versions of these, basically assuming that doing
|
|
||||||
;; so is faster than doing a full call to an actual masked store, which
|
|
||||||
;; isn't likely to be the case on AVX. So here we provide those functions
|
|
||||||
;; but then don't actually do what the caller asked for...
|
|
||||||
|
|
||||||
declare void @llvm.trap()
|
masked_store_blend_8_16_by_8()
|
||||||
|
|
||||||
define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
call void @llvm.trap()
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
call void @llvm.trap()
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
|
||||||
|
<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||||
<8 x i32>) nounwind alwaysinline {
|
<8 x i32>) nounwind alwaysinline {
|
||||||
call void @__masked_store_32(<8 x i32> * %0, <8 x i32> %1, <8 x i32> %2)
|
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
|
||||||
|
%oldValue = load <8 x i32>* %0, align 4
|
||||||
|
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
||||||
|
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
||||||
|
%blend = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %oldAsFloat,
|
||||||
|
<8 x float> %newAsFloat,
|
||||||
|
<8 x float> %mask_as_float)
|
||||||
|
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
||||||
|
store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define void @__masked_store_blend_64(<8 x i64>* nocapture, <8 x i64>,
|
define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||||
<8 x i32>) nounwind alwaysinline {
|
<8 x i32> %i32mask) nounwind alwaysinline {
|
||||||
call void @__masked_store_64(<8 x i64> * %0, <8 x i64> %1, <8 x i32> %2)
|
%oldValue = load <8 x i64>* %ptr, align 8
|
||||||
|
%mask = bitcast <8 x i32> %i32mask to <8 x float>
|
||||||
|
|
||||||
|
; Do 4x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values
|
||||||
|
; are actually bitcast <4 x i64> values
|
||||||
|
;
|
||||||
|
; set up the first four 64-bit values
|
||||||
|
%old01 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
|
||||||
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
%old01f = bitcast <4 x i64> %old01 to <8 x float>
|
||||||
|
%new01 = shufflevector <8 x i64> %new, <8 x i64> undef,
|
||||||
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
%new01f = bitcast <4 x i64> %new01 to <8 x float>
|
||||||
|
; compute mask--note that the indices are all doubled-up
|
||||||
|
%mask01 = shufflevector <8 x float> %mask, <8 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 0, i32 1, i32 1,
|
||||||
|
i32 2, i32 2, i32 3, i32 3>
|
||||||
|
; and blend them
|
||||||
|
%result01f = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %old01f,
|
||||||
|
<8 x float> %new01f,
|
||||||
|
<8 x float> %mask01)
|
||||||
|
%result01 = bitcast <8 x float> %result01f to <4 x i64>
|
||||||
|
|
||||||
|
; and again
|
||||||
|
%old23 = shufflevector <8 x i64> %oldValue, <8 x i64> undef,
|
||||||
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%old23f = bitcast <4 x i64> %old23 to <8 x float>
|
||||||
|
%new23 = shufflevector <8 x i64> %new, <8 x i64> undef,
|
||||||
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%new23f = bitcast <4 x i64> %new23 to <8 x float>
|
||||||
|
; compute mask--note that the values are doubled-up...
|
||||||
|
%mask23 = shufflevector <8 x float> %mask, <8 x float> undef,
|
||||||
|
<8 x i32> <i32 4, i32 4, i32 5, i32 5,
|
||||||
|
i32 6, i32 6, i32 7, i32 7>
|
||||||
|
; and blend them
|
||||||
|
%result23f = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %old23f,
|
||||||
|
<8 x float> %new23f,
|
||||||
|
<8 x float> %mask23)
|
||||||
|
%result23 = bitcast <8 x float> %result23f to <4 x i64>
|
||||||
|
|
||||||
|
; reconstruct the final <8 x i64> vector
|
||||||
|
%final = shufflevector <4 x i64> %result01, <4 x i64> %result23,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||||
|
i32 4, i32 5, i32 6, i32 7>
|
||||||
|
store <8 x i64> %final, <8 x i64> * %ptr, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
23
builtins.cpp
23
builtins.cpp
@@ -389,6 +389,27 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||||
|
SymbolTable *symbolTable) {
|
||||||
|
std::vector<const Type *> args;
|
||||||
|
FunctionType *ft = new FunctionType(AtomicType::UniformInt32, args, SourcePos());
|
||||||
|
Symbol *sym = new Symbol(name, SourcePos(), ft);
|
||||||
|
sym->isStatic = true;
|
||||||
|
|
||||||
|
llvm::Function *func = module->getFunction(name);
|
||||||
|
assert(func != NULL); // it should be declared already...
|
||||||
|
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||||
|
llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
|
||||||
|
llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
|
||||||
|
|
||||||
|
sym->function = func;
|
||||||
|
symbolTable->AddVariable(sym);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||||
Symbol *pidx = new Symbol("programIndex", SourcePos(),
|
Symbol *pidx = new Symbol("programIndex", SourcePos(),
|
||||||
@@ -492,6 +513,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
symbolTable);
|
symbolTable);
|
||||||
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
|
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
|
||||||
symbolTable);
|
symbolTable);
|
||||||
|
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
|
||||||
|
symbolTable);
|
||||||
|
|
||||||
if (includeStdlibISPC) {
|
if (includeStdlibISPC) {
|
||||||
// If the user wants the standard library to be included, parse the
|
// If the user wants the standard library to be included, parse the
|
||||||
|
|||||||
113
builtins.m4
113
builtins.m4
@@ -851,6 +851,8 @@ define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
|
|||||||
|
|
||||||
define(`stdlib_core', `
|
define(`stdlib_core', `
|
||||||
|
|
||||||
|
declare i32 @__fast_masked_vload()
|
||||||
|
|
||||||
declare i8* @ISPCMalloc(i64, i32) nounwind
|
declare i8* @ISPCMalloc(i64, i32) nounwind
|
||||||
declare i8* @ISPCFree(i8*) nounwind
|
declare i8* @ISPCFree(i8*) nounwind
|
||||||
declare void @ISPCLaunch(i8*, i8*) nounwind
|
declare void @ISPCLaunch(i8*, i8*) nounwind
|
||||||
@@ -1344,12 +1346,6 @@ i64minmax($1,max,uint64,ugt)
|
|||||||
|
|
||||||
define(`load_and_broadcast', `
|
define(`load_and_broadcast', `
|
||||||
define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
|
define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
; must not load if the mask is all off; the address may be invalid
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any_on = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any_on, label %load, label %skip
|
|
||||||
|
|
||||||
load:
|
|
||||||
%ptr = bitcast i8 * %0 to $2 *
|
%ptr = bitcast i8 * %0 to $2 *
|
||||||
%val = load $2 * %ptr
|
%val = load $2 * %ptr
|
||||||
|
|
||||||
@@ -1357,9 +1353,6 @@ load:
|
|||||||
forloop(i, 1, eval($1-1), `
|
forloop(i, 1, eval($1-1), `
|
||||||
%ret`'i = insertelement <$1 x $2> %ret`'eval(i-1), $2 %val, i32 i')
|
%ret`'i = insertelement <$1 x $2> %ret`'eval(i-1), $2 %val, i32 i')
|
||||||
ret <$1 x $2> %ret`'eval($1-1)
|
ret <$1 x $2> %ret`'eval($1-1)
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <$1 x $2> undef
|
|
||||||
}
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
@@ -1375,14 +1368,20 @@ define(`load_masked', `
|
|||||||
define <$1 x $2> @__load_masked_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
|
define <$1 x $2> @__load_masked_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
entry:
|
entry:
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
||||||
|
|
||||||
; if the first lane and the last lane are on, then it is safe to do a vector load
|
; if the first lane and the last lane are on, then it is safe to do a vector load
|
||||||
; of the whole thing--what the lanes in the middle want turns out to not matter...
|
; of the whole thing--what the lanes in the middle want turns out to not matter...
|
||||||
%mm_and = and i32 %mm, eval(1 | (1<<($1-1)))
|
%mm_and = and i32 %mm, eval(1 | (1<<($1-1)))
|
||||||
%can_vload = icmp eq i32 %mm_and, eval(1 | (1<<($1-1)))
|
%can_vload = icmp eq i32 %mm_and, eval(1 | (1<<($1-1)))
|
||||||
|
|
||||||
|
%fast32 = call i32 @__fast_masked_vload()
|
||||||
|
%fast_i1 = trunc i32 %fast32 to i1
|
||||||
|
%can_vload_maybe_fast = or i1 %fast_i1, %can_vload
|
||||||
|
|
||||||
; if we are not able to do a singe vload, we will accumulate lanes in this memory..
|
; if we are not able to do a singe vload, we will accumulate lanes in this memory..
|
||||||
%retptr = alloca <$1 x $2>
|
%retptr = alloca <$1 x $2>
|
||||||
%retptr32 = bitcast <$1 x $2> * %retptr to $2 *
|
%retptr32 = bitcast <$1 x $2> * %retptr to $2 *
|
||||||
br i1 %can_vload, label %load, label %loop
|
br i1 %can_vload_maybe_fast, label %load, label %loop
|
||||||
|
|
||||||
load:
|
load:
|
||||||
%ptr = bitcast i8 * %0 to <$1 x $2> *
|
%ptr = bitcast i8 * %0 to <$1 x $2> *
|
||||||
@@ -1517,6 +1516,46 @@ define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
|
|||||||
')
|
')
|
||||||
|
|
||||||
|
|
||||||
|
define(`masked_store_blend_8_16_by_16', `
|
||||||
|
define void @__masked_store_blend_8(<16 x i8>* nocapture, <16 x i8>,
|
||||||
|
<16 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <16 x i8> * %0
|
||||||
|
%old128 = bitcast <16 x i8> %old to i128
|
||||||
|
%new128 = bitcast <16 x i8> %1 to i128
|
||||||
|
|
||||||
|
%mask8 = trunc <16 x i32> %2 to <16 x i8>
|
||||||
|
%mask128 = bitcast <16 x i8> %mask8 to i128
|
||||||
|
%notmask128 = xor i128 %mask128, -1
|
||||||
|
|
||||||
|
%newmasked = and i128 %new128, %mask128
|
||||||
|
%oldmasked = and i128 %old128, %notmask128
|
||||||
|
%result = or i128 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i128 %result to <16 x i8>
|
||||||
|
store <16 x i8> %resultvec, <16 x i8> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_16(<16 x i16>* nocapture, <16 x i16>,
|
||||||
|
<16 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <16 x i16> * %0
|
||||||
|
%old256 = bitcast <16 x i16> %old to i256
|
||||||
|
%new256 = bitcast <16 x i16> %1 to i256
|
||||||
|
|
||||||
|
%mask16 = trunc <16 x i32> %2 to <16 x i16>
|
||||||
|
%mask256 = bitcast <16 x i16> %mask16 to i256
|
||||||
|
%notmask256 = xor i256 %mask256, -1
|
||||||
|
|
||||||
|
%newmasked = and i256 %new256, %mask256
|
||||||
|
%oldmasked = and i256 %old256, %notmask256
|
||||||
|
%result = or i256 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i256 %result to <16 x i16>
|
||||||
|
store <16 x i16> %resultvec, <16 x i16> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; packed load and store functions
|
;; packed load and store functions
|
||||||
;;
|
;;
|
||||||
@@ -1544,7 +1583,7 @@ entry:
|
|||||||
|
|
||||||
known_mask:
|
known_mask:
|
||||||
%allon = icmp eq i32 %mask, eval((1 << $1) -1)
|
%allon = icmp eq i32 %mask, eval((1 << $1) -1)
|
||||||
br i1 %allon, label %all_on, label %not_all_on
|
br i1 %allon, label %all_on, label %unknown_mask
|
||||||
|
|
||||||
all_on:
|
all_on:
|
||||||
;; everyone wants to load, so just load an entire vector width in a single
|
;; everyone wants to load, so just load an entire vector width in a single
|
||||||
@@ -1554,14 +1593,6 @@ all_on:
|
|||||||
store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr, align 4
|
store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr, align 4
|
||||||
ret i32 $1
|
ret i32 $1
|
||||||
|
|
||||||
not_all_on:
|
|
||||||
%alloff = icmp eq i32 %mask, 0
|
|
||||||
br i1 %alloff, label %all_off, label %unknown_mask
|
|
||||||
|
|
||||||
all_off:
|
|
||||||
;; no one wants to load
|
|
||||||
ret i32 0
|
|
||||||
|
|
||||||
unknown_mask:
|
unknown_mask:
|
||||||
br label %loop
|
br label %loop
|
||||||
|
|
||||||
@@ -1608,20 +1639,13 @@ entry:
|
|||||||
|
|
||||||
known_mask:
|
known_mask:
|
||||||
%allon = icmp eq i32 %mask, eval((1 << $1) -1)
|
%allon = icmp eq i32 %mask, eval((1 << $1) -1)
|
||||||
br i1 %allon, label %all_on, label %not_all_on
|
br i1 %allon, label %all_on, label %unknown_mask
|
||||||
|
|
||||||
all_on:
|
all_on:
|
||||||
%vecptr = bitcast i32 *%startptr to <$1 x i32> *
|
%vecptr = bitcast i32 *%startptr to <$1 x i32> *
|
||||||
store <$1 x i32> %vals, <$1 x i32> * %vecptr, align 4
|
store <$1 x i32> %vals, <$1 x i32> * %vecptr, align 4
|
||||||
ret i32 $1
|
ret i32 $1
|
||||||
|
|
||||||
not_all_on:
|
|
||||||
%alloff = icmp eq i32 %mask, 0
|
|
||||||
br i1 %alloff, label %all_off, label %unknown_mask
|
|
||||||
|
|
||||||
all_off:
|
|
||||||
ret i32 0
|
|
||||||
|
|
||||||
unknown_mask:
|
unknown_mask:
|
||||||
br label %loop
|
br label %loop
|
||||||
|
|
||||||
@@ -1671,14 +1695,6 @@ entry:
|
|||||||
br i1 %allon, label %check_neighbors, label %domixed
|
br i1 %allon, label %check_neighbors, label %domixed
|
||||||
|
|
||||||
domixed:
|
domixed:
|
||||||
; the mask is mixed on/off. First see if the lanes are all off
|
|
||||||
%alloff = icmp eq i32 %mm, 0
|
|
||||||
br i1 %alloff, label %doalloff, label %actuallymixed
|
|
||||||
|
|
||||||
doalloff:
|
|
||||||
ret i1 false ;; this seems safest
|
|
||||||
|
|
||||||
actuallymixed:
|
|
||||||
; First, figure out which lane is the first active one
|
; First, figure out which lane is the first active one
|
||||||
%first = call i32 @llvm.cttz.i32(i32 %mm)
|
%first = call i32 @llvm.cttz.i32(i32 %mm)
|
||||||
%baseval = extractelement <$1 x $2> %v, i32 %first
|
%baseval = extractelement <$1 x $2> %v, i32 %first
|
||||||
@@ -1701,7 +1717,7 @@ actuallymixed:
|
|||||||
br label %check_neighbors
|
br label %check_neighbors
|
||||||
|
|
||||||
check_neighbors:
|
check_neighbors:
|
||||||
%vec = phi <$1 x $2> [ %blendvec, %actuallymixed ], [ %v, %entry ]
|
%vec = phi <$1 x $2> [ %blendvec, %domixed ], [ %v, %entry ]
|
||||||
ifelse($6, `32', `
|
ifelse($6, `32', `
|
||||||
; For 32-bit elements, we rotate once and compare with the vector, which ends
|
; For 32-bit elements, we rotate once and compare with the vector, which ends
|
||||||
; up comparing each element to its neighbor on the right. Then see if
|
; up comparing each element to its neighbor on the right. Then see if
|
||||||
@@ -1833,7 +1849,7 @@ pl_known_mask:
|
|||||||
;; the mask is known at compile time; see if it is something we can
|
;; the mask is known at compile time; see if it is something we can
|
||||||
;; handle more efficiently
|
;; handle more efficiently
|
||||||
%pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1)
|
%pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1)
|
||||||
br i1 %pl_is_allon, label %pl_all_on, label %pl_not_all_on
|
br i1 %pl_is_allon, label %pl_all_on, label %pl_unknown_mask
|
||||||
|
|
||||||
pl_all_on:
|
pl_all_on:
|
||||||
;; the mask is all on--just expand the code for each lane sequentially
|
;; the mask is all on--just expand the code for each lane sequentially
|
||||||
@@ -1841,19 +1857,14 @@ pl_all_on:
|
|||||||
`patsubst(`$3', `ID\|LANE', i)')
|
`patsubst(`$3', `ID\|LANE', i)')
|
||||||
br label %pl_done
|
br label %pl_done
|
||||||
|
|
||||||
pl_not_all_on:
|
pl_unknown_mask:
|
||||||
;; not all on--see if it is all off or mixed
|
;; we just run the general case, though we could
|
||||||
;; for the mixed case, we just run the general case, though we could
|
|
||||||
;; try to be smart and just emit the code based on what it actually is,
|
;; try to be smart and just emit the code based on what it actually is,
|
||||||
;; for example by emitting the code straight-line without a loop and doing
|
;; for example by emitting the code straight-line without a loop and doing
|
||||||
;; the lane tests explicitly, leaving later optimization passes to eliminate
|
;; the lane tests explicitly, leaving later optimization passes to eliminate
|
||||||
;; the stuff that is definitely not needed. Not clear if we will frequently
|
;; the stuff that is definitely not needed. Not clear if we will frequently
|
||||||
;; encounter a mask that is known at compile-time but is not either all on or
|
;; encounter a mask that is known at compile-time but is not either all on or
|
||||||
;; all off...
|
;; all off...
|
||||||
%pl_alloff = icmp eq i32 %pl_mask, 0
|
|
||||||
br i1 %pl_alloff, label %pl_done, label %pl_unknown_mask
|
|
||||||
|
|
||||||
pl_unknown_mask:
|
|
||||||
br label %pl_loop
|
br label %pl_loop
|
||||||
|
|
||||||
pl_loop:
|
pl_loop:
|
||||||
@@ -1909,20 +1920,6 @@ define internal <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x
|
|||||||
|
|
||||||
define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
|
define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
|
||||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
||||||
entry:
|
|
||||||
%mask = call i32 @__movmsk(<$1 x i32> %vecmask)
|
|
||||||
|
|
||||||
%maskKnown = call i1 @__is_compile_time_constant_mask(<$1 x i32> %vecmask)
|
|
||||||
br i1 %maskKnown, label %known_mask, label %unknown_mask
|
|
||||||
|
|
||||||
known_mask:
|
|
||||||
%alloff = icmp eq i32 %mask, 0
|
|
||||||
br i1 %alloff, label %gather_all_off, label %unknown_mask
|
|
||||||
|
|
||||||
gather_all_off:
|
|
||||||
ret <$1 x $2> undef
|
|
||||||
|
|
||||||
unknown_mask:
|
|
||||||
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
||||||
; to require that the 0th element of the array being gathered from is always
|
; to require that the 0th element of the array being gathered from is always
|
||||||
; legal to read from (and we do indeed require that, given the benefits!)
|
; legal to read from (and we do indeed require that, given the benefits!)
|
||||||
|
|||||||
60
ctx.cpp
60
ctx.cpp
@@ -153,7 +153,6 @@ FunctionEmitContext::FunctionEmitContext(const Type *rt, llvm::Function *functio
|
|||||||
StoreInst(llvm::Constant::getNullValue(ftype), returnValuePtr);
|
StoreInst(llvm::Constant::getNullValue(ftype), returnValuePtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (m->diBuilder) {
|
if (m->diBuilder) {
|
||||||
/* If debugging is enabled, tell the debug information emission
|
/* If debugging is enabled, tell the debug information emission
|
||||||
code about this new function */
|
code about this new function */
|
||||||
@@ -174,7 +173,6 @@ FunctionEmitContext::FunctionEmitContext(const Type *rt, llvm::Function *functio
|
|||||||
/* And start a scope representing the initial function scope */
|
/* And start a scope representing the initial function scope */
|
||||||
StartScope();
|
StartScope();
|
||||||
}
|
}
|
||||||
#endif // LLVM_2_8
|
|
||||||
|
|
||||||
launchedTasks = false;
|
launchedTasks = false;
|
||||||
|
|
||||||
@@ -183,7 +181,6 @@ FunctionEmitContext::FunctionEmitContext(const Type *rt, llvm::Function *functio
|
|||||||
assert(maskSymbol != NULL);
|
assert(maskSymbol != NULL);
|
||||||
maskSymbol->storagePtr = maskPtr;
|
maskSymbol->storagePtr = maskPtr;
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
// add debugging info for __mask, programIndex, ...
|
// add debugging info for __mask, programIndex, ...
|
||||||
if (m->diBuilder) {
|
if (m->diBuilder) {
|
||||||
maskSymbol->pos = funcStartPos;
|
maskSymbol->pos = funcStartPos;
|
||||||
@@ -208,15 +205,12 @@ FunctionEmitContext::FunctionEmitContext(const Type *rt, llvm::Function *functio
|
|||||||
true /* static */,
|
true /* static */,
|
||||||
programCountSymbol->storagePtr);
|
programCountSymbol->storagePtr);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FunctionEmitContext::~FunctionEmitContext() {
|
FunctionEmitContext::~FunctionEmitContext() {
|
||||||
assert(controlFlowInfo.size() == 0);
|
assert(controlFlowInfo.size() == 0);
|
||||||
#ifndef LLVM_2_8
|
|
||||||
assert(debugScopes.size() == (m->diBuilder ? 1 : 0));
|
assert(debugScopes.size() == (m->diBuilder ? 1 : 0));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -704,6 +698,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
|
|||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
||||||
|
#if 0
|
||||||
// Compare the two masks to get a vector of i1s
|
// Compare the two masks to get a vector of i1s
|
||||||
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
||||||
v1, v2, "v1==v2");
|
v1, v2, "v1==v2");
|
||||||
@@ -711,6 +706,12 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
cmp = I1VecToBoolVec(cmp);
|
cmp = I1VecToBoolVec(cmp);
|
||||||
// And see if it's all on
|
// And see if it's all on
|
||||||
return All(cmp);
|
return All(cmp);
|
||||||
|
#else
|
||||||
|
llvm::Value *mm1 = LaneMask(v1);
|
||||||
|
llvm::Value *mm2 = LaneMask(v2);
|
||||||
|
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
|
||||||
|
"v1==v2");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -850,7 +851,6 @@ FunctionEmitContext::GetDebugPos() const {
|
|||||||
void
|
void
|
||||||
FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
|
FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
|
||||||
llvm::DIScope *scope) {
|
llvm::DIScope *scope) {
|
||||||
#ifndef LLVM_2_8
|
|
||||||
llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
|
llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
|
||||||
if (inst != NULL && m->diBuilder) {
|
if (inst != NULL && m->diBuilder) {
|
||||||
SourcePos p = pos ? *pos : currentPos;
|
SourcePos p = pos ? *pos : currentPos;
|
||||||
@@ -861,13 +861,11 @@ FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
|
|||||||
inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
|
inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
|
||||||
scope ? *scope : GetDIScope()));
|
scope ? *scope : GetDIScope()));
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::StartScope() {
|
FunctionEmitContext::StartScope() {
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (m->diBuilder != NULL) {
|
if (m->diBuilder != NULL) {
|
||||||
llvm::DIScope parentScope;
|
llvm::DIScope parentScope;
|
||||||
if (debugScopes.size() > 0)
|
if (debugScopes.size() > 0)
|
||||||
@@ -881,18 +879,15 @@ FunctionEmitContext::StartScope() {
|
|||||||
currentPos.first_column);
|
currentPos.first_column);
|
||||||
debugScopes.push_back(lexicalBlock);
|
debugScopes.push_back(lexicalBlock);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::EndScope() {
|
FunctionEmitContext::EndScope() {
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (m->diBuilder != NULL) {
|
if (m->diBuilder != NULL) {
|
||||||
assert(debugScopes.size() > 0);
|
assert(debugScopes.size() > 0);
|
||||||
debugScopes.pop_back();
|
debugScopes.pop_back();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -905,7 +900,6 @@ FunctionEmitContext::GetDIScope() const {
|
|||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
|
FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (m->diBuilder == NULL)
|
if (m->diBuilder == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -921,13 +915,11 @@ FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
|
|||||||
llvm::Instruction *declareInst =
|
llvm::Instruction *declareInst =
|
||||||
m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
|
m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
|
||||||
AddDebugPos(declareInst, &sym->pos, &scope);
|
AddDebugPos(declareInst, &sym->pos, &scope);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) {
|
FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) {
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (m->diBuilder == NULL)
|
if (m->diBuilder == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -943,7 +935,6 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) {
|
|||||||
llvm::Instruction *declareInst =
|
llvm::Instruction *declareInst =
|
||||||
m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
|
m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
|
||||||
AddDebugPos(declareInst, &sym->pos, &scope);
|
AddDebugPos(declareInst, &sym->pos, &scope);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1501,27 +1492,15 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
|
|||||||
void
|
void
|
||||||
FunctionEmitContext::addGSMetadata(llvm::Instruction *inst, SourcePos pos) {
|
FunctionEmitContext::addGSMetadata(llvm::Instruction *inst, SourcePos pos) {
|
||||||
llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
|
llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
|
||||||
#ifdef LLVM_2_8
|
|
||||||
llvm::MDNode *md = llvm::MDNode::get(*g->ctx, &str, 1);
|
|
||||||
#else
|
|
||||||
llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
|
llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
|
||||||
#endif
|
|
||||||
inst->setMetadata("filename", md);
|
inst->setMetadata("filename", md);
|
||||||
|
|
||||||
llvm::Value *line = LLVMInt32(pos.first_line);
|
llvm::Value *line = LLVMInt32(pos.first_line);
|
||||||
#ifdef LLVM_2_8
|
|
||||||
md = llvm::MDNode::get(*g->ctx, &line, 1);
|
|
||||||
#else
|
|
||||||
md = llvm::MDNode::get(*g->ctx, line);
|
md = llvm::MDNode::get(*g->ctx, line);
|
||||||
#endif
|
|
||||||
inst->setMetadata("line", md);
|
inst->setMetadata("line", md);
|
||||||
|
|
||||||
llvm::Value *column = LLVMInt32(pos.first_column);
|
llvm::Value *column = LLVMInt32(pos.first_column);
|
||||||
#ifdef LLVM_2_8
|
|
||||||
md = llvm::MDNode::get(*g->ctx, &column, 1);
|
|
||||||
#else
|
|
||||||
md = llvm::MDNode::get(*g->ctx, column);
|
md = llvm::MDNode::get(*g->ctx, column);
|
||||||
#endif
|
|
||||||
inst->setMetadata("column", md);
|
inst->setMetadata("column", md);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1838,9 +1817,9 @@ llvm::PHINode *
|
|||||||
FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
|
FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
|
||||||
const char *name) {
|
const char *name) {
|
||||||
llvm::PHINode *pn = llvm::PHINode::Create(type,
|
llvm::PHINode *pn = llvm::PHINode::Create(type,
|
||||||
#if !defined(LLVM_2_8) && !defined(LLVM_2_9)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
count,
|
count,
|
||||||
#endif // !LLVM_2_8 && !LLVM_2_9
|
#endif // LLVM_3_0
|
||||||
name ? name : "phi", bblock);
|
name ? name : "phi", bblock);
|
||||||
AddDebugPos(pn);
|
AddDebugPos(pn);
|
||||||
return pn;
|
return pn;
|
||||||
@@ -1982,17 +1961,26 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
|
|||||||
assert(argStructType->getNumElements() == argVals.size() + 1);
|
assert(argStructType->getNumElements() == argVals.size() + 1);
|
||||||
|
|
||||||
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
|
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
|
||||||
|
llvm::Value *argmem;
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
// Use malloc() to allocate storage on Windows, since the stack is
|
// Use malloc() to allocate storage on Windows, since the stack is
|
||||||
// generally not big enough there to do enough allocations for lots of
|
// generally not big enough there to do enough allocations for lots of
|
||||||
// tasks and then things crash horribly...
|
// tasks and then things crash horribly...
|
||||||
llvm::Value *argmem = EmitMalloc(argStructType, align);
|
argmem = EmitMalloc(argStructType, align);
|
||||||
#else
|
#else
|
||||||
// Use alloca for space for the task args on OSX And Linux. KEY
|
// Otherwise, use alloca for space for the task args, ** unless we're
|
||||||
// DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so
|
// compiling to AVX, in which case we use malloc after all **. (See
|
||||||
// that the alloca doesn't happen just once at the top of the function,
|
// http://llvm.org/bugs/show_bug.cgi?id=10841 for details. There are
|
||||||
// but happens each time the enclosing basic block executes.
|
// limitations in LLVM with respect to dynamic allocas of this sort
|
||||||
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false);
|
// when the stack also has to be 32-byte aligned...).
|
||||||
|
if (g->target.isa == Target::AVX)
|
||||||
|
argmem = EmitMalloc(argStructType, align);
|
||||||
|
else
|
||||||
|
// KEY DETAIL: pass false to the call of
|
||||||
|
// FunctionEmitContext::AllocaInst so that the alloca doesn't
|
||||||
|
// happen just once at the top of the function, but happens each
|
||||||
|
// time the enclosing basic block executes.
|
||||||
|
argmem = AllocaInst(argStructType, "argmem", align, false);
|
||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
|
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
|
||||||
|
|
||||||
|
|||||||
2
decl.cpp
2
decl.cpp
@@ -237,7 +237,7 @@ Declarator::GetType(DeclSpecs *ds) const {
|
|||||||
sprintf(buf, "__anon_parameter_%d", i);
|
sprintf(buf, "__anon_parameter_%d", i);
|
||||||
sym = new Symbol(buf, pos);
|
sym = new Symbol(buf, pos);
|
||||||
Declarator *declarator = new Declarator(sym, sym->pos);
|
Declarator *declarator = new Declarator(sym, sym->pos);
|
||||||
sym->type = declarator->GetType(ds);
|
sym->type = declarator->GetType(d->declSpecs);
|
||||||
d->declarators.push_back(declarator);
|
d->declarators.push_back(declarator);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|||||||
@@ -1,3 +1,42 @@
|
|||||||
|
=== v1.0.9 === (26 September 2011)
|
||||||
|
|
||||||
|
The binary release of v1.0.9 is the first that supports AVX code
|
||||||
|
generation. Two targets are provided: "avx", which runs with a
|
||||||
|
programCount of 8, and "avx-x2" which runs 16 program instances
|
||||||
|
simultaneously. (This binary is also built using the in-progress LLVM 3.0
|
||||||
|
development libraries, while previous ones have been built with the
|
||||||
|
released 2.9 version of LLVM.)
|
||||||
|
|
||||||
|
This release has no other significant changes beyond a number of small
|
||||||
|
bugfixes (https://github.com/ispc/ispc/issues/100,
|
||||||
|
https://github.com/ispc/ispc/issues/101, https://github.com/ispc/ispc/issues/103.)
|
||||||
|
|
||||||
|
=== v1.0.8 === (19 September 2011)
|
||||||
|
|
||||||
|
A number of improvements have been made to handling of 'if' statements in
|
||||||
|
the language:
|
||||||
|
- A bug was fixed where invalid memory could be incorrectly accessed even
|
||||||
|
if none of the running program instances wanted to execute the
|
||||||
|
corresponding instructions (https://github.com/ispc/ispc/issues/74).
|
||||||
|
- The code generated for 'if' statements is a bit simpler and thus more
|
||||||
|
efficient.
|
||||||
|
|
||||||
|
There is now a '--pic' command-line argument that causes position-independent
|
||||||
|
code to be generated (Linux and OSX only).
|
||||||
|
|
||||||
|
A number of additional performance improvements:
|
||||||
|
- Loops are now unrolled by default; the --opt=disable-loop-unroll
|
||||||
|
command-line argument can be used to disable this behavior.
|
||||||
|
(https://github.com/ispc/ispc/issues/78)
|
||||||
|
- A few more cases where gathers/scatters could be determined at compile
|
||||||
|
time to actually access contiguous locations have been added.
|
||||||
|
(https://github.com/ispc/ispc/issues/79)
|
||||||
|
|
||||||
|
Finally, warnings are now issued (if possible) when it can be determined
|
||||||
|
at compile-time that an out-of-bounds array index is being used.
|
||||||
|
(https://github.com/ispc/ispc/issues/98).
|
||||||
|
|
||||||
|
|
||||||
=== v1.0.7 === (3 September 2011)
|
=== v1.0.7 === (3 September 2011)
|
||||||
|
|
||||||
The various atomic_*_global() standard library functions are generally
|
The various atomic_*_global() standard library functions are generally
|
||||||
|
|||||||
104
docs/ispc.txt
104
docs/ispc.txt
@@ -33,6 +33,17 @@ The main goals behind ``ispc`` are to:
|
|||||||
number of non-trivial workloads that aren't handled well by other
|
number of non-trivial workloads that aren't handled well by other
|
||||||
compilation approaches (e.g. loop auto-vectorization.)
|
compilation approaches (e.g. loop auto-vectorization.)
|
||||||
|
|
||||||
|
**We are very interested in your feedback and comments about ispc and
|
||||||
|
in hearing your experiences using the system. We are especially interested
|
||||||
|
in hearing if you try using ispc but see results that are not as you
|
||||||
|
were expecting or hoping for.** We encourage you to send a note with your
|
||||||
|
experiences or comments to the `ispc-users`_ mailing list or to file bug or
|
||||||
|
feature requests with the ``ispc`` `bug tracker`_. (Thanks!)
|
||||||
|
|
||||||
|
.. _ispc-users: http://groups.google.com/group/ispc-users
|
||||||
|
.. _bug tracker: https://github.com/ispc/ispc/issues?state=open
|
||||||
|
|
||||||
|
|
||||||
Contents:
|
Contents:
|
||||||
|
|
||||||
* `Recent Changes to ISPC`_
|
* `Recent Changes to ISPC`_
|
||||||
@@ -102,6 +113,8 @@ Contents:
|
|||||||
+ `Small Performance Tricks`_
|
+ `Small Performance Tricks`_
|
||||||
+ `Instrumenting Your ISPC Programs`_
|
+ `Instrumenting Your ISPC Programs`_
|
||||||
+ `Using Scan Operations For Variable Output`_
|
+ `Using Scan Operations For Variable Output`_
|
||||||
|
+ `Application-Supplied Execution Masks`_
|
||||||
|
+ `Explicit Vector Programming With Uniform Short Vector Types`_
|
||||||
|
|
||||||
* `Disclaimer and Legal Information`_
|
* `Disclaimer and Legal Information`_
|
||||||
|
|
||||||
@@ -1174,7 +1187,7 @@ This code implicitly assumes that ``programCount`` evenly divides
|
|||||||
::
|
::
|
||||||
|
|
||||||
for (uniform int i = 0; i < count; i += programCount) {
|
for (uniform int i = 0; i < count; i += programCount) {
|
||||||
if (i + programIndex < programCount) {
|
if (i + programIndex < count) {
|
||||||
float d = data[i + programIndex];
|
float d = data[i + programIndex];
|
||||||
...
|
...
|
||||||
|
|
||||||
@@ -2209,14 +2222,14 @@ Both the ``foo`` and ``bar`` global variables can be accessed on each
|
|||||||
side.
|
side.
|
||||||
|
|
||||||
``ispc`` code can also call back to C/C++. On the ``ispc`` side, any
|
``ispc`` code can also call back to C/C++. On the ``ispc`` side, any
|
||||||
application functions to be called must be declared with the ``export "C"``
|
application functions to be called must be declared with the ``extern "C"``
|
||||||
qualifier.
|
qualifier.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
extern "C" void foo(uniform float f, uniform float g);
|
extern "C" void foo(uniform float f, uniform float g);
|
||||||
|
|
||||||
Unlike in C++, ``export "C"`` doesn't take braces to delineate
|
Unlike in C++, ``extern "C"`` doesn't take braces to delineate
|
||||||
multiple functions to be declared; thus, multiple C functions to be called
|
multiple functions to be declared; thus, multiple C functions to be called
|
||||||
from ``ispc`` must be declared as follows:
|
from ``ispc`` must be declared as follows:
|
||||||
|
|
||||||
@@ -2843,6 +2856,91 @@ values to ``outArray[1]`` and ``outArray[2]``, and so forth. The
|
|||||||
``reduce_add`` call at the end returns the total number of values that the
|
``reduce_add`` call at the end returns the total number of values that the
|
||||||
program instances have written to the array.
|
program instances have written to the array.
|
||||||
|
|
||||||
|
Application-Supplied Execution Masks
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
Recall that when execution transitions from the application code to an
|
||||||
|
``ispc`` function, all of the program instances are initially executing.
|
||||||
|
In some cases, it may be desired that only some of them are running, based on
|
||||||
|
a data-dependent condition computed in the application program. This
|
||||||
|
situation can easily be handled via an additional parameter from the
|
||||||
|
application.
|
||||||
|
|
||||||
|
As a simple example, consider a case where the application code has an
|
||||||
|
array of ``float`` values and we'd like the ``ispc`` code to update
|
||||||
|
just specific values in that array, where which of those values are to be
|
||||||
|
updated has been determined by the application. In C++ code, we might
|
||||||
|
have:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int count = ...;
|
||||||
|
float *array = new float[count];
|
||||||
|
bool *shouldUpdate = new bool[count];
|
||||||
|
// initialize array and shouldUpdate
|
||||||
|
ispc_func(array, shouldUpdate, count);
|
||||||
|
|
||||||
|
Then, the ``ispc`` code could process this update as:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
export void ispc_func(uniform float array[], uniform bool update[],
|
||||||
|
uniform int count) {
|
||||||
|
for (uniform int i = 0; i < count; i += programCount) {
|
||||||
|
cif (update[i+programIndex] == true)
|
||||||
|
// update array[i+programIndex]...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(In this case a "coherent" if statement is likely to be worthwhile if the
|
||||||
|
``update`` array will tend to have sections that are either all-true or
|
||||||
|
all-false.)
|
||||||
|
|
||||||
|
Explicit Vector Programming With Uniform Short Vector Types
|
||||||
|
-----------------------------------------------------------
|
||||||
|
|
||||||
|
The typical model for programming in ``ispc`` is an *implicit* parallel
|
||||||
|
model, where one writes a program that is apparently doing scalar
|
||||||
|
computation on values and the program is then vectorized to run in parallel
|
||||||
|
across the SIMD lanes of a processor. However, ``ispc`` also has some
|
||||||
|
support for explicit vector unit programming, where the vectorization is
|
||||||
|
explicit. Some computations may be more effectively described in the
|
||||||
|
explicit model rather than the implicit model.
|
||||||
|
|
||||||
|
This support is provided via ``uniform`` instances of short vectors
|
||||||
|
(as were introduced in the `Short Vector Types`_ section). Specifically,
|
||||||
|
if this short program
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
export uniform float<8> madd(uniform float<8> a,
|
||||||
|
uniform float<8> b, uniform float<8> c) {
|
||||||
|
return a + b * c;
|
||||||
|
}
|
||||||
|
|
||||||
|
is compiled with the AVX target, ``ispc`` generates the following assembly:
|
||||||
|
|
||||||
|
::
|
||||||
|
_madd:
|
||||||
|
vmulps %ymm2, %ymm1, %ymm1
|
||||||
|
vaddps %ymm0, %ymm1, %ymm0
|
||||||
|
ret
|
||||||
|
|
||||||
|
(And similarly, if compiled with a 4-wide SSE target, two ``mulps`` and two
|
||||||
|
``addps`` instructions are generated, and so forth.)
|
||||||
|
|
||||||
|
Note that ``ispc`` doesn't currently support control-flow based on
|
||||||
|
``uniform`` short vector types; it is thus not possible to write code like:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
export uniform int<8> count(uniform float<8> a, uniform float<8> b) {
|
||||||
|
uniform int<8> sum = 0;
|
||||||
|
while (a++ < b)
|
||||||
|
++sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Disclaimer and Legal Information
|
Disclaimer and Legal Information
|
||||||
================================
|
================================
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
|||||||
# This could be handy for archiving the generated documentation or
|
# This could be handy for archiving the generated documentation or
|
||||||
# if some version control system is used.
|
# if some version control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = 1.0.7
|
PROJECT_NUMBER = 1.0.9
|
||||||
|
|
||||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||||
# base path where the generated documentation will be put.
|
# base path where the generated documentation will be put.
|
||||||
|
|||||||
@@ -1,8 +1,20 @@
|
|||||||
|
|
||||||
CXX=g++ -m64
|
ARCH = $(shell uname)
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
|
||||||
|
TASK_CXX=../tasks_pthreads.cpp
|
||||||
|
TASK_LIB=-lpthread
|
||||||
|
|
||||||
|
ifeq ($(ARCH), Darwin)
|
||||||
|
TASK_CXX=../tasks_gcd.cpp
|
||||||
|
TASK_LIB=
|
||||||
|
endif
|
||||||
|
|
||||||
|
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||||
|
|
||||||
|
CXX=g++
|
||||||
|
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --fast-math --arch=x86-64
|
ISPCFLAGS=-O2 --target=sse4 --arch=x86-64
|
||||||
|
|
||||||
default: ao
|
default: ao
|
||||||
|
|
||||||
@@ -14,12 +26,15 @@ dirs:
|
|||||||
clean:
|
clean:
|
||||||
/bin/rm -rf objs *~ ao
|
/bin/rm -rf objs *~ ao
|
||||||
|
|
||||||
ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o
|
ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o $(TASK_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o -lm -lpthread
|
$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
|
||||||
|
|
||||||
objs/%.o: %.cpp
|
objs/%.o: %.cpp
|
||||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
|
objs/%.o: ../%.cpp
|
||||||
|
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
objs/ao.o: objs/ao_ispc.h
|
objs/ao.o: objs/ao_ispc.h
|
||||||
|
|
||||||
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||||
|
|||||||
@@ -173,10 +173,30 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Report results and save image
|
// Report results and save image
|
||||||
printf("[aobench ispc]:\t\t\t[%.3f] M cycles (%d x %d image)\n", minTimeISPC,
|
printf("[aobench ispc]:\t\t\t[%.3f] M cycles (%d x %d image)\n",
|
||||||
width, height);
|
minTimeISPC, width, height);
|
||||||
savePPM("ao-ispc.ppm", width, height);
|
savePPM("ao-ispc.ppm", width, height);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Run the ispc + tasks path, test_iterations times, and report the
|
||||||
|
// minimum time for any of them.
|
||||||
|
//
|
||||||
|
double minTimeISPCTasks = 1e30;
|
||||||
|
for (unsigned int i = 0; i < test_iterations; i++) {
|
||||||
|
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||||
|
assert(NSUBSAMPLES == 2);
|
||||||
|
|
||||||
|
reset_and_start_timer();
|
||||||
|
ao_ispc_tasks(width, height, NSUBSAMPLES, fimg);
|
||||||
|
double t = get_elapsed_mcycles();
|
||||||
|
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report results and save image
|
||||||
|
printf("[aobench ispc + tasks]:\t\t[%.3f] M cycles (%d x %d image)\n",
|
||||||
|
minTimeISPCTasks, width, height);
|
||||||
|
savePPM("ao-ispc-tasks.ppm", width, height);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Run the serial path, again test_iteration times, and report the
|
// Run the serial path, again test_iteration times, and report the
|
||||||
// minimum time.
|
// minimum time.
|
||||||
@@ -193,7 +213,8 @@ int main(int argc, char **argv)
|
|||||||
// Report more results, save another image...
|
// Report more results, save another image...
|
||||||
printf("[aobench serial]:\t\t[%.3f] M cycles (%d x %d image)\n", minTimeSerial,
|
printf("[aobench serial]:\t\t[%.3f] M cycles (%d x %d image)\n", minTimeSerial,
|
||||||
width, height);
|
width, height);
|
||||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minTimeSerial / minTimeISPC);
|
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
|
||||||
|
minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks);
|
||||||
savePPM("ao-serial.ppm", width, height);
|
savePPM("ao-serial.ppm", width, height);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -203,8 +203,9 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
|
|||||||
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
||||||
of width w and height h.
|
of width w and height h.
|
||||||
*/
|
*/
|
||||||
void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||||
uniform int nsubsamples, reference uniform float image[]) {
|
uniform int h, uniform int nsubsamples,
|
||||||
|
reference uniform float image[]) {
|
||||||
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||||
static Sphere spheres[3] = {
|
static Sphere spheres[3] = {
|
||||||
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
||||||
@@ -231,6 +232,9 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
|||||||
// direction we do per iteration and ny the number in y.
|
// direction we do per iteration and ny the number in y.
|
||||||
uniform int nx = 1, ny = 1;
|
uniform int nx = 1, ny = 1;
|
||||||
|
|
||||||
|
// FIXME: We actually need ny to be 1 regardless of the decomposition,
|
||||||
|
// since the task decomposition is one scanline high.
|
||||||
|
|
||||||
if (programCount == 8) {
|
if (programCount == 8) {
|
||||||
// Do two pixels at once in the x direction
|
// Do two pixels at once in the x direction
|
||||||
nx = 2;
|
nx = 2;
|
||||||
@@ -239,19 +243,21 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
|||||||
++du;
|
++du;
|
||||||
}
|
}
|
||||||
else if (programCount == 16) {
|
else if (programCount == 16) {
|
||||||
// Two at once in both x and y
|
nx = 4;
|
||||||
nx = ny = 2;
|
ny = 1;
|
||||||
if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12)
|
if (programIndex >= 4 && programIndex < 8)
|
||||||
++du;
|
++du;
|
||||||
if (programIndex >= 8)
|
if (programIndex >= 8 && programIndex < 12)
|
||||||
++dv;
|
du += 2;
|
||||||
|
if (programIndex >= 12)
|
||||||
|
du += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now loop over all of the pixels, stepping in x and y as calculated
|
// Now loop over all of the pixels, stepping in x and y as calculated
|
||||||
// above. (Assumes that ny divides y and nx divides x...)
|
// above. (Assumes that ny divides y and nx divides x...)
|
||||||
for (uniform int y = y0; y < y1; y += ny) {
|
for (uniform int y = y0; y < y1; y += ny) {
|
||||||
for (uniform int x = 0; x < w; x += nx) {
|
for (uniform int x = 0; x < w; x += nx) {
|
||||||
// Figur out x,y pixel in NDC
|
// Figure out x,y pixel in NDC
|
||||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||||
float ret = 0.f;
|
float ret = 0.f;
|
||||||
@@ -293,7 +299,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
|||||||
|
|
||||||
// offset to the first pixel in the image
|
// offset to the first pixel in the image
|
||||||
uniform int offset = 3 * (y * w + x);
|
uniform int offset = 3 * (y * w + x);
|
||||||
for (uniform int p = 0; p < programCount; p += 4, ++offset) {
|
for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
|
||||||
// Get the four sample values for this pixel
|
// Get the four sample values for this pixel
|
||||||
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
|
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
|
||||||
retArray[p+3];
|
retArray[p+3];
|
||||||
@@ -315,3 +321,18 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
|
|||||||
uniform float image[]) {
|
uniform float image[]) {
|
||||||
ao_scanlines(0, h, w, h, nsubsamples, image);
|
ao_scanlines(0, h, w, h, nsubsamples, image);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void task ao_task(uniform int y0, uniform int y1, uniform int width,
|
||||||
|
uniform int height, uniform int nsubsamples,
|
||||||
|
uniform float image[]) {
|
||||||
|
ao_scanlines(y0, y1, width, height, nsubsamples, image);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||||
|
uniform float image[]) {
|
||||||
|
uniform int dy = 1;
|
||||||
|
for (uniform int y = 0; y < h; y += dy)
|
||||||
|
launch < ao_task(y, y+dy, w, h, nsubsamples, image) >;
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
<ProjectConfiguration Include="Debug|Win32">
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
@@ -21,6 +21,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="ao.cpp" />
|
<ClCompile Include="ao.cpp" />
|
||||||
<ClCompile Include="ao_serial.cpp" />
|
<ClCompile Include="ao_serial.cpp" />
|
||||||
|
<ClCompile Include="../tasks_concrt.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="ao.ispc">
|
<CustomBuild Include="ao.ispc">
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
CXX=g++ -m64
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -g3 -Wall
|
CXXFLAGS=-Iobjs/ -g3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --fast-math --instrument --arch=x86-64
|
ISPCFLAGS=-O2 --instrument --arch=x86-64
|
||||||
|
|
||||||
default: ao
|
default: ao
|
||||||
|
|
||||||
|
|||||||
@@ -40,6 +40,7 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <string.h>
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
#include "../cpuid.h"
|
#include "../cpuid.h"
|
||||||
#include "mandelbrot_ispc.h"
|
#include "mandelbrot_ispc.h"
|
||||||
@@ -99,8 +100,12 @@ ensureTargetISAIsSupported() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void usage() {
|
||||||
|
fprintf(stderr, "usage: mandelbrot [--scale=<factor]\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
int main() {
|
int main(int argc, char *argv[]) {
|
||||||
unsigned int width = 1536;
|
unsigned int width = 1536;
|
||||||
unsigned int height = 1024;
|
unsigned int height = 1024;
|
||||||
float x0 = -2;
|
float x0 = -2;
|
||||||
@@ -108,6 +113,25 @@ int main() {
|
|||||||
float y0 = -1;
|
float y0 = -1;
|
||||||
float y1 = 1;
|
float y1 = 1;
|
||||||
|
|
||||||
|
if (argc == 1)
|
||||||
|
;
|
||||||
|
else if (argc == 2) {
|
||||||
|
if (strncmp(argv[1], "--scale=", 8) == 0) {
|
||||||
|
float scale = atof(argv[1] + 8);
|
||||||
|
if (scale == 0.f)
|
||||||
|
usage();
|
||||||
|
width *= scale;
|
||||||
|
height *= scale;
|
||||||
|
// round up to multiples of 16
|
||||||
|
width = (width + 0xf) & ~0xf;
|
||||||
|
height = (height + 0xf) & ~0xf;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
usage();
|
||||||
|
|
||||||
ensureTargetISAIsSupported();
|
ensureTargetISAIsSupported();
|
||||||
|
|
||||||
int maxIterations = 512;
|
int maxIterations = 512;
|
||||||
|
|||||||
@@ -131,11 +131,11 @@ static float Noise(float x, float y, float z) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static float Turbulence(float x, float y, float z, int octaves) {
|
static float Turbulence(float x, float y, float z, uniform int octaves) {
|
||||||
float omega = 0.6;
|
float omega = 0.6;
|
||||||
|
|
||||||
float sum = 0., lambda = 1., o = 1.;
|
float sum = 0., lambda = 1., o = 1.;
|
||||||
for (int i = 0; i < octaves; ++i) {
|
for (uniform int i = 0; i < octaves; ++i) {
|
||||||
sum += abs(o * Noise(lambda * x, lambda * y, lambda * z));
|
sum += abs(o * Noise(lambda * x, lambda * y, lambda * z));
|
||||||
lambda *= 1.99f;
|
lambda *= 1.99f;
|
||||||
o *= omega;
|
o *= omega;
|
||||||
|
|||||||
@@ -52,7 +52,8 @@ using namespace ispc;
|
|||||||
|
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
extern void raytrace_serial(int width, int height, const float raster2camera[4][4],
|
extern void raytrace_serial(int width, int height, int baseWidth, int baseHeight,
|
||||||
|
const float raster2camera[4][4],
|
||||||
const float camera2world[4][4], float image[],
|
const float camera2world[4][4], float image[],
|
||||||
int id[], const LinearBVHNode nodes[],
|
int id[], const LinearBVHNode nodes[],
|
||||||
const Triangle triangles[]);
|
const Triangle triangles[]);
|
||||||
@@ -127,11 +128,28 @@ ensureTargetISAIsSupported() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void usage() {
|
||||||
|
fprintf(stderr, "rt [--scale=<factor>] <scene name base>\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
if (argc != 2) {
|
float scale = 1.f;
|
||||||
fprintf(stderr, "usage: rt <filename base>\n");
|
const char *filename = NULL;
|
||||||
exit(1);
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
if (strncmp(argv[i], "--scale=", 8) == 0) {
|
||||||
|
scale = atof(argv[i] + 8);
|
||||||
|
if (scale == 0.f)
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
else if (filename != NULL)
|
||||||
|
usage();
|
||||||
|
else
|
||||||
|
filename = argv[i];
|
||||||
}
|
}
|
||||||
|
if (filename == NULL)
|
||||||
|
usage();
|
||||||
|
|
||||||
ensureTargetISAIsSupported();
|
ensureTargetISAIsSupported();
|
||||||
|
|
||||||
@@ -145,10 +163,10 @@ int main(int argc, char *argv[]) {
|
|||||||
// Read the camera specification information from the camera file
|
// Read the camera specification information from the camera file
|
||||||
//
|
//
|
||||||
char fnbuf[1024];
|
char fnbuf[1024];
|
||||||
sprintf(fnbuf, "%s.camera", argv[1]);
|
sprintf(fnbuf, "%s.camera", filename);
|
||||||
FILE *f = fopen(fnbuf, "rb");
|
FILE *f = fopen(fnbuf, "rb");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
perror(argv[1]);
|
perror(fnbuf);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -156,20 +174,20 @@ int main(int argc, char *argv[]) {
|
|||||||
// Nothing fancy, and trouble if we run on a big-endian system, just
|
// Nothing fancy, and trouble if we run on a big-endian system, just
|
||||||
// fread in the bits
|
// fread in the bits
|
||||||
//
|
//
|
||||||
int width, height;
|
int baseWidth, baseHeight;
|
||||||
float camera2world[4][4], raster2camera[4][4];
|
float camera2world[4][4], raster2camera[4][4];
|
||||||
READ(width, 1);
|
READ(baseWidth, 1);
|
||||||
READ(height, 1);
|
READ(baseHeight, 1);
|
||||||
READ(camera2world[0][0], 16);
|
READ(camera2world[0][0], 16);
|
||||||
READ(raster2camera[0][0], 16);
|
READ(raster2camera[0][0], 16);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Read in the serialized BVH
|
// Read in the serialized BVH
|
||||||
//
|
//
|
||||||
sprintf(fnbuf, "%s.bvh", argv[1]);
|
sprintf(fnbuf, "%s.bvh", filename);
|
||||||
f = fopen(fnbuf, "rb");
|
f = fopen(fnbuf, "rb");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
perror(argv[2]);
|
perror(fnbuf);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -216,10 +234,10 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
|
||||||
// round image resolution up to multiple of 4 to make things easy for
|
// round image resolution up to multiple of 16 to make things easy for
|
||||||
// the code that assigns pixels to ispc program instances
|
// the code that assigns pixels to ispc program instances
|
||||||
height = (height + 3) & ~3;
|
int height = (int(baseHeight * scale) + 0xf) & ~0xf;
|
||||||
width = (width + 3) & ~3;
|
int width = (int(baseWidth * scale) + 0xf) & ~0xf;
|
||||||
|
|
||||||
// allocate images; one to hold hit object ids, one to hold depth to
|
// allocate images; one to hold hit object ids, one to hold depth to
|
||||||
// the first interseciton
|
// the first interseciton
|
||||||
@@ -232,8 +250,8 @@ int main(int argc, char *argv[]) {
|
|||||||
double minTimeISPC = 1e30;
|
double minTimeISPC = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_ispc(width, height, raster2camera, camera2world,
|
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
minTimeISPC = std::min(dt, minTimeISPC);
|
minTimeISPC = std::min(dt, minTimeISPC);
|
||||||
}
|
}
|
||||||
@@ -251,8 +269,8 @@ int main(int argc, char *argv[]) {
|
|||||||
double minTimeISPCtasks = 1e30;
|
double minTimeISPCtasks = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_ispc_tasks(width, height, raster2camera, camera2world,
|
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
|
minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
|
||||||
}
|
}
|
||||||
@@ -271,8 +289,8 @@ int main(int argc, char *argv[]) {
|
|||||||
double minTimeSerial = 1e30;
|
double minTimeSerial = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_serial(width, height, raster2camera, camera2world,
|
raytrace_serial(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
minTimeSerial = std::min(dt, minTimeSerial);
|
minTimeSerial = std::min(dt, minTimeSerial);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -227,12 +227,17 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
|
|
||||||
|
|
||||||
static void raytrace_tile(uniform int x0, uniform int x1,
|
static void raytrace_tile(uniform int x0, uniform int x1,
|
||||||
uniform int y0, uniform int y1, uniform int width,
|
uniform int y0, uniform int y1,
|
||||||
|
uniform int width, uniform int height,
|
||||||
|
uniform int baseWidth, uniform int baseHeight,
|
||||||
const uniform float raster2camera[4][4],
|
const uniform float raster2camera[4][4],
|
||||||
const uniform float camera2world[4][4],
|
const uniform float camera2world[4][4],
|
||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const LinearBVHNode nodes[],
|
const LinearBVHNode nodes[],
|
||||||
const Triangle triangles[]) {
|
const Triangle triangles[]) {
|
||||||
|
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||||
|
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||||
|
|
||||||
static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
|
static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
|
||||||
0, 1, 0, 1, 2, 3, 2, 3 };
|
0, 1, 0, 1, 2, 3, 2, 3 };
|
||||||
static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
|
static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
|
||||||
@@ -252,7 +257,8 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
|||||||
const float dy = udy[o * programCount + programIndex];
|
const float dy = udy[o * programCount + programIndex];
|
||||||
|
|
||||||
Ray ray;
|
Ray ray;
|
||||||
generateRay(raster2camera, camera2world, x+dx, y+dy, ray);
|
generateRay(raster2camera, camera2world, (x+dx)*widthScale,
|
||||||
|
(y+dy)*heightScale, ray);
|
||||||
BVHIntersect(nodes, triangles, ray);
|
BVHIntersect(nodes, triangles, ray);
|
||||||
|
|
||||||
int offset = (y + (int)dy) * width + (x + (int)dx);
|
int offset = (y + (int)dy) * width + (x + (int)dx);
|
||||||
@@ -265,29 +271,35 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
|||||||
|
|
||||||
|
|
||||||
export void raytrace_ispc(uniform int width, uniform int height,
|
export void raytrace_ispc(uniform int width, uniform int height,
|
||||||
|
uniform int baseWidth, uniform int baseHeight,
|
||||||
const uniform float raster2camera[4][4],
|
const uniform float raster2camera[4][4],
|
||||||
const uniform float camera2world[4][4],
|
const uniform float camera2world[4][4],
|
||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const LinearBVHNode nodes[],
|
const LinearBVHNode nodes[],
|
||||||
const Triangle triangles[]) {
|
const Triangle triangles[]) {
|
||||||
raytrace_tile(0, width, 0, height, width, raster2camera, camera2world, image,
|
raytrace_tile(0, width, 0, height, width, height, baseWidth, baseHeight,
|
||||||
|
raster2camera, camera2world, image,
|
||||||
id, nodes, triangles);
|
id, nodes, triangles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
task void raytrace_tile_task(uniform int x0, uniform int x1,
|
task void raytrace_tile_task(uniform int x0, uniform int x1,
|
||||||
uniform int y0, uniform int y1, uniform int width,
|
uniform int y0, uniform int y1,
|
||||||
|
uniform int width, uniform int height,
|
||||||
|
uniform int baseWidth, uniform int baseHeight,
|
||||||
const uniform float raster2camera[4][4],
|
const uniform float raster2camera[4][4],
|
||||||
const uniform float camera2world[4][4],
|
const uniform float camera2world[4][4],
|
||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
const LinearBVHNode nodes[],
|
const LinearBVHNode nodes[],
|
||||||
const Triangle triangles[]) {
|
const Triangle triangles[]) {
|
||||||
raytrace_tile(x0, x1, y0, y1, width, raster2camera, camera2world, image,
|
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
|
||||||
|
raster2camera, camera2world, image,
|
||||||
id, nodes, triangles);
|
id, nodes, triangles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
||||||
|
uniform int baseWidth, uniform int baseHeight,
|
||||||
const uniform float raster2camera[4][4],
|
const uniform float raster2camera[4][4],
|
||||||
const uniform float camera2world[4][4],
|
const uniform float camera2world[4][4],
|
||||||
uniform float image[], uniform int id[],
|
uniform float image[], uniform int id[],
|
||||||
@@ -298,9 +310,9 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
|||||||
uniform int y1 = min(y + dy, height);
|
uniform int y1 = min(y + dy, height);
|
||||||
for (uniform int x = 0; x < width; x += dx) {
|
for (uniform int x = 0; x < width; x += dx) {
|
||||||
uniform int x1 = min(x + dx, width);
|
uniform int x1 = min(x + dx, width);
|
||||||
launch < raytrace_tile_task(x, x1, y, y1, width, raster2camera,
|
launch < raytrace_tile_task(x, x1, y, y1, width, height, baseWidth,
|
||||||
camera2world, image, id, nodes,
|
baseHeight, raster2camera, camera2world,
|
||||||
triangles) >;
|
image, id, nodes, triangles) >;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -258,17 +258,21 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void raytrace_serial(int width, int height,
|
void raytrace_serial(int width, int height, int baseWidth, int baseHeight,
|
||||||
const float raster2camera[4][4],
|
const float raster2camera[4][4],
|
||||||
const float camera2world[4][4],
|
const float camera2world[4][4],
|
||||||
float image[],
|
float image[],
|
||||||
int id[],
|
int id[],
|
||||||
const LinearBVHNode nodes[],
|
const LinearBVHNode nodes[],
|
||||||
const Triangle triangles[]) {
|
const Triangle triangles[]) {
|
||||||
|
float widthScale = float(baseWidth) / float(width);
|
||||||
|
float heightScale = float(baseHeight) / float(height);
|
||||||
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
for (int x = 0; x < width; ++x) {
|
for (int x = 0; x < width; ++x) {
|
||||||
Ray ray;
|
Ray ray;
|
||||||
generateRay(raster2camera, camera2world, x, y, ray);
|
generateRay(raster2camera, camera2world, x * widthScale,
|
||||||
|
y * heightScale, ray);
|
||||||
BVHIntersect(nodes, triangles, ray);
|
BVHIntersect(nodes, triangles, ray);
|
||||||
|
|
||||||
int offset = y * width + x;
|
int offset = y * width + x;
|
||||||
|
|||||||
@@ -33,10 +33,20 @@
|
|||||||
|
|
||||||
#include "taskinfo.h"
|
#include "taskinfo.h"
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
/* A simple task system for ispc programs based on Apple's Grand Central
|
/* A simple task system for ispc programs based on Apple's Grand Central
|
||||||
Dispatch. */
|
Dispatch. */
|
||||||
#include <dispatch/dispatch.h>
|
#include <dispatch/dispatch.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
static int initialized = 0;
|
static int initialized = 0;
|
||||||
static volatile int32_t lock = 0;
|
static volatile int32_t lock = 0;
|
||||||
@@ -47,6 +57,8 @@ static dispatch_group_t gcdGroup;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
void ISPCLaunch(void *f, void *data);
|
void ISPCLaunch(void *f, void *data);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -97,3 +109,18 @@ void ISPCSync() {
|
|||||||
|
|
||||||
lResetTaskInfo();
|
lResetTaskInfo();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||||
|
void *mem = malloc(size + (alignment-1) + sizeof(void*));
|
||||||
|
char *amem = ((char*)mem) + sizeof(void*);
|
||||||
|
amem = amem + uint32_t(alignment - (reinterpret_cast<uint64_t>(amem) &
|
||||||
|
(alignment - 1)));
|
||||||
|
((void**)amem)[-1] = mem;
|
||||||
|
return amem;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCFree(void *ptr) {
|
||||||
|
free(((void**)ptr)[-1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,14 @@
|
|||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "taskinfo.h"
|
#include "taskinfo.h"
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <semaphore.h>
|
#include <semaphore.h>
|
||||||
@@ -46,6 +54,9 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
static int initialized = 0;
|
static int initialized = 0;
|
||||||
static volatile int32_t lock = 0;
|
static volatile int32_t lock = 0;
|
||||||
@@ -63,6 +74,8 @@ static pthread_cond_t tasksRunningCondition;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
void ISPCLaunch(void *f, void *data);
|
void ISPCLaunch(void *f, void *data);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *lTaskEntry(void *arg);
|
static void *lTaskEntry(void *arg);
|
||||||
@@ -292,3 +305,35 @@ void ISPCSync() {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
|
return _aligned_malloc(size, alignment);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
return memalign(alignment, size);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
void *mem = malloc(size + (alignment-1) + sizeof(void*));
|
||||||
|
char *amem = ((char*)mem) + sizeof(void*);
|
||||||
|
amem = amem + uint32_t(alignment - (reinterpret_cast<uint64_t>(amem) &
|
||||||
|
(alignment - 1)));
|
||||||
|
((void**)amem)[-1] = mem;
|
||||||
|
return amem;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCFree(void *ptr) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
|
_aligned_free(ptr);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
free(ptr);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
free(((void**)ptr)[-1]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
254
expr.cpp
254
expr.cpp
@@ -741,6 +741,12 @@ UnaryExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
UnaryExpr::EstimateCost() const {
|
||||||
|
return (expr ? expr->EstimateCost() : 0) + COST_SIMPLE_ARITH_LOGIC_OP;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
UnaryExpr::Print() const {
|
UnaryExpr::Print() const {
|
||||||
if (!expr || !GetType())
|
if (!expr || !GetType())
|
||||||
@@ -1299,6 +1305,17 @@ BinaryExpr::TypeCheck() {
|
|||||||
if (type0 == NULL || type1 == NULL)
|
if (type0 == NULL || type1 == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
if (dynamic_cast<const ReferenceType *>(type0) != NULL) {
|
||||||
|
arg0 = new DereferenceExpr(arg0, arg0->pos);
|
||||||
|
type0 = arg0->GetType();
|
||||||
|
assert(type0 != NULL);
|
||||||
|
}
|
||||||
|
if (dynamic_cast<const ReferenceType *>(type1) != NULL) {
|
||||||
|
arg1 = new DereferenceExpr(arg1, arg1->pos);
|
||||||
|
type1 = arg1->GetType();
|
||||||
|
assert(type1 != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case Shl:
|
case Shl:
|
||||||
case Shr:
|
case Shr:
|
||||||
@@ -1445,6 +1462,15 @@ BinaryExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
BinaryExpr::EstimateCost() const {
|
||||||
|
return ((arg0 ? arg0->EstimateCost() : 0) +
|
||||||
|
(arg1 ? arg1->EstimateCost() : 0) +
|
||||||
|
((op == Div || op == Mod) ? COST_COMPLEX_ARITH_OP :
|
||||||
|
COST_SIMPLE_ARITH_LOGIC_OP));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
BinaryExpr::Print() const {
|
BinaryExpr::Print() const {
|
||||||
if (!arg0 || !arg1 || !GetType())
|
if (!arg0 || !arg1 || !GetType())
|
||||||
@@ -1696,6 +1722,20 @@ AssignExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
AssignExpr::EstimateCost() const {
|
||||||
|
int cost = ((lvalue ? lvalue->EstimateCost() : 0) +
|
||||||
|
(rvalue ? rvalue->EstimateCost() : 0));
|
||||||
|
cost += COST_ASSIGN;
|
||||||
|
if (op == Assign)
|
||||||
|
return cost;
|
||||||
|
if (op == DivAssign || op == ModAssign)
|
||||||
|
return cost + COST_COMPLEX_ARITH_OP;
|
||||||
|
else
|
||||||
|
return cost + COST_SIMPLE_ARITH_LOGIC_OP;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
AssignExpr::Print() const {
|
AssignExpr::Print() const {
|
||||||
if (!lvalue || !rvalue || !GetType())
|
if (!lvalue || !rvalue || !GetType())
|
||||||
@@ -1944,6 +1984,12 @@ SelectExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
SelectExpr::EstimateCost() const {
|
||||||
|
return COST_SELECT;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
SelectExpr::Print() const {
|
SelectExpr::Print() const {
|
||||||
if (!test || !expr1 || !expr2 || !GetType())
|
if (!test || !expr1 || !expr2 || !GetType())
|
||||||
@@ -2222,55 +2268,6 @@ FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, bool il)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Starting from the function initialFunction, we're calling into
|
|
||||||
calledFunc. The question is: is this a recursive call back to
|
|
||||||
initialFunc? If it definitely is or if it may be, then return true.
|
|
||||||
Return false if it definitely is not.
|
|
||||||
*/
|
|
||||||
static bool
|
|
||||||
lMayBeRecursiveCall(llvm::Function *calledFunc,
|
|
||||||
llvm::Function *initialFunc,
|
|
||||||
std::set<llvm::Function *> &seenFuncs) {
|
|
||||||
// Easy case: intrinsics aren't going to call functions themselves
|
|
||||||
if (calledFunc->isIntrinsic())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
std::string name = calledFunc->getName();
|
|
||||||
if (name.size() > 2 && name[0] == '_' && name[1] == '_')
|
|
||||||
// builtin stdlib function; none of these are recursive...
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (calledFunc->isDeclaration())
|
|
||||||
// There's visibility into what the called function does without a
|
|
||||||
// definition, so we have to be conservative
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (calledFunc == initialFunc)
|
|
||||||
// hello recursive call
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// Otherwise iterate over all of the instructions in the function. If
|
|
||||||
// any of them is a function call then check recursively..
|
|
||||||
llvm::inst_iterator iter;
|
|
||||||
for (iter = llvm::inst_begin(calledFunc);
|
|
||||||
iter != llvm::inst_end(calledFunc); ++iter) {
|
|
||||||
llvm::Instruction *inst = &*iter;
|
|
||||||
llvm::CallInst *ci = llvm::dyn_cast<llvm::CallInst>(inst);
|
|
||||||
if (ci != NULL) {
|
|
||||||
llvm::Function *nextCalledFunc = ci->getCalledFunction();
|
|
||||||
// Don't repeatedly test functions we've seen before
|
|
||||||
if (seenFuncs.find(nextCalledFunc) == seenFuncs.end()) {
|
|
||||||
seenFuncs.insert(nextCalledFunc);
|
|
||||||
if (lMayBeRecursiveCall(nextCalledFunc, initialFunc,
|
|
||||||
seenFuncs))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||||
if (!func || !args)
|
if (!func || !args)
|
||||||
@@ -2391,47 +2388,14 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We sometimes need to check to see if the mask is all off here;
|
|
||||||
// specifically, if the mask is all off and we call a recursive
|
|
||||||
// function, then we will probably have an unsesirable infinite loop.
|
|
||||||
ctx->SetDebugPos(pos);
|
|
||||||
llvm::BasicBlock *bDoCall = ctx->CreateBasicBlock("funcall_mask_ok");
|
|
||||||
llvm::BasicBlock *bSkip = ctx->CreateBasicBlock("funcall_mask_off");
|
|
||||||
llvm::BasicBlock *bAfter = ctx->CreateBasicBlock("after_funcall");
|
|
||||||
llvm::Function *currentFunc = ctx->GetCurrentBasicBlock()->getParent();
|
|
||||||
|
|
||||||
// If we need to check the mask (it may be a recursive call, possibly
|
|
||||||
// transitively), or we're launching a task, which is expensive and
|
|
||||||
// thus probably always worth checking, then use the mask to choose
|
|
||||||
// whether to go to the bDoCallBlock or the bSkip block
|
|
||||||
std::set<llvm::Function *> seenFuncs;
|
|
||||||
seenFuncs.insert(currentFunc);
|
|
||||||
if (ft->isTask || lMayBeRecursiveCall(callee, currentFunc, seenFuncs)) {
|
|
||||||
Debug(pos, "Checking mask before function call \"%s\".", funSym->name.c_str());
|
|
||||||
ctx->BranchIfMaskAny(bDoCall, bSkip);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
// If we don't need to check the mask, then always to the call;
|
|
||||||
// just jump to bDoCall
|
|
||||||
ctx->BranchInst(bDoCall);
|
|
||||||
|
|
||||||
// And the bSkip block just jumps immediately to bAfter. So why do we
|
|
||||||
// need it? So the phi node below can easily tell what paths are
|
|
||||||
// going into it
|
|
||||||
ctx->SetCurrentBasicBlock(bSkip);
|
|
||||||
ctx->BranchInst(bAfter);
|
|
||||||
|
|
||||||
// Emit the code to do the function call
|
|
||||||
ctx->SetCurrentBasicBlock(bDoCall);
|
|
||||||
|
|
||||||
llvm::Value *retVal = NULL;
|
llvm::Value *retVal = NULL;
|
||||||
ctx->SetDebugPos(pos);
|
ctx->SetDebugPos(pos);
|
||||||
if (ft->isTask)
|
if (ft->isTask)
|
||||||
ctx->LaunchInst(callee, argVals);
|
ctx->LaunchInst(callee, argVals);
|
||||||
else {
|
else {
|
||||||
// Most of the time, the mask is passed as the last argument. this
|
// Most of the time, the mask is passed as the last argument. this
|
||||||
// isn't the case for things like SSE intrinsics and extern "C"
|
// isn't the case for things like intrinsics, builtins, and extern
|
||||||
// functions from the application.
|
// "C" functions from the application.
|
||||||
assert(callargs.size() + 1 == callee->arg_size() ||
|
assert(callargs.size() + 1 == callee->arg_size() ||
|
||||||
callargs.size() == callee->arg_size());
|
callargs.size() == callee->arg_size());
|
||||||
|
|
||||||
@@ -2458,22 +2422,10 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// And jump out to the 'after funciton call' basic block
|
|
||||||
ctx->BranchInst(bAfter);
|
|
||||||
ctx->SetCurrentBasicBlock(bAfter);
|
|
||||||
|
|
||||||
if (isVoidFunc)
|
if (isVoidFunc)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
else
|
||||||
// The return value for the non-void case is either undefined or the
|
return retVal;
|
||||||
// function return value, depending on whether we actually ran the code
|
|
||||||
// path that called the function or not.
|
|
||||||
LLVM_TYPE_CONST llvm::Type *lrType = ft->GetReturnType()->LLVMType(g->ctx);
|
|
||||||
llvm::PHINode *ret = ctx->PhiNode(lrType, 2, "fun_ret");
|
|
||||||
assert(retVal != NULL);
|
|
||||||
ret->addIncoming(llvm::UndefValue::get(lrType), bSkip);
|
|
||||||
ret->addIncoming(retVal, bDoCall);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2534,6 +2486,13 @@ FunctionCallExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
FunctionCallExpr::EstimateCost() const {
|
||||||
|
return ((args ? args->EstimateCost() : 0) +
|
||||||
|
(isLaunch ? COST_TASK_LAUNCH : COST_FUNCALL));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionCallExpr::Print() const {
|
FunctionCallExpr::Print() const {
|
||||||
if (!func || !args || !GetType())
|
if (!func || !args || !GetType())
|
||||||
@@ -2622,7 +2581,7 @@ ExprList::GetConstant(const Type *type) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (dynamic_cast<const StructType *>(type) != NULL) {
|
if (dynamic_cast<const StructType *>(type) != NULL) {
|
||||||
#if defined(LLVM_2_8) || defined(LLVM_2_9)
|
#if defined(LLVM_2_9)
|
||||||
return llvm::ConstantStruct::get(*g->ctx, cv, false);
|
return llvm::ConstantStruct::get(*g->ctx, cv, false);
|
||||||
#else
|
#else
|
||||||
LLVM_TYPE_CONST llvm::StructType *llvmStructType =
|
LLVM_TYPE_CONST llvm::StructType *llvmStructType =
|
||||||
@@ -2645,6 +2604,17 @@ ExprList::GetConstant(const Type *type) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ExprList::EstimateCost() const {
|
||||||
|
int cost = 0;
|
||||||
|
for (unsigned int i = 0; i < exprs.size(); ++i) {
|
||||||
|
if (exprs[i] != NULL)
|
||||||
|
cost += exprs[i]->EstimateCost();
|
||||||
|
}
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ExprList::Print() const {
|
ExprList::Print() const {
|
||||||
printf("expr list (");
|
printf("expr list (");
|
||||||
@@ -2775,6 +2745,22 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const {
|
|||||||
if (!basePtr)
|
if (!basePtr)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
// If the array index is a compile time constant, check to see if it
|
||||||
|
// may lead to an out-of-bounds access.
|
||||||
|
ConstExpr *ce = dynamic_cast<ConstExpr *>(index);
|
||||||
|
const SequentialType *seqType = dynamic_cast<const SequentialType *>(type);
|
||||||
|
assert(seqType != NULL);
|
||||||
|
int nElements = seqType->GetElementCount();
|
||||||
|
if (ce != NULL && nElements > 0) {
|
||||||
|
int32_t indices[ISPC_MAX_NVEC];
|
||||||
|
int count = ce->AsInt32(indices);
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
if (indices[i] < 0 || indices[i] >= nElements)
|
||||||
|
Warning(index->pos, "Array index \"%d\" may be out of bounds for "
|
||||||
|
"\"%d\" element array.", indices[i], nElements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
basePtr = lCastUniformVectorBasePtr(basePtr, ctx);
|
basePtr = lCastUniformVectorBasePtr(basePtr, ctx);
|
||||||
|
|
||||||
ctx->SetDebugPos(pos);
|
ctx->SetDebugPos(pos);
|
||||||
@@ -2827,6 +2813,16 @@ IndexExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
IndexExpr::EstimateCost() const {
|
||||||
|
// be pessimistic
|
||||||
|
if (index && index->GetType()->IsVaryingType())
|
||||||
|
return COST_GATHER;
|
||||||
|
else
|
||||||
|
return COST_LOAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
IndexExpr::Print() const {
|
IndexExpr::Print() const {
|
||||||
if (!arrayOrVector || !index || !GetType())
|
if (!arrayOrVector || !index || !GetType())
|
||||||
@@ -3126,6 +3122,7 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos) {
|
|||||||
return new MemberExpr(e, id, p, idpos);
|
return new MemberExpr(e, id, p, idpos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
MemberExpr::MemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos)
|
MemberExpr::MemberExpr(Expr *e, const char *id, SourcePos p, SourcePos idpos)
|
||||||
: Expr(p), identifierPos(idpos) {
|
: Expr(p), identifierPos(idpos) {
|
||||||
expr = e;
|
expr = e;
|
||||||
@@ -3222,6 +3219,14 @@ MemberExpr::Optimize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
MemberExpr::EstimateCost() const {
|
||||||
|
// FIXME: return gather cost when we can tell a gather is going to be
|
||||||
|
// needed
|
||||||
|
return COST_SIMPLE_ARITH_LOGIC_OP;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
MemberExpr::Print() const {
|
MemberExpr::Print() const {
|
||||||
if (!expr || !GetType())
|
if (!expr || !GetType())
|
||||||
@@ -4017,6 +4022,12 @@ ConstExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ConstExpr::EstimateCost() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ConstExpr::Print() const {
|
ConstExpr::Print() const {
|
||||||
printf("[%s] (", GetType()->GetString().c_str());
|
printf("[%s] (", GetType()->GetString().c_str());
|
||||||
@@ -4103,7 +4114,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
case AtomicType::TYPE_BOOL:
|
case AtomicType::TYPE_BOOL:
|
||||||
if (fromType->IsVaryingType() &&
|
if (fromType->IsVaryingType() &&
|
||||||
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
||||||
// If we have a bool vector of i32 element,s first truncate
|
// If we have a bool vector of i32 elements, first truncate
|
||||||
// down to a single bit
|
// down to a single bit
|
||||||
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
// And then do an unisgned int->float cast
|
// And then do an unisgned int->float cast
|
||||||
@@ -4163,9 +4174,6 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
case AtomicType::TYPE_UINT16:
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
case AtomicType::TYPE_UINT64:
|
case AtomicType::TYPE_UINT64:
|
||||||
if (fromType->IsVaryingType())
|
|
||||||
PerformanceWarning(pos, "Conversion from unsigned int64 to float is slow. "
|
|
||||||
"Use \"int64\" if possible");
|
|
||||||
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int
|
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int
|
||||||
exprVal, targetType, "uint2double");
|
exprVal, targetType, "uint2double");
|
||||||
break;
|
break;
|
||||||
@@ -4937,6 +4945,13 @@ TypeCastExpr::Optimize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
TypeCastExpr::EstimateCost() const {
|
||||||
|
// FIXME: return COST_TYPECAST_COMPLEX when appropriate
|
||||||
|
return COST_TYPECAST_SIMPLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
TypeCastExpr::Print() const {
|
TypeCastExpr::Print() const {
|
||||||
printf("[%s] type cast (", GetType()->GetString().c_str());
|
printf("[%s] type cast (", GetType()->GetString().c_str());
|
||||||
@@ -5002,6 +5017,12 @@ ReferenceExpr::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ReferenceExpr::EstimateCost() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ReferenceExpr::Print() const {
|
ReferenceExpr::Print() const {
|
||||||
if (expr == NULL || GetType() == NULL)
|
if (expr == NULL || GetType() == NULL)
|
||||||
@@ -5080,6 +5101,12 @@ DereferenceExpr::Optimize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
DereferenceExpr::EstimateCost() const {
|
||||||
|
return COST_DEREF;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
DereferenceExpr::Print() const {
|
DereferenceExpr::Print() const {
|
||||||
if (expr == NULL || GetType() == NULL)
|
if (expr == NULL || GetType() == NULL)
|
||||||
@@ -5151,6 +5178,15 @@ SymbolExpr::Optimize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
SymbolExpr::EstimateCost() const {
|
||||||
|
if (symbol->constValue != NULL)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
return COST_LOAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
SymbolExpr::Print() const {
|
SymbolExpr::Print() const {
|
||||||
if (symbol == NULL || GetType() == NULL)
|
if (symbol == NULL || GetType() == NULL)
|
||||||
@@ -5204,6 +5240,12 @@ FunctionSymbolExpr::Optimize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
FunctionSymbolExpr::EstimateCost() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionSymbolExpr::Print() const {
|
FunctionSymbolExpr::Print() const {
|
||||||
if (!matchingFunc || !GetType())
|
if (!matchingFunc || !GetType())
|
||||||
@@ -5238,6 +5280,12 @@ SyncExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
SyncExpr::EstimateCost() const {
|
||||||
|
return COST_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
SyncExpr::Print() const {
|
SyncExpr::Print() const {
|
||||||
printf("sync");
|
printf("sync");
|
||||||
|
|||||||
41
expr.h
41
expr.h
@@ -121,8 +121,8 @@ public:
|
|||||||
void Print() const;
|
void Print() const;
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
const Op op;
|
const Op op;
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
};
|
};
|
||||||
@@ -164,8 +164,8 @@ public:
|
|||||||
|
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
const Op op;
|
const Op op;
|
||||||
Expr *arg0, *arg1;
|
Expr *arg0, *arg1;
|
||||||
};
|
};
|
||||||
@@ -196,8 +196,8 @@ public:
|
|||||||
|
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
const Op op;
|
const Op op;
|
||||||
Expr *lvalue, *rvalue;
|
Expr *lvalue, *rvalue;
|
||||||
};
|
};
|
||||||
@@ -217,8 +217,8 @@ public:
|
|||||||
|
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *test, *expr1, *expr2;
|
Expr *test, *expr1, *expr2;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -240,6 +240,7 @@ public:
|
|||||||
llvm::Constant *GetConstant(const Type *type) const;
|
llvm::Constant *GetConstant(const Type *type) const;
|
||||||
ExprList *Optimize();
|
ExprList *Optimize();
|
||||||
ExprList *TypeCheck();
|
ExprList *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
std::vector<Expr *> exprs;
|
std::vector<Expr *> exprs;
|
||||||
};
|
};
|
||||||
@@ -257,12 +258,13 @@ public:
|
|||||||
|
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *func;
|
Expr *func;
|
||||||
ExprList *args;
|
ExprList *args;
|
||||||
bool isLaunch;
|
bool isLaunch;
|
||||||
|
|
||||||
|
private:
|
||||||
void resolveFunctionOverloads();
|
void resolveFunctionOverloads();
|
||||||
bool tryResolve(bool (*matchFunc)(Expr *, const Type *));
|
bool tryResolve(bool (*matchFunc)(Expr *, const Type *));
|
||||||
};
|
};
|
||||||
@@ -285,8 +287,8 @@ public:
|
|||||||
|
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *arrayOrVector, *index;
|
Expr *arrayOrVector, *index;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -303,16 +305,17 @@ public:
|
|||||||
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
|
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
|
||||||
SourcePos identifierPos);
|
SourcePos identifierPos);
|
||||||
|
|
||||||
virtual llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||||
virtual llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||||
virtual const Type *GetType() const;
|
const Type *GetType() const;
|
||||||
virtual Symbol *GetBaseSymbol() const;
|
Symbol *GetBaseSymbol() const;
|
||||||
virtual void Print() const;
|
void Print() const;
|
||||||
virtual Expr *Optimize();
|
Expr *Optimize();
|
||||||
virtual Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
virtual int getElementNumber() const;
|
virtual int getElementNumber() const;
|
||||||
|
|
||||||
protected:
|
|
||||||
std::string getCandidateNearMatches() const;
|
std::string getCandidateNearMatches() const;
|
||||||
|
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
@@ -392,6 +395,7 @@ public:
|
|||||||
|
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
/** Return the ConstExpr's values as booleans, doing type conversion
|
/** Return the ConstExpr's values as booleans, doing type conversion
|
||||||
from the actual type if needed. If forceVarying is true, then type
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
@@ -495,8 +499,8 @@ public:
|
|||||||
void Print() const;
|
void Print() const;
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
const Type *type;
|
const Type *type;
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
};
|
};
|
||||||
@@ -514,8 +518,8 @@ public:
|
|||||||
void Print() const;
|
void Print() const;
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -533,8 +537,8 @@ public:
|
|||||||
void Print() const;
|
void Print() const;
|
||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -551,6 +555,7 @@ public:
|
|||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
void Print() const;
|
void Print() const;
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Symbol *symbol;
|
Symbol *symbol;
|
||||||
@@ -571,6 +576,7 @@ public:
|
|||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
void Print() const;
|
void Print() const;
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class FunctionCallExpr;
|
friend class FunctionCallExpr;
|
||||||
@@ -597,6 +603,7 @@ public:
|
|||||||
Expr *TypeCheck();
|
Expr *TypeCheck();
|
||||||
Expr *Optimize();
|
Expr *Optimize();
|
||||||
void Print() const;
|
void Print() const;
|
||||||
|
int EstimateCost() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // ISPC_EXPR_H
|
#endif // ISPC_EXPR_H
|
||||||
|
|||||||
22
ispc.cpp
22
ispc.cpp
@@ -46,9 +46,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <llvm/LLVMContext.h>
|
#include <llvm/LLVMContext.h>
|
||||||
#include <llvm/Module.h>
|
#include <llvm/Module.h>
|
||||||
#ifndef LLVM_2_8
|
|
||||||
#include <llvm/Analysis/DIBuilder.h>
|
#include <llvm/Analysis/DIBuilder.h>
|
||||||
#endif
|
|
||||||
#include <llvm/Analysis/DebugInfo.h>
|
#include <llvm/Analysis/DebugInfo.h>
|
||||||
#include <llvm/Support/Dwarf.h>
|
#include <llvm/Support/Dwarf.h>
|
||||||
#include <llvm/Target/TargetMachine.h>
|
#include <llvm/Target/TargetMachine.h>
|
||||||
@@ -72,7 +70,7 @@ Module *m;
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||||
Target *t) {
|
bool pic, Target *t) {
|
||||||
if (cpu == NULL) {
|
if (cpu == NULL) {
|
||||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||||
if (hostCPU.size() > 0)
|
if (hostCPU.size() > 0)
|
||||||
@@ -100,6 +98,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
|
|
||||||
bool error = false;
|
bool error = false;
|
||||||
|
|
||||||
|
t->generatePIC = pic;
|
||||||
|
|
||||||
// Make sure the target architecture is a known one; print an error
|
// Make sure the target architecture is a known one; print an error
|
||||||
// with the valid ones otherwise.
|
// with the valid ones otherwise.
|
||||||
t->target = NULL;
|
t->target = NULL;
|
||||||
@@ -228,14 +228,22 @@ llvm::TargetMachine *
|
|||||||
Target::GetTargetMachine() const {
|
Target::GetTargetMachine() const {
|
||||||
std::string triple = GetTripleString();
|
std::string triple = GetTripleString();
|
||||||
|
|
||||||
|
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
|
||||||
|
llvm::Reloc::Default;
|
||||||
#if defined(LLVM_3_0svn) || defined(LLVM_3_0)
|
#if defined(LLVM_3_0svn) || defined(LLVM_3_0)
|
||||||
std::string featuresString = attributes;
|
std::string featuresString = attributes;
|
||||||
llvm::TargetMachine *targetMachine =
|
llvm::TargetMachine *targetMachine =
|
||||||
target->createTargetMachine(triple, cpu, featuresString);
|
target->createTargetMachine(triple, cpu, featuresString, relocModel);
|
||||||
#else
|
#else
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
relocModel = llvm::Reloc::PIC_;
|
||||||
|
#endif // ISPC_IS_APPLE
|
||||||
std::string featuresString = cpu + std::string(",") + attributes;
|
std::string featuresString = cpu + std::string(",") + attributes;
|
||||||
llvm::TargetMachine *targetMachine =
|
llvm::TargetMachine *targetMachine =
|
||||||
target->createTargetMachine(triple, featuresString);
|
target->createTargetMachine(triple, featuresString);
|
||||||
|
#ifndef ISPC_IS_WINDOWS
|
||||||
|
targetMachine->setRelocationModel(relocModel);
|
||||||
|
#endif // !ISPC_IS_WINDOWS
|
||||||
#endif
|
#endif
|
||||||
assert(targetMachine != NULL);
|
assert(targetMachine != NULL);
|
||||||
|
|
||||||
@@ -250,6 +258,8 @@ Target::GetTargetMachine() const {
|
|||||||
Opt::Opt() {
|
Opt::Opt() {
|
||||||
level = 1;
|
level = 1;
|
||||||
fastMath = false;
|
fastMath = false;
|
||||||
|
fastMaskedVload = false;
|
||||||
|
unrollLoops = true;
|
||||||
disableBlendedMaskedStores = false;
|
disableBlendedMaskedStores = false;
|
||||||
disableCoherentControlFlow = false;
|
disableCoherentControlFlow = false;
|
||||||
disableUniformControlFlow = false;
|
disableUniformControlFlow = false;
|
||||||
@@ -299,13 +309,9 @@ SourcePos::SourcePos(const char *n, int l, int c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::DIFile SourcePos::GetDIFile() const {
|
llvm::DIFile SourcePos::GetDIFile() const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
return llvm::DIFile();
|
|
||||||
#else
|
|
||||||
std::string directory, filename;
|
std::string directory, filename;
|
||||||
GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
|
GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
|
||||||
return m->diBuilder->createFile(filename, directory);
|
return m->diBuilder->createFile(filename, directory);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
41
ispc.h
41
ispc.h
@@ -148,6 +148,8 @@ public:
|
|||||||
pointer in place of the original ASTNode *. */
|
pointer in place of the original ASTNode *. */
|
||||||
virtual ASTNode *TypeCheck() = 0;
|
virtual ASTNode *TypeCheck() = 0;
|
||||||
|
|
||||||
|
virtual int EstimateCost() const = 0;
|
||||||
|
|
||||||
/** All AST nodes must track the file position where they are
|
/** All AST nodes must track the file position where they are
|
||||||
defined. */
|
defined. */
|
||||||
const SourcePos pos;
|
const SourcePos pos;
|
||||||
@@ -162,7 +164,7 @@ struct Target {
|
|||||||
name, if the name is a known target. Returns true if the
|
name, if the name is a known target. Returns true if the
|
||||||
target was initialized and false if the name is unknown. */
|
target was initialized and false if the name is unknown. */
|
||||||
static bool GetTarget(const char *arch, const char *cpu, const char *isa,
|
static bool GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||||
Target *);
|
bool pic, Target *);
|
||||||
|
|
||||||
/** Returns a comma-delimited string giving the names of the currently
|
/** Returns a comma-delimited string giving the names of the currently
|
||||||
supported target ISAs. */
|
supported target ISAs. */
|
||||||
@@ -215,8 +217,12 @@ struct Target {
|
|||||||
integer multiple of the native vector width, for example if we're
|
integer multiple of the native vector width, for example if we're
|
||||||
"doubling up" and compiling 8-wide on a 4-wide SSE system. */
|
"doubling up" and compiling 8-wide on a 4-wide SSE system. */
|
||||||
int vectorWidth;
|
int vectorWidth;
|
||||||
|
|
||||||
|
/** Indicates whether position independent code should be generated. */
|
||||||
|
bool generatePIC;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/** @brief Structure that collects optimization options
|
/** @brief Structure that collects optimization options
|
||||||
|
|
||||||
This structure collects all of the options related to optimization of
|
This structure collects all of the options related to optimization of
|
||||||
@@ -234,6 +240,16 @@ struct Opt {
|
|||||||
should be performed. This is false by default. */
|
should be performed. This is false by default. */
|
||||||
bool fastMath;
|
bool fastMath;
|
||||||
|
|
||||||
|
/** Indicates whether an vector load should be issued for masked loads
|
||||||
|
on platforms that don't have a native masked vector load. (This may
|
||||||
|
lead to accessing memory up to programCount-1 elements past the end of
|
||||||
|
arrays, so is unsafe in general.) */
|
||||||
|
bool fastMaskedVload;
|
||||||
|
|
||||||
|
/** Indicates when loops should be unrolled (when doing so seems like
|
||||||
|
it will make sense. */
|
||||||
|
bool unrollLoops;
|
||||||
|
|
||||||
/** On targets that don't have a masked store instruction but do have a
|
/** On targets that don't have a masked store instruction but do have a
|
||||||
blending instruction, by default, we simulate masked stores by
|
blending instruction, by default, we simulate masked stores by
|
||||||
loading the old value, blending, and storing the result. This can
|
loading the old value, blending, and storing the result. This can
|
||||||
@@ -351,6 +367,29 @@ struct Globals {
|
|||||||
std::vector<std::string> cppArgs;
|
std::vector<std::string> cppArgs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
COST_ASSIGN = 1,
|
||||||
|
COST_COHERENT_BREAK_CONTINE = 4,
|
||||||
|
COST_COMPLEX_ARITH_OP = 4,
|
||||||
|
COST_DEREF = 4,
|
||||||
|
COST_FUNCALL = 4,
|
||||||
|
COST_GATHER = 8,
|
||||||
|
COST_LOAD = 2,
|
||||||
|
COST_REGULAR_BREAK_CONTINUE = 2,
|
||||||
|
COST_RETURN = 4,
|
||||||
|
COST_SELECT = 4,
|
||||||
|
COST_SIMPLE_ARITH_LOGIC_OP = 1,
|
||||||
|
COST_SYNC = 32,
|
||||||
|
COST_TASK_LAUNCH = 16,
|
||||||
|
COST_TYPECAST_COMPLEX = 4,
|
||||||
|
COST_TYPECAST_SIMPLE = 1,
|
||||||
|
COST_UNIFORM_LOOP = 4,
|
||||||
|
COST_VARYING_LOOP = 6,
|
||||||
|
|
||||||
|
CHECK_MASK_AT_FUNCTION_START_COST = 16,
|
||||||
|
PREDICATE_SAFE_IF_STATEMENT_COST = 6,
|
||||||
|
};
|
||||||
|
|
||||||
extern Globals *g;
|
extern Globals *g;
|
||||||
extern Module *m;
|
extern Module *m;
|
||||||
|
|
||||||
|
|||||||
24
ispc.vcxproj
24
ispc.vcxproj
@@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
<ProjectConfiguration Include="Debug|Win32">
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
@@ -31,12 +31,14 @@
|
|||||||
<ClCompile Include="opt.cpp" />
|
<ClCompile Include="opt.cpp" />
|
||||||
<ClCompile Include="parse.cc" />
|
<ClCompile Include="parse.cc" />
|
||||||
<CustomBuild Include="builtins-c.c">
|
<CustomBuild Include="builtins-c.c">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||||
|
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||||
|
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<ClCompile Include="stmt.cpp" />
|
<ClCompile Include="stmt.cpp" />
|
||||||
<ClCompile Include="sym.cpp" />
|
<ClCompile Include="sym.cpp" />
|
||||||
@@ -61,9 +63,9 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="stdlib.ispc">
|
<CustomBuild Include="stdlib.ispc">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
||||||
@@ -194,7 +196,7 @@
|
|||||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<PreprocessorDefinitions>NOMINMAX;LLVM_2_9</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;LLVM_3_0</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@@ -202,7 +204,7 @@
|
|||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMDebugInfo.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCDisassembler.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
@@ -212,7 +214,7 @@
|
|||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
<PreprocessorDefinitions>NOMINMAX;LLVM_2_9</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;LLVM_3_0</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@@ -222,7 +224,7 @@
|
|||||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
<OptimizeReferences>true</OptimizeReferences>
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMDebugInfo.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCDisassembler.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
|||||||
@@ -33,12 +33,25 @@
|
|||||||
|
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
#define NOMINMAX
|
#define NOMINMAX
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_HAVE_SVML
|
#ifdef ISPC_HAVE_SVML
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
@@ -62,7 +75,6 @@ extern "C" {
|
|||||||
#include <llvm/Instructions.h>
|
#include <llvm/Instructions.h>
|
||||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
#include <llvm/ExecutionEngine/MCJIT.h>
|
|
||||||
#include <llvm/Support/TargetRegistry.h>
|
#include <llvm/Support/TargetRegistry.h>
|
||||||
#include <llvm/Support/TargetSelect.h>
|
#include <llvm/Support/TargetSelect.h>
|
||||||
#else
|
#else
|
||||||
@@ -81,9 +93,7 @@ extern "C" {
|
|||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
#include <llvm/Bitcode/ReaderWriter.h>
|
#include <llvm/Bitcode/ReaderWriter.h>
|
||||||
#include <llvm/Support/MemoryBuffer.h>
|
#include <llvm/Support/MemoryBuffer.h>
|
||||||
#ifndef LLVM_2_8
|
|
||||||
#include <llvm/Support/system_error.h>
|
#include <llvm/Support/system_error.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
bool shouldFail = false;
|
bool shouldFail = false;
|
||||||
|
|
||||||
@@ -105,16 +115,35 @@ void ISPCSync() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef ISPC_IS_WINDOWS
|
|
||||||
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
return _aligned_malloc(size, alignment);
|
return _aligned_malloc(size, alignment);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
return memalign(alignment, size);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
void *mem = malloc(size + (alignment-1) + sizeof(void*));
|
||||||
|
char *amem = ((char*)mem) + sizeof(void*);
|
||||||
|
amem = amem + uint32_t(alignment - (reinterpret_cast<uint64_t>(amem) &
|
||||||
|
(alignment - 1)));
|
||||||
|
((void**)amem)[-1] = mem;
|
||||||
|
return amem;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ISPCFree(void *ptr) {
|
void ISPCFree(void *ptr) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
_aligned_free(ptr);
|
_aligned_free(ptr);
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
free(ptr);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
free(((void**)ptr)[-1]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static void usage(int ret) {
|
static void usage(int ret) {
|
||||||
fprintf(stderr, "usage: ispc_test\n");
|
fprintf(stderr, "usage: ispc_test\n");
|
||||||
@@ -145,17 +174,6 @@ double Log(double x) { return log(x); }
|
|||||||
static bool lRunTest(const char *fn) {
|
static bool lRunTest(const char *fn) {
|
||||||
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
||||||
|
|
||||||
#ifdef LLVM_2_8
|
|
||||||
std::string err;
|
|
||||||
llvm::MemoryBuffer *buf = llvm::MemoryBuffer::getFileOrSTDIN(fn, &err);
|
|
||||||
if (!buf) {
|
|
||||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.c_str());
|
|
||||||
delete ctx;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
std::string bcErr;
|
|
||||||
llvm::Module *module = llvm::ParseBitcodeFile(buf, *ctx, &bcErr);
|
|
||||||
#else
|
|
||||||
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
||||||
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
||||||
if (err) {
|
if (err) {
|
||||||
@@ -165,7 +183,6 @@ static bool lRunTest(const char *fn) {
|
|||||||
}
|
}
|
||||||
std::string bcErr;
|
std::string bcErr;
|
||||||
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!module) {
|
if (!module) {
|
||||||
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
||||||
@@ -200,10 +217,8 @@ static bool lRunTest(const char *fn) {
|
|||||||
ee->addGlobalMapping(func, (void *)FUNC)
|
ee->addGlobalMapping(func, (void *)FUNC)
|
||||||
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||||
DO_FUNC(ISPCSync, "ISPCSync");
|
DO_FUNC(ISPCSync, "ISPCSync");
|
||||||
#ifdef ISPC_IS_WINDOWS
|
|
||||||
DO_FUNC(ISPCMalloc, "ISPCMalloc");
|
DO_FUNC(ISPCMalloc, "ISPCMalloc");
|
||||||
DO_FUNC(ISPCFree, "ISPCFree");
|
DO_FUNC(ISPCFree, "ISPCFree");
|
||||||
#endif // ISPC_IS_WINDOWS
|
|
||||||
DO_FUNC(putchar, "putchar");
|
DO_FUNC(putchar, "putchar");
|
||||||
DO_FUNC(printf, "printf");
|
DO_FUNC(printf, "printf");
|
||||||
DO_FUNC(fflush, "fflush");
|
DO_FUNC(fflush, "fflush");
|
||||||
@@ -357,8 +372,6 @@ static bool lRunTest(const char *fn) {
|
|||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
llvm::InitializeNativeTarget();
|
llvm::InitializeNativeTarget();
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::InitializeAllTargetMCs();
|
|
||||||
LLVMLinkInMCJIT();
|
|
||||||
LLVMLinkInJIT();
|
LLVMLinkInJIT();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -52,14 +52,14 @@
|
|||||||
</PrecompiledHeader>
|
</PrecompiledHeader>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<PreprocessorDefinitions>ISPC_IS_WINDOWS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>LLVM_3_0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
@@ -70,7 +70,7 @@
|
|||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
<PreprocessorDefinitions>ISPC_IS_WINDOWS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>LLVM_3_0;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
@@ -79,7 +79,7 @@
|
|||||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
<OptimizeReferences>true</OptimizeReferences>
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
|||||||
46
main.cpp
46
main.cpp
@@ -40,11 +40,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <llvm/Support/PrettyStackTrace.h>
|
#include <llvm/Support/PrettyStackTrace.h>
|
||||||
#ifdef LLVM_2_8
|
#include <llvm/Support/Signals.h>
|
||||||
#include <llvm/System/Signals.h>
|
|
||||||
#else
|
|
||||||
#include <llvm/Support/Signals.h>
|
|
||||||
#endif
|
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
#include <llvm/Support/TargetRegistry.h>
|
#include <llvm/Support/TargetRegistry.h>
|
||||||
#include <llvm/Support/TargetSelect.h>
|
#include <llvm/Support/TargetSelect.h>
|
||||||
@@ -73,7 +69,6 @@ static void usage(int ret) {
|
|||||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||||
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
||||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||||
printf(" [--fast-math]\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
|
||||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||||
printf(" [--help]\t\t\t\tPrint help\n");
|
printf(" [--help]\t\t\t\tPrint help\n");
|
||||||
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
||||||
@@ -87,8 +82,11 @@ static void usage(int ret) {
|
|||||||
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
|
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
|
||||||
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
|
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
|
||||||
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
||||||
#if 0
|
|
||||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||||
|
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||||
|
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||||
|
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||||
|
#if 0
|
||||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||||
@@ -98,6 +96,9 @@ static void usage(int ret) {
|
|||||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef ISPC_IS_WINDOWS
|
||||||
|
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||||
|
#endif // !ISPC_IS_WINDOWS
|
||||||
printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs());
|
printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs());
|
||||||
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
||||||
printf(" [--woff]\t\t\t\tDisable warnings\n");
|
printf(" [--woff]\t\t\t\tDisable warnings\n");
|
||||||
@@ -184,8 +185,9 @@ int main(int Argc, char *Argv[]) {
|
|||||||
|
|
||||||
bool debugSet = false, optSet = false;
|
bool debugSet = false, optSet = false;
|
||||||
Module::OutputType ot = Module::Object;
|
Module::OutputType ot = Module::Object;
|
||||||
|
bool generatePIC = false;
|
||||||
const char *arch = NULL, *cpu = NULL, *target = NULL;
|
const char *arch = NULL, *cpu = NULL, *target = NULL;
|
||||||
|
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
if (!strcmp(argv[i], "--help"))
|
if (!strcmp(argv[i], "--help"))
|
||||||
usage(0);
|
usage(0);
|
||||||
@@ -195,8 +197,15 @@ int main(int Argc, char *Argv[]) {
|
|||||||
arch = argv[i] + 7;
|
arch = argv[i] + 7;
|
||||||
else if (!strncmp(argv[i], "--cpu=", 6))
|
else if (!strncmp(argv[i], "--cpu=", 6))
|
||||||
cpu = argv[i] + 6;
|
cpu = argv[i] + 6;
|
||||||
else if (!strcmp(argv[i], "--fast-math"))
|
else if (!strcmp(argv[i], "--fast-math")) {
|
||||||
g->opt.fastMath = true;
|
fprintf(stderr, "--fast-math option has been renamed to --opt=fast-math!\n");
|
||||||
|
usage(1);
|
||||||
|
}
|
||||||
|
else if (!strcmp(argv[i], "--fast-masked-vload")) {
|
||||||
|
fprintf(stderr, "--fast-masked-vload option has been renamed to "
|
||||||
|
"--opt=fast-masked-vload!\n");
|
||||||
|
usage(1);
|
||||||
|
}
|
||||||
else if (!strcmp(argv[i], "--debug"))
|
else if (!strcmp(argv[i], "--debug"))
|
||||||
g->debugPrint = true;
|
g->debugPrint = true;
|
||||||
else if (!strcmp(argv[i], "--instrument"))
|
else if (!strcmp(argv[i], "--instrument"))
|
||||||
@@ -233,7 +242,16 @@ int main(int Argc, char *Argv[]) {
|
|||||||
}
|
}
|
||||||
else if (!strncmp(argv[i], "--opt=", 6)) {
|
else if (!strncmp(argv[i], "--opt=", 6)) {
|
||||||
const char *opt = argv[i] + 6;
|
const char *opt = argv[i] + 6;
|
||||||
if (!strcmp(opt, "disable-blended-masked-stores"))
|
if (!strcmp(opt, "fast-math"))
|
||||||
|
g->opt.fastMath = true;
|
||||||
|
else if (!strcmp(opt, "fast-masked-vload"))
|
||||||
|
g->opt.fastMaskedVload = true;
|
||||||
|
else if (!strcmp(opt, "disable-loop-unroll"))
|
||||||
|
g->opt.unrollLoops = false;
|
||||||
|
|
||||||
|
// These are only used for performance tests of specific
|
||||||
|
// optimizations
|
||||||
|
else if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||||
g->opt.disableBlendedMaskedStores = true;
|
g->opt.disableBlendedMaskedStores = true;
|
||||||
else if (!strcmp(opt, "disable-coherent-control-flow"))
|
else if (!strcmp(opt, "disable-coherent-control-flow"))
|
||||||
g->opt.disableCoherentControlFlow = true;
|
g->opt.disableCoherentControlFlow = true;
|
||||||
@@ -286,6 +304,10 @@ int main(int Argc, char *Argv[]) {
|
|||||||
g->includeStdlib = false;
|
g->includeStdlib = false;
|
||||||
else if (!strcmp(argv[i], "--nocpp"))
|
else if (!strcmp(argv[i], "--nocpp"))
|
||||||
g->runCPP = false;
|
g->runCPP = false;
|
||||||
|
#ifndef ISPC_IS_WINDOWS
|
||||||
|
else if (!strcmp(argv[i], "--pic"))
|
||||||
|
generatePIC = true;
|
||||||
|
#endif // !ISPC_IS_WINDOWS
|
||||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||||
printf("Intel(r) SPMD Program Compiler (ispc) build %s (%s)\n",
|
printf("Intel(r) SPMD Program Compiler (ispc) build %s (%s)\n",
|
||||||
BUILD_DATE, BUILD_VERSION);
|
BUILD_DATE, BUILD_VERSION);
|
||||||
@@ -307,7 +329,7 @@ int main(int Argc, char *Argv[]) {
|
|||||||
if (debugSet && !optSet)
|
if (debugSet && !optSet)
|
||||||
g->opt.level = 0;
|
g->opt.level = 0;
|
||||||
|
|
||||||
if (!Target::GetTarget(arch, cpu, target, &g->target))
|
if (!Target::GetTarget(arch, cpu, target, generatePIC, &g->target))
|
||||||
usage(1);
|
usage(1);
|
||||||
|
|
||||||
m = new Module(file);
|
m = new Module(file);
|
||||||
|
|||||||
71
module.cpp
71
module.cpp
@@ -78,14 +78,11 @@
|
|||||||
#include <llvm/Analysis/Verifier.h>
|
#include <llvm/Analysis/Verifier.h>
|
||||||
#include <llvm/Support/CFG.h>
|
#include <llvm/Support/CFG.h>
|
||||||
#include <clang/Frontend/CompilerInstance.h>
|
#include <clang/Frontend/CompilerInstance.h>
|
||||||
|
#include <clang/Frontend/TextDiagnosticPrinter.h>
|
||||||
#include <clang/Frontend/Utils.h>
|
#include <clang/Frontend/Utils.h>
|
||||||
#include <clang/Basic/TargetInfo.h>
|
#include <clang/Basic/TargetInfo.h>
|
||||||
#ifndef LLVM_2_8
|
#include <llvm/Support/ToolOutputFile.h>
|
||||||
#include <llvm/Support/ToolOutputFile.h>
|
#include <llvm/Support/Host.h>
|
||||||
#include <llvm/Support/Host.h>
|
|
||||||
#else // !LLVM_2_8
|
|
||||||
#include <llvm/System/Host.h>
|
|
||||||
#endif // LLVM_2_8
|
|
||||||
#include <llvm/Assembly/PrintModulePass.h>
|
#include <llvm/Assembly/PrintModulePass.h>
|
||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
#include <llvm/Bitcode/ReaderWriter.h>
|
#include <llvm/Bitcode/ReaderWriter.h>
|
||||||
@@ -106,14 +103,11 @@ Module::Module(const char *fn) {
|
|||||||
|
|
||||||
module->setTargetTriple(g->target.GetTripleString());
|
module->setTargetTriple(g->target.GetTripleString());
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (g->generateDebuggingSymbols)
|
if (g->generateDebuggingSymbols)
|
||||||
diBuilder = new llvm::DIBuilder(*module);
|
diBuilder = new llvm::DIBuilder(*module);
|
||||||
else
|
else
|
||||||
diBuilder = NULL;
|
diBuilder = NULL;
|
||||||
#endif // LLVM_2_8
|
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
// If we're generating debugging symbols, let the DIBuilder know that
|
// If we're generating debugging symbols, let the DIBuilder know that
|
||||||
// we're starting a new compilation unit.
|
// we're starting a new compilation unit.
|
||||||
if (diBuilder != NULL) {
|
if (diBuilder != NULL) {
|
||||||
@@ -139,7 +133,6 @@ Module::Module(const char *fn) {
|
|||||||
0 /* run time version */);
|
0 /* run time version */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -153,6 +146,9 @@ extern void yy_delete_buffer(YY_BUFFER_STATE);
|
|||||||
|
|
||||||
int
|
int
|
||||||
Module::CompileFile() {
|
Module::CompileFile() {
|
||||||
|
if (g->opt.fastMath == true)
|
||||||
|
llvm::UnsafeFPMath = true;
|
||||||
|
|
||||||
// FIXME: it'd be nice to do this in the Module constructor, but this
|
// FIXME: it'd be nice to do this in the Module constructor, but this
|
||||||
// function ends up calling into routines that expect the global
|
// function ends up calling into routines that expect the global
|
||||||
// variable 'm' to be initialized and available (which it isn't until
|
// variable 'm' to be initialized and available (which it isn't until
|
||||||
@@ -457,6 +453,10 @@ Module::AddGlobal(DeclSpecs *ds, Declarator *decl) {
|
|||||||
// declarations, typedefs, and global variables declarations /
|
// declarations, typedefs, and global variables declarations /
|
||||||
// definitions. Figure out what we've got and take care of it.
|
// definitions. Figure out what we've got and take care of it.
|
||||||
|
|
||||||
|
if (ds == NULL || decl == NULL)
|
||||||
|
// Error happened earlier during parsing
|
||||||
|
return;
|
||||||
|
|
||||||
if (decl->isFunction) {
|
if (decl->isFunction) {
|
||||||
// function declaration
|
// function declaration
|
||||||
const Type *t = decl->GetType(ds);
|
const Type *t = decl->GetType(ds);
|
||||||
@@ -557,7 +557,6 @@ Module::AddGlobal(DeclSpecs *ds, Declarator *decl) {
|
|||||||
decl->sym->name.c_str());
|
decl->sym->name.c_str());
|
||||||
m->symbolTable->AddVariable(decl->sym);
|
m->symbolTable->AddVariable(decl->sym);
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
if (diBuilder && (ds->storageClass != SC_EXTERN)) {
|
if (diBuilder && (ds->storageClass != SC_EXTERN)) {
|
||||||
llvm::DIFile file = decl->pos.GetDIFile();
|
llvm::DIFile file = decl->pos.GetDIFile();
|
||||||
diBuilder->createGlobalVariable(decl->sym->name,
|
diBuilder->createGlobalVariable(decl->sym->name,
|
||||||
@@ -567,7 +566,6 @@ Module::AddGlobal(DeclSpecs *ds, Declarator *decl) {
|
|||||||
(ds->storageClass == SC_STATIC),
|
(ds->storageClass == SC_STATIC),
|
||||||
decl->sym->storagePtr);
|
decl->sym->storagePtr);
|
||||||
}
|
}
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -662,6 +660,11 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
// the code to free that memory, now that we've copied the
|
// the code to free that memory, now that we've copied the
|
||||||
// parameter values out of the structure.
|
// parameter values out of the structure.
|
||||||
ctx->EmitFree(structParamPtr);
|
ctx->EmitFree(structParamPtr);
|
||||||
|
#else
|
||||||
|
// We also do this for AVX... (See discussion in
|
||||||
|
// FunctionEmitContext::LaunchInst().)
|
||||||
|
if (g->target.isa == Target::AVX)
|
||||||
|
ctx->EmitFree(structParamPtr);
|
||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -700,8 +703,18 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
|
|
||||||
// Finally, we can generate code for the function
|
// Finally, we can generate code for the function
|
||||||
if (code != NULL) {
|
if (code != NULL) {
|
||||||
|
int costEstimate = code->EstimateCost();
|
||||||
bool checkMask = (ft->isTask == true) ||
|
bool checkMask = (ft->isTask == true) ||
|
||||||
(function->hasFnAttr(llvm::Attribute::AlwaysInline) == false);
|
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||||
|
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||||
|
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||||
|
funSym->name.c_str(), costEstimate);
|
||||||
|
// If the body of the function is non-trivial, then we wrap the
|
||||||
|
// entire thing around a varying "cif (true)" test in order to reap
|
||||||
|
// the side-effect benefit of checking to see if the execution mask
|
||||||
|
// is all on and thence having a specialized code path for that
|
||||||
|
// case. If this is a simple function, then this isn't worth the
|
||||||
|
// code bloat / overhead.
|
||||||
if (checkMask) {
|
if (checkMask) {
|
||||||
bool allTrue[ISPC_MAX_NVEC];
|
bool allTrue[ISPC_MAX_NVEC];
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
@@ -914,12 +927,7 @@ Module::WriteOutput(OutputType outputType, const char *outFileName) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
fprintf(stderr, "Direct object file emission not supported in this build.\n");
|
|
||||||
return false;
|
|
||||||
#else
|
|
||||||
return writeObjectFileOrAssembly(outputType, outFileName);
|
return writeObjectFileOrAssembly(outputType, outFileName);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1121,6 +1129,12 @@ lEmitVectorTypedefs(const std::vector<const VectorType *> &types, FILE *file) {
|
|||||||
for (unsigned int i = 0; i < types.size(); ++i) {
|
for (unsigned int i = 0; i < types.size(); ++i) {
|
||||||
std::string baseDecl;
|
std::string baseDecl;
|
||||||
const VectorType *vt = types[i]->GetAsNonConstType();
|
const VectorType *vt = types[i]->GetAsNonConstType();
|
||||||
|
if (!vt->IsUniformType())
|
||||||
|
// Varying stuff shouldn't be visibile to / used by the
|
||||||
|
// application, so at least make it not simple to access it by
|
||||||
|
// not declaring the type here...
|
||||||
|
continue;
|
||||||
|
|
||||||
int size = vt->GetElementCount();
|
int size = vt->GetElementCount();
|
||||||
|
|
||||||
baseDecl = vt->GetBaseType()->GetCDeclaration("");
|
baseDecl = vt->GetBaseType()->GetCDeclaration("");
|
||||||
@@ -1293,6 +1307,7 @@ Module::writeHeader(const char *fn) {
|
|||||||
default:
|
default:
|
||||||
FATAL("Unhandled target in header emission");
|
FATAL("Unhandled target in header emission");
|
||||||
}
|
}
|
||||||
|
fprintf(f, "#define ISPC_TARGET_VECTOR_WIDTH %d\n", g->target.vectorWidth);
|
||||||
|
|
||||||
fprintf(f, "#ifdef __cplusplus\nnamespace ispc {\n#endif // __cplusplus\n\n");
|
fprintf(f, "#ifdef __cplusplus\nnamespace ispc {\n#endif // __cplusplus\n\n");
|
||||||
|
|
||||||
@@ -1374,23 +1389,26 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
|||||||
std::string error;
|
std::string error;
|
||||||
|
|
||||||
inst.createFileManager();
|
inst.createFileManager();
|
||||||
inst.createDiagnostics(0, NULL);
|
|
||||||
clang::TargetOptions& options = inst.getTargetOpts();
|
|
||||||
|
|
||||||
|
llvm::raw_fd_ostream stderrRaw(2, false);
|
||||||
|
clang::TextDiagnosticPrinter *diagPrinter =
|
||||||
|
new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
|
||||||
|
inst.createDiagnostics(0, NULL, diagPrinter);
|
||||||
|
|
||||||
|
clang::TargetOptions &options = inst.getTargetOpts();
|
||||||
llvm::Triple triple(module->getTargetTriple());
|
llvm::Triple triple(module->getTargetTriple());
|
||||||
if (triple.getTriple().empty())
|
if (triple.getTriple().empty())
|
||||||
triple.setTriple(llvm::sys::getHostTriple());
|
triple.setTriple(llvm::sys::getHostTriple());
|
||||||
|
|
||||||
options.Triple = triple.getTriple();
|
options.Triple = triple.getTriple();
|
||||||
|
|
||||||
clang::TargetInfo* target
|
clang::TargetInfo *target =
|
||||||
= clang::TargetInfo::CreateTargetInfo(inst.getDiagnostics(), options);
|
clang::TargetInfo::CreateTargetInfo(inst.getDiagnostics(), options);
|
||||||
|
|
||||||
inst.setTarget(target);
|
inst.setTarget(target);
|
||||||
inst.createSourceManager(inst.getFileManager());
|
inst.createSourceManager(inst.getFileManager());
|
||||||
inst.InitializeSourceManager(infilename);
|
inst.InitializeSourceManager(infilename);
|
||||||
|
|
||||||
clang::PreprocessorOptions& opts = inst.getPreprocessorOpts();
|
clang::PreprocessorOptions &opts = inst.getPreprocessorOpts();
|
||||||
|
|
||||||
//Add defs for ISPC and PI
|
//Add defs for ISPC and PI
|
||||||
opts.addMacroDef("ISPC");
|
opts.addMacroDef("ISPC");
|
||||||
@@ -1403,7 +1421,10 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
inst.createPreprocessor();
|
inst.createPreprocessor();
|
||||||
|
|
||||||
|
clang::LangOptions langOptions;
|
||||||
|
diagPrinter->BeginSourceFile(langOptions, &inst.getPreprocessor());
|
||||||
clang::DoPrintPreprocessedInput(inst.getPreprocessor(),
|
clang::DoPrintPreprocessedInput(inst.getPreprocessor(),
|
||||||
ostream, inst.getPreprocessorOutputOpts());
|
ostream, inst.getPreprocessorOutputOpts());
|
||||||
|
diagPrinter->EndSourceFile();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
5
module.h
5
module.h
@@ -91,11 +91,8 @@ public:
|
|||||||
/** llvm Module object into which globals and functions are added. */
|
/** llvm Module object into which globals and functions are added. */
|
||||||
llvm::Module *module;
|
llvm::Module *module;
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
/** The diBuilder manages generating debugging information */
|
||||||
/** The diBuilder manages generating debugging information (only
|
|
||||||
supported in LLVM 2.9 and beyond...) */
|
|
||||||
llvm::DIBuilder *diBuilder;
|
llvm::DIBuilder *diBuilder;
|
||||||
#endif
|
|
||||||
|
|
||||||
GatherBuffer *gatherBuffer;
|
GatherBuffer *gatherBuffer;
|
||||||
|
|
||||||
|
|||||||
263
opt.cpp
263
opt.cpp
@@ -56,13 +56,11 @@
|
|||||||
#include <llvm/Intrinsics.h>
|
#include <llvm/Intrinsics.h>
|
||||||
#include <llvm/Constants.h>
|
#include <llvm/Constants.h>
|
||||||
#include <llvm/Analysis/ConstantFolding.h>
|
#include <llvm/Analysis/ConstantFolding.h>
|
||||||
#ifndef LLVM_2_8
|
#include <llvm/Target/TargetLibraryInfo.h>
|
||||||
#include <llvm/Target/TargetLibraryInfo.h>
|
#ifdef LLVM_2_9
|
||||||
#ifdef LLVM_2_9
|
#include <llvm/Support/StandardPasses.h>
|
||||||
#include <llvm/Support/StandardPasses.h>
|
#else
|
||||||
#else
|
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
||||||
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
|
||||||
#endif // LLVM_2_9
|
|
||||||
#endif // LLVM_2_8
|
#endif // LLVM_2_8
|
||||||
#include <llvm/ADT/Triple.h>
|
#include <llvm/ADT/Triple.h>
|
||||||
#include <llvm/Transforms/Scalar.h>
|
#include <llvm/Transforms/Scalar.h>
|
||||||
@@ -73,11 +71,15 @@
|
|||||||
#include <llvm/Target/TargetMachine.h>
|
#include <llvm/Target/TargetMachine.h>
|
||||||
#include <llvm/Analysis/Verifier.h>
|
#include <llvm/Analysis/Verifier.h>
|
||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
#ifndef LLVM_2_8
|
|
||||||
#include <llvm/Analysis/DIBuilder.h>
|
#include <llvm/Analysis/DIBuilder.h>
|
||||||
#endif
|
|
||||||
#include <llvm/Analysis/DebugInfo.h>
|
#include <llvm/Analysis/DebugInfo.h>
|
||||||
#include <llvm/Support/Dwarf.h>
|
#include <llvm/Support/Dwarf.h>
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
#include <alloca.h>
|
||||||
|
#elif defined(ISPC_IS_WINDOWS)
|
||||||
|
#include <malloc.h>
|
||||||
|
#define alloca _alloca
|
||||||
|
#endif // ISPC_IS_WINDOWS
|
||||||
|
|
||||||
static llvm::Pass *CreateIntrinsicsOptPass();
|
static llvm::Pass *CreateIntrinsicsOptPass();
|
||||||
static llvm::Pass *CreateGatherScatterFlattenPass();
|
static llvm::Pass *CreateGatherScatterFlattenPass();
|
||||||
@@ -180,19 +182,22 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::PassManager optPM;
|
llvm::PassManager optPM;
|
||||||
llvm::FunctionPassManager funcPM(module);
|
llvm::FunctionPassManager funcPM(module);
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
|
||||||
llvm::TargetLibraryInfo *targetLibraryInfo =
|
llvm::TargetLibraryInfo *targetLibraryInfo =
|
||||||
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
|
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
|
||||||
optPM.add(targetLibraryInfo);
|
optPM.add(targetLibraryInfo);
|
||||||
#endif
|
|
||||||
optPM.add(new llvm::TargetData(module));
|
optPM.add(new llvm::TargetData(module));
|
||||||
|
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
optPM.add(llvm::createIndVarSimplifyPass());
|
||||||
|
#endif
|
||||||
|
|
||||||
if (optLevel == 0) {
|
if (optLevel == 0) {
|
||||||
// This is more or less the minimum set of optimizations that we
|
// This is more or less the minimum set of optimizations that we
|
||||||
// need to do to generate code that will actually run. (We can't
|
// need to do to generate code that will actually run. (We can't
|
||||||
// run absolutely no optimizations, since the front-end needs us to
|
// run absolutely no optimizations, since the front-end needs us to
|
||||||
// take the various __pseudo_* functions it has emitted and turn
|
// take the various __pseudo_* functions it has emitted and turn
|
||||||
// them into something that can actually execute.
|
// them into something that can actually execute.
|
||||||
|
optPM.add(llvm::createPromoteMemoryToRegisterPass());
|
||||||
optPM.add(CreateGatherScatterFlattenPass());
|
optPM.add(CreateGatherScatterFlattenPass());
|
||||||
optPM.add(CreateLowerGatherScatterPass());
|
optPM.add(CreateLowerGatherScatterPass());
|
||||||
optPM.add(CreateLowerMaskedStorePass());
|
optPM.add(CreateLowerMaskedStorePass());
|
||||||
@@ -213,7 +218,6 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
// only later in the optimization process as things like constant
|
// only later in the optimization process as things like constant
|
||||||
// propagation have done their thing, and then when they do kick
|
// propagation have done their thing, and then when they do kick
|
||||||
// in, they can often open up new opportunities for optimization...
|
// in, they can often open up new opportunities for optimization...
|
||||||
#ifndef LLVM_2_8
|
|
||||||
llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry();
|
llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry();
|
||||||
llvm::initializeCore(*registry);
|
llvm::initializeCore(*registry);
|
||||||
llvm::initializeScalarOpts(*registry);
|
llvm::initializeScalarOpts(*registry);
|
||||||
@@ -224,7 +228,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::initializeInstCombine(*registry);
|
llvm::initializeInstCombine(*registry);
|
||||||
llvm::initializeInstrumentation(*registry);
|
llvm::initializeInstrumentation(*registry);
|
||||||
llvm::initializeTarget(*registry);
|
llvm::initializeTarget(*registry);
|
||||||
#endif
|
|
||||||
// Early optimizations to try to reduce the total amount of code to
|
// Early optimizations to try to reduce the total amount of code to
|
||||||
// work with if we can
|
// work with if we can
|
||||||
optPM.add(CreateGatherScatterFlattenPass());
|
optPM.add(CreateGatherScatterFlattenPass());
|
||||||
@@ -281,13 +285,11 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createConstantPropagationPass());
|
optPM.add(llvm::createConstantPropagationPass());
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass());
|
||||||
|
|
||||||
#if defined(LLVM_2_8)
|
#if defined(LLVM_2_9)
|
||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
|
||||||
#elif defined(LLVM_2_9)
|
|
||||||
llvm::createStandardModulePasses(&optPM, 3,
|
llvm::createStandardModulePasses(&optPM, 3,
|
||||||
false /* opt size */,
|
false /* opt size */,
|
||||||
true /* unit at a time */,
|
true /* unit at a time */,
|
||||||
false /* unroll loops */,
|
g->opt.unrollLoops,
|
||||||
true /* simplify lib calls */,
|
true /* simplify lib calls */,
|
||||||
false /* may have exceptions */,
|
false /* may have exceptions */,
|
||||||
llvm::createFunctionInliningPass());
|
llvm::createFunctionInliningPass());
|
||||||
@@ -302,7 +304,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::createStandardModulePasses(&optPM, 3,
|
llvm::createStandardModulePasses(&optPM, 3,
|
||||||
false /* opt size */,
|
false /* opt size */,
|
||||||
true /* unit at a time */,
|
true /* unit at a time */,
|
||||||
false /* unroll loops */,
|
g->opt.unrollLoops,
|
||||||
true /* simplify lib calls */,
|
true /* simplify lib calls */,
|
||||||
false /* may have exceptions */,
|
false /* may have exceptions */,
|
||||||
llvm::createFunctionInliningPass());
|
llvm::createFunctionInliningPass());
|
||||||
@@ -311,6 +313,8 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::PassManagerBuilder builder;
|
llvm::PassManagerBuilder builder;
|
||||||
builder.OptLevel = 3;
|
builder.OptLevel = 3;
|
||||||
builder.Inliner = llvm::createFunctionInliningPass();
|
builder.Inliner = llvm::createFunctionInliningPass();
|
||||||
|
if (g->opt.unrollLoops == false)
|
||||||
|
builder.DisableUnrollLoops = true;
|
||||||
builder.populateFunctionPassManager(funcPM);
|
builder.populateFunctionPassManager(funcPM);
|
||||||
builder.populateModulePassManager(optPM);
|
builder.populateModulePassManager(optPM);
|
||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||||
@@ -423,8 +427,11 @@ IntrinsicsOpt::IntrinsicsOpt()
|
|||||||
blendInstructions.push_back(BlendInstruction(
|
blendInstructions.push_back(BlendInstruction(
|
||||||
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse41_blendvps),
|
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse41_blendvps),
|
||||||
0xf, 0, 1, 2));
|
0xf, 0, 1, 2));
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
blendInstructions.push_back(BlendInstruction(
|
blendInstructions.push_back(BlendInstruction(
|
||||||
m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2));
|
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_blendv_ps_256),
|
||||||
|
0xff, 0, 1, 2));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1433,16 +1440,12 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
llvm::Value *rvalue = callInst->getArgOperand(1);
|
llvm::Value *rvalue = callInst->getArgOperand(1);
|
||||||
llvm::Value *mask = callInst->getArgOperand(2);
|
llvm::Value *mask = callInst->getArgOperand(2);
|
||||||
|
|
||||||
// On SSE, we need to choose between doing the load + blend + store
|
// We need to choose between doing the load + blend + store trick,
|
||||||
// trick, or serializing the masked store. On targets with a
|
// or serializing the masked store. Even on targets with a native
|
||||||
// native masked store instruction, the implementations of
|
// masked store instruction, this is preferable since it lets us
|
||||||
// __masked_store_blend_* should be the same as __masked_store_*,
|
// keep values in registers rather than going out to the stack.
|
||||||
// so this doesn't matter. On SSE, blending is generally more
|
bool doBlend = (!g->opt.disableBlendedMaskedStores ||
|
||||||
// efficient and is always safe to do on stack-allocated values.(?)
|
|
||||||
bool doBlend = (g->target.isa != Target::AVX &&
|
|
||||||
lIsStackVariablePointer(lvalue));
|
lIsStackVariablePointer(lvalue));
|
||||||
if (g->target.isa == Target::SSE4 || g->target.isa == Target::SSE2)
|
|
||||||
doBlend |= !g->opt.disableBlendedMaskedStores;
|
|
||||||
|
|
||||||
// Generate the call to the appropriate masked store function and
|
// Generate the call to the appropriate masked store function and
|
||||||
// replace the __pseudo_* one with it.
|
// replace the __pseudo_* one with it.
|
||||||
@@ -1520,8 +1523,8 @@ static void lPrintVector(const char *info, llvm::Value *elements[ISPC_MAX_NVEC])
|
|||||||
|
|
||||||
|
|
||||||
/** Given an LLVM vector in vec, return a 'scalarized' version of the
|
/** Given an LLVM vector in vec, return a 'scalarized' version of the
|
||||||
vector in the provided offsets[] array. For example, if the vector
|
vector in the provided scalarizedVector[] array. For example, if the
|
||||||
value passed in is:
|
vector value passed in is:
|
||||||
|
|
||||||
add <4 x i32> %a_smear, <4 x i32> <4, 8, 12, 16>,
|
add <4 x i32> %a_smear, <4 x i32> <4, 8, 12, 16>,
|
||||||
|
|
||||||
@@ -1542,28 +1545,39 @@ static void lPrintVector(const char *info, llvm::Value *elements[ISPC_MAX_NVEC])
|
|||||||
@param vec Vector to be scalarized
|
@param vec Vector to be scalarized
|
||||||
@param scalarizedVector Array in which to store the individual vector
|
@param scalarizedVector Array in which to store the individual vector
|
||||||
elements
|
elements
|
||||||
|
@param vectorLength Number of elements in the given vector. (The
|
||||||
|
passed scalarizedVector array must also be at least
|
||||||
|
this length as well.)
|
||||||
@returns True if the vector was successfully scalarized and
|
@returns True if the vector was successfully scalarized and
|
||||||
the values in offsets[] are valid; false otherwise
|
the values in offsets[] are valid; false otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC]) {
|
lScalarizeVector(llvm::Value *vec, llvm::Value **scalarizedVector,
|
||||||
|
int vectorLength) {
|
||||||
// First initialize the values of scalarizedVector[] to NULL.
|
// First initialize the values of scalarizedVector[] to NULL.
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = NULL;
|
scalarizedVector[i] = NULL;
|
||||||
|
|
||||||
|
// It may be ok for the vector to be an undef vector; these come up for
|
||||||
|
// example in shufflevector instructions. As long as elements of the
|
||||||
|
// undef vector aren't referenced by the shuffle indices, this is fine.
|
||||||
|
if (llvm::isa<llvm::UndefValue>(vec))
|
||||||
|
return true;
|
||||||
|
|
||||||
// ConstantVectors are easy; just pull out the individual constant
|
// ConstantVectors are easy; just pull out the individual constant
|
||||||
// element values
|
// element values
|
||||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(vec);
|
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(vec);
|
||||||
if (cv != NULL) {
|
if (cv != NULL) {
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = cv->getOperand(i);
|
scalarizedVector[i] = cv->getOperand(i);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// It's also easy if it's just a vector of all zeros
|
// It's also easy if it's just a vector of all zeros
|
||||||
llvm::ConstantAggregateZero *caz = llvm::dyn_cast<llvm::ConstantAggregateZero>(vec);
|
llvm::ConstantAggregateZero *caz =
|
||||||
if (caz) {
|
llvm::dyn_cast<llvm::ConstantAggregateZero>(vec);
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
if (caz != NULL) {
|
||||||
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = LLVMInt32(0);
|
scalarizedVector[i] = LLVMInt32(0);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1575,13 +1589,16 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
// scalar values we return from here are synthesized with scalar
|
// scalar values we return from here are synthesized with scalar
|
||||||
// versions of the original vector binary operator
|
// versions of the original vector binary operator
|
||||||
llvm::Instruction::BinaryOps opcode = bo->getOpcode();
|
llvm::Instruction::BinaryOps opcode = bo->getOpcode();
|
||||||
llvm::Value *v0[ISPC_MAX_NVEC], *v1[ISPC_MAX_NVEC];
|
llvm::Value **v0 =
|
||||||
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
llvm::Value **v1 =
|
||||||
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
|
||||||
if (!lScalarizeVector(bo->getOperand(0), v0) ||
|
if (!lScalarizeVector(bo->getOperand(0), v0, vectorLength) ||
|
||||||
!lScalarizeVector(bo->getOperand(1), v1))
|
!lScalarizeVector(bo->getOperand(1), v1, vectorLength))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
scalarizedVector[i] =
|
scalarizedVector[i] =
|
||||||
llvm::BinaryOperator::Create(opcode, v0[i], v1[i], "flat_bop", bo);
|
llvm::BinaryOperator::Create(opcode, v0[i], v1[i], "flat_bop", bo);
|
||||||
lCopyMetadata(scalarizedVector[i], bo);
|
lCopyMetadata(scalarizedVector[i], bo);
|
||||||
@@ -1606,7 +1623,7 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
// vaue in scalarizedVector[] based on the value being inserted.
|
// vaue in scalarizedVector[] based on the value being inserted.
|
||||||
while (ie != NULL) {
|
while (ie != NULL) {
|
||||||
uint64_t iOffset = lGetIntValue(ie->getOperand(2));
|
uint64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||||
assert((int)iOffset < g->target.vectorWidth);
|
assert((int)iOffset < vectorLength);
|
||||||
assert(scalarizedVector[iOffset] == NULL);
|
assert(scalarizedVector[iOffset] == NULL);
|
||||||
|
|
||||||
scalarizedVector[iOffset] = ie->getOperand(1);
|
scalarizedVector[iOffset] = ie->getOperand(1);
|
||||||
@@ -1620,15 +1637,17 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(vec);
|
llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(vec);
|
||||||
if (ci) {
|
if (ci != NULL) {
|
||||||
// Casts are similar to BinaryOperators in that we attempt to
|
// Casts are similar to BinaryOperators in that we attempt to
|
||||||
// scalarize the vector being cast and if successful, we apply
|
// scalarize the vector being cast and if successful, we apply
|
||||||
// equivalent scalar cast operators to each of the values in the
|
// equivalent scalar cast operators to each of the values in the
|
||||||
// scalarized vector.
|
// scalarized vector.
|
||||||
llvm::Instruction::CastOps op = ci->getOpcode();
|
llvm::Instruction::CastOps op = ci->getOpcode();
|
||||||
|
|
||||||
llvm::Value *scalarizedTarget[ISPC_MAX_NVEC];
|
llvm::Value **scalarizedTarget =
|
||||||
if (!lScalarizeVector(ci->getOperand(0), scalarizedTarget))
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
if (!lScalarizeVector(ci->getOperand(0), scalarizedTarget,
|
||||||
|
vectorLength))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::Type *destType = ci->getDestTy();
|
LLVM_TYPE_CONST llvm::Type *destType = ci->getDestTy();
|
||||||
@@ -1637,7 +1656,7 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
assert(vectorDestType != NULL);
|
assert(vectorDestType != NULL);
|
||||||
LLVM_TYPE_CONST llvm::Type *elementType = vectorDestType->getElementType();
|
LLVM_TYPE_CONST llvm::Type *elementType = vectorDestType->getElementType();
|
||||||
|
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
scalarizedVector[i] =
|
scalarizedVector[i] =
|
||||||
llvm::CastInst::Create(op, scalarizedTarget[i], elementType,
|
llvm::CastInst::Create(op, scalarizedTarget[i], elementType,
|
||||||
"cast", ci);
|
"cast", ci);
|
||||||
@@ -1647,16 +1666,11 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::ShuffleVectorInst *svi = llvm::dyn_cast<llvm::ShuffleVectorInst>(vec);
|
llvm::ShuffleVectorInst *svi = llvm::dyn_cast<llvm::ShuffleVectorInst>(vec);
|
||||||
if (svi) {
|
if (svi != NULL) {
|
||||||
// Note that the code for shufflevector instructions is untested.
|
|
||||||
// (We haven't yet had a case where it needs to run). Therefore,
|
|
||||||
// an assert at the bottom of this routien will hit the first time
|
|
||||||
// it runs as a reminder that this needs to be tested further.
|
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *svInstType =
|
LLVM_TYPE_CONST llvm::VectorType *svInstType =
|
||||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(svi->getType());
|
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(svi->getType());
|
||||||
assert(svInstType != NULL);
|
assert(svInstType != NULL);
|
||||||
assert((int)svInstType->getNumElements() == g->target.vectorWidth);
|
assert((int)svInstType->getNumElements() == vectorLength);
|
||||||
|
|
||||||
// Scalarize the two vectors being shuffled. First figure out how
|
// Scalarize the two vectors being shuffled. First figure out how
|
||||||
// big they are.
|
// big they are.
|
||||||
@@ -1671,58 +1685,90 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
int n0 = vectorType0->getNumElements();
|
int n0 = vectorType0->getNumElements();
|
||||||
int n1 = vectorType1->getNumElements();
|
int n1 = vectorType1->getNumElements();
|
||||||
|
|
||||||
// FIXME: It's actually totally legitimate for these two to have
|
|
||||||
// different sizes; the final result just needs to have the native
|
|
||||||
// vector width. To handle this, not only do we need to
|
|
||||||
// potentially dynamically allocate space for the arrays passed
|
|
||||||
// into lScalarizeVector, but we need to change the rest of its
|
|
||||||
// implementation to not key off g->target.vectorWidth everywhere
|
|
||||||
// to get the sizes of the arrays to iterate over, etc.
|
|
||||||
assert(n0 == g->target.vectorWidth && n1 == g->target.vectorWidth);
|
|
||||||
|
|
||||||
// Go ahead and scalarize the two input vectors now.
|
// Go ahead and scalarize the two input vectors now.
|
||||||
// FIXME: it's ok if some or all of the values of these two vectors
|
llvm::Value **v0 = (llvm::Value **)alloca(n0 * sizeof(llvm::Value *));
|
||||||
// have undef values, so long as we don't try to access undef
|
llvm::Value **v1 = (llvm::Value **)alloca(n1 * sizeof(llvm::Value *));
|
||||||
// values with the vector indices provided to the instruction.
|
|
||||||
// Should fix lScalarizeVector so that it doesn't return false in
|
if (!lScalarizeVector(svi->getOperand(0), v0, n0) ||
|
||||||
// this case and just leaves the elements of the arrays with undef
|
!lScalarizeVector(svi->getOperand(1), v1, n1))
|
||||||
// values as NULL.
|
|
||||||
llvm::Value *v0[ISPC_MAX_NVEC], *v1[ISPC_MAX_NVEC];
|
|
||||||
if (!lScalarizeVector(svi->getOperand(0), v0) ||
|
|
||||||
!lScalarizeVector(svi->getOperand(1), v1))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
llvm::ConstantVector *shuffleIndicesVector =
|
llvm::ConstantAggregateZero *caz =
|
||||||
llvm::dyn_cast<llvm::ConstantVector>(svi->getOperand(2));
|
llvm::dyn_cast<llvm::ConstantAggregateZero>(svi->getOperand(2));
|
||||||
// I think this has to be a ConstantVector. If this ever hits,
|
if (caz != NULL) {
|
||||||
// we'll dig into what we got instead and figure out how to handle
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
// that...
|
scalarizedVector[i] = v0[0];
|
||||||
assert(shuffleIndicesVector != NULL);
|
}
|
||||||
|
else {
|
||||||
// Get the integer indices for each element of the returned vector
|
llvm::ConstantVector *shuffleIndicesVector =
|
||||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> shuffleIndices;
|
llvm::dyn_cast<llvm::ConstantVector>(svi->getOperand(2));
|
||||||
shuffleIndicesVector->getVectorElements(shuffleIndices);
|
// I think this has to be a ConstantVector. If this ever hits,
|
||||||
assert((int)shuffleIndices.size() == g->target.vectorWidth);
|
// we'll dig into what we got instead and figure out how to handle
|
||||||
|
// that...
|
||||||
// And loop over the indices, setting the i'th element of the
|
assert(shuffleIndicesVector != NULL);
|
||||||
// result vector with the source vector element that corresponds to
|
|
||||||
// the i'th shuffle index value.
|
// Get the integer indices for each element of the returned vector
|
||||||
for (unsigned int i = 0; i < shuffleIndices.size(); ++i) {
|
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> shuffleIndices;
|
||||||
if (!llvm::isa<llvm::ConstantInt>(shuffleIndices[i]))
|
shuffleIndicesVector->getVectorElements(shuffleIndices);
|
||||||
// I'm not sure when this case would ever happen, though..
|
assert((int)shuffleIndices.size() == vectorLength);
|
||||||
return false;
|
|
||||||
int offset = (int)lGetIntValue(shuffleIndices[i]);
|
// And loop over the indices, setting the i'th element of the
|
||||||
assert(offset >= 0 && offset < n0+n1);
|
// result vector with the source vector element that corresponds to
|
||||||
|
// the i'th shuffle index value.
|
||||||
if (offset < n0)
|
for (unsigned int i = 0; i < shuffleIndices.size(); ++i) {
|
||||||
// Offsets from 0 to n0-1 index into the first vector
|
// I'm not sure when this case would ever happen, though..
|
||||||
scalarizedVector[i] = v0[offset];
|
assert(llvm::isa<llvm::ConstantInt>(shuffleIndices[i]));
|
||||||
else
|
|
||||||
// And offsets from n0 to (n0+n1-1) index into the second
|
int offset = (int)lGetIntValue(shuffleIndices[i]);
|
||||||
// vector
|
assert(offset >= 0 && offset < n0+n1);
|
||||||
scalarizedVector[i] = v1[offset - n0];
|
|
||||||
|
if (offset < n0)
|
||||||
|
// Offsets from 0 to n0-1 index into the first vector
|
||||||
|
scalarizedVector[i] = v0[offset];
|
||||||
|
else
|
||||||
|
// And offsets from n0 to (n0+n1-1) index into the second
|
||||||
|
// vector
|
||||||
|
scalarizedVector[i] = v1[offset - n0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::LoadInst *li = llvm::dyn_cast<llvm::LoadInst>(vec);
|
||||||
|
if (li != NULL) {
|
||||||
|
llvm::Value *baseAddr = li->getOperand(0);
|
||||||
|
llvm::Value *baseInt = new llvm::PtrToIntInst(baseAddr, LLVMTypes::Int64Type,
|
||||||
|
"base2int", li);
|
||||||
|
lCopyMetadata(baseInt, li);
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::PointerType *ptrType =
|
||||||
|
llvm::dyn_cast<llvm::PointerType>(baseAddr->getType());
|
||||||
|
assert(ptrType != NULL);
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *vecType =
|
||||||
|
llvm::dyn_cast<llvm::VectorType>(ptrType->getElementType());
|
||||||
|
assert(vecType != NULL);
|
||||||
|
LLVM_TYPE_CONST llvm::Type *elementType = vecType->getElementType();
|
||||||
|
uint64_t elementSize;
|
||||||
|
bool sizeKnown = lSizeOfIfKnown(elementType, &elementSize);
|
||||||
|
assert(sizeKnown == true);
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::Type *eltPtrType = llvm::PointerType::get(elementType, 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
|
llvm::Value *intPtrOffset =
|
||||||
|
llvm::BinaryOperator::Create(llvm::Instruction::Add, baseInt,
|
||||||
|
LLVMInt64(i * elementSize), "baseoffset",
|
||||||
|
li);
|
||||||
|
lCopyMetadata(intPtrOffset, li);
|
||||||
|
llvm::Value *scalarLoadPtr =
|
||||||
|
new llvm::IntToPtrInst(intPtrOffset, eltPtrType, "int2ptr", li);
|
||||||
|
lCopyMetadata(scalarLoadPtr, li);
|
||||||
|
|
||||||
|
llvm::Instruction *scalarLoad =
|
||||||
|
new llvm::LoadInst(scalarLoadPtr, "loadelt", li);
|
||||||
|
lCopyMetadata(scalarLoad, li);
|
||||||
|
scalarizedVector[i] = scalarLoad;
|
||||||
}
|
}
|
||||||
FATAL("the above code is untested so far; check now that it's actually running");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2134,11 +2180,18 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
if (ce && ce->getOpcode() == llvm::Instruction::BitCast)
|
if (ce && ce->getOpcode() == llvm::Instruction::BitCast)
|
||||||
base = ce->getOperand(0);
|
base = ce->getOperand(0);
|
||||||
|
|
||||||
// Try to out the offsets; the i'th element of the offsetElements
|
// Try to find out the offsets; the i'th element of the
|
||||||
// array should be an i32 with the value of the offset for the i'th
|
// offsetElements array should be an i32 with the value of the
|
||||||
// vector lane. This may fail; if so, just give up.
|
// offset for the i'th vector lane. This may fail; if so, just
|
||||||
|
// give up.
|
||||||
|
llvm::Value *vecValue = callInst->getArgOperand(1);
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||||
|
llvm::dyn_cast<llvm::VectorType>(vecValue->getType());
|
||||||
|
assert(vt != NULL);
|
||||||
|
int vecLength = vt->getNumElements();
|
||||||
|
assert(vecLength == g->target.vectorWidth);
|
||||||
llvm::Value *offsetElements[ISPC_MAX_NVEC];
|
llvm::Value *offsetElements[ISPC_MAX_NVEC];
|
||||||
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
|
if (!lScalarizeVector(vecValue, offsetElements, vecLength))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
||||||
@@ -2515,7 +2568,7 @@ llvm::RegisterPass<MakeInternalFuncsStaticPass>
|
|||||||
bool
|
bool
|
||||||
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
||||||
const char *names[] = {
|
const char *names[] = {
|
||||||
"__do_print",
|
"__do_print", "__fast_masked_vload",
|
||||||
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
|
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
|
||||||
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
||||||
"__gather_elt_8", "__gather_elt_16",
|
"__gather_elt_8", "__gather_elt_16",
|
||||||
|
|||||||
39
parse.yy
39
parse.yy
@@ -177,6 +177,7 @@ static const char *lParamListTokens[] = {
|
|||||||
%type <stmt> statement labeled_statement compound_statement for_init_statement
|
%type <stmt> statement labeled_statement compound_statement for_init_statement
|
||||||
%type <stmt> expression_statement selection_statement iteration_statement
|
%type <stmt> expression_statement selection_statement iteration_statement
|
||||||
%type <stmt> jump_statement statement_list declaration_statement print_statement
|
%type <stmt> jump_statement statement_list declaration_statement print_statement
|
||||||
|
%type <stmt> sync_statement
|
||||||
|
|
||||||
%type <declaration> declaration parameter_declaration
|
%type <declaration> declaration parameter_declaration
|
||||||
%type <declarators> init_declarator_list
|
%type <declarators> init_declarator_list
|
||||||
@@ -436,8 +437,6 @@ assignment_expression
|
|||||||
|
|
||||||
expression
|
expression
|
||||||
: assignment_expression
|
: assignment_expression
|
||||||
| TOKEN_SYNC
|
|
||||||
{ $$ = new SyncExpr(@1); }
|
|
||||||
| expression ',' assignment_expression
|
| expression ',' assignment_expression
|
||||||
{ $$ = new BinaryExpr(BinaryExpr::Comma, $1, $3, @2); }
|
{ $$ = new BinaryExpr(BinaryExpr::Comma, $1, $3, @2); }
|
||||||
;
|
;
|
||||||
@@ -928,9 +927,13 @@ parameter_list
|
|||||||
builtinTokens.push_back(*token);
|
builtinTokens.push_back(*token);
|
||||||
++token;
|
++token;
|
||||||
}
|
}
|
||||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
if (strlen(yytext) == 0)
|
||||||
std::string alts = lGetAlternates(alternates);
|
Error(@1, "Syntax error--premature end of file.");
|
||||||
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
else {
|
||||||
|
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||||
|
std::string alts = lGetAlternates(alternates);
|
||||||
|
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
||||||
|
}
|
||||||
$$ = NULL;
|
$$ = NULL;
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
@@ -1019,6 +1022,7 @@ statement
|
|||||||
| jump_statement
|
| jump_statement
|
||||||
| declaration_statement
|
| declaration_statement
|
||||||
| print_statement
|
| print_statement
|
||||||
|
| sync_statement
|
||||||
| error
|
| error
|
||||||
{
|
{
|
||||||
std::vector<std::string> builtinTokens;
|
std::vector<std::string> builtinTokens;
|
||||||
@@ -1027,9 +1031,13 @@ statement
|
|||||||
builtinTokens.push_back(*token);
|
builtinTokens.push_back(*token);
|
||||||
++token;
|
++token;
|
||||||
}
|
}
|
||||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
if (strlen(yytext) == 0)
|
||||||
std::string alts = lGetAlternates(alternates);
|
Error(@1, "Syntax error--premature end of file.");
|
||||||
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
else {
|
||||||
|
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||||
|
std::string alts = lGetAlternates(alternates);
|
||||||
|
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
||||||
|
}
|
||||||
$$ = NULL;
|
$$ = NULL;
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
@@ -1155,6 +1163,11 @@ jump_statement
|
|||||||
{ $$ = new ReturnStmt($2, true, @1); }
|
{ $$ = new ReturnStmt($2, true, @1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
sync_statement
|
||||||
|
: TOKEN_SYNC
|
||||||
|
{ $$ = new ExprStmt(new SyncExpr(@1), @1); }
|
||||||
|
;
|
||||||
|
|
||||||
print_statement
|
print_statement
|
||||||
: TOKEN_PRINT '(' string_constant ')'
|
: TOKEN_PRINT '(' string_constant ')'
|
||||||
{
|
{
|
||||||
@@ -1177,9 +1190,13 @@ translation_unit
|
|||||||
builtinTokens.push_back(*token);
|
builtinTokens.push_back(*token);
|
||||||
++token;
|
++token;
|
||||||
}
|
}
|
||||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
if (strlen(yytext) == 0)
|
||||||
std::string alts = lGetAlternates(alternates);
|
Error(@1, "Syntax error--premature end of file.");
|
||||||
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
else {
|
||||||
|
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||||
|
std::string alts = lGetAlternates(alternates);
|
||||||
|
Error(@1, "Syntax error--token \"%s\" unknown.%s", yytext, alts.c_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|||||||
11
run_tests.py
11
run_tests.py
@@ -17,6 +17,7 @@ import random
|
|||||||
import string
|
import string
|
||||||
import mutex
|
import mutex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import platform
|
||||||
|
|
||||||
parser = OptionParser()
|
parser = OptionParser()
|
||||||
parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order tests",
|
parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order tests",
|
||||||
@@ -30,6 +31,8 @@ parser.add_option('-t', '--target', dest='target',
|
|||||||
parser.add_option('-a', '--arch', dest='arch',
|
parser.add_option('-a', '--arch', dest='arch',
|
||||||
help='Set architecture (x86, x86-64)',
|
help='Set architecture (x86, x86-64)',
|
||||||
default="x86-64")
|
default="x86-64")
|
||||||
|
parser.add_option('-o', '--no-opt', dest='no_opt', help='Disable optimization',
|
||||||
|
default=False, action="store_true")
|
||||||
|
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
@@ -129,12 +132,16 @@ def run_tasks_from_queue(queue):
|
|||||||
exe_name = "%s.run" % filename
|
exe_name = "%s.run" % filename
|
||||||
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
||||||
(filename, obj_name, options.arch, options.target)
|
(filename, obj_name, options.arch, options.target)
|
||||||
|
if options.no_opt:
|
||||||
|
ispc_cmd += " -O0"
|
||||||
if options.arch == 'x86':
|
if options.arch == 'x86':
|
||||||
gcc_arch = '-m32'
|
gcc_arch = '-m32'
|
||||||
else:
|
else:
|
||||||
gcc_arch = '-m64'
|
gcc_arch = '-m64'
|
||||||
gcc_cmd = "g++ -Wl,-no_pie %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
gcc_cmd = "g++ %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
||||||
(gcc_arch, match, filename, exe_name)
|
(gcc_arch, match, filename, exe_name)
|
||||||
|
if platform.system() == 'Darwin':
|
||||||
|
gcc_cmd += ' -Wl,-no_pie'
|
||||||
if should_fail:
|
if should_fail:
|
||||||
gcc_cmd += " -DEXPECT_FAILURE"
|
gcc_cmd += " -DEXPECT_FAILURE"
|
||||||
|
|
||||||
@@ -152,6 +159,8 @@ def run_tasks_from_queue(queue):
|
|||||||
bitcode_file = "%s.bc" % filename
|
bitcode_file = "%s.bc" % filename
|
||||||
compile_cmd = "ispc --woff --emit-llvm %s --target=%s -o %s" % \
|
compile_cmd = "ispc --woff --emit-llvm %s --target=%s -o %s" % \
|
||||||
(filename, options.target, bitcode_file)
|
(filename, options.target, bitcode_file)
|
||||||
|
if options.no_opt:
|
||||||
|
compile_cmd += " -O0"
|
||||||
test_cmd = "ispc_test %s" % bitcode_file
|
test_cmd = "ispc_test %s" % bitcode_file
|
||||||
|
|
||||||
error_count += run_cmds([compile_cmd, test_cmd], filename, should_fail)
|
error_count += run_cmds([compile_cmd, test_cmd], filename, should_fail)
|
||||||
|
|||||||
@@ -2862,6 +2862,12 @@ static inline void seed_rng(reference uniform RNGState state, uniform unsigned i
|
|||||||
seed = __seed4(state, 0, seed);
|
seed = __seed4(state, 0, seed);
|
||||||
if (programCount == 8)
|
if (programCount == 8)
|
||||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||||
|
if (programCount == 16) {
|
||||||
|
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||||
|
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
|
||||||
|
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||||
|
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void fastmath() {
|
static inline void fastmath() {
|
||||||
|
|||||||
470
stmt.cpp
470
stmt.cpp
@@ -107,6 +107,12 @@ ExprStmt::Print(int indent) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ExprStmt::EstimateCost() const {
|
||||||
|
return expr ? expr->EstimateCost() : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// DeclStmt
|
// DeclStmt
|
||||||
|
|
||||||
@@ -399,12 +405,25 @@ DeclStmt::Print(int indent) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
DeclStmt::EstimateCost() const {
|
||||||
|
int cost = 0;
|
||||||
|
for (unsigned int i = 0; i < declaration->declarators.size(); ++i)
|
||||||
|
if (declaration->declarators[i]->initExpr)
|
||||||
|
cost += declaration->declarators[i]->initExpr->EstimateCost();
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// IfStmt
|
// IfStmt
|
||||||
|
|
||||||
IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool doUnif, SourcePos p)
|
IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p)
|
||||||
: Stmt(p), test(t), trueStmts(ts), falseStmts(fs),
|
: Stmt(p), test(t), trueStmts(ts), falseStmts(fs),
|
||||||
doCoherentCheck(doUnif && !g->opt.disableCoherentControlFlow) {
|
doAllCheck(checkCoherence &&
|
||||||
|
!g->opt.disableCoherentControlFlow),
|
||||||
|
doAnyCheck(test->GetType() != NULL &&
|
||||||
|
test->GetType()->IsVaryingType()) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -436,62 +455,46 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
|
|
||||||
ctx->SetDebugPos(pos);
|
ctx->SetDebugPos(pos);
|
||||||
bool isUniform = testType->IsUniformType();
|
bool isUniform = testType->IsUniformType();
|
||||||
|
|
||||||
|
llvm::Value *testValue = test->GetValue(ctx);
|
||||||
|
if (testValue == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
if (isUniform) {
|
if (isUniform) {
|
||||||
ctx->StartUniformIf(ctx->GetMask());
|
ctx->StartUniformIf(ctx->GetMask());
|
||||||
if (doCoherentCheck)
|
if (doAllCheck)
|
||||||
Warning(test->pos, "Uniform condition supplied to cif statement.");
|
Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
|
||||||
|
|
||||||
// 'If' statements with uniform conditions are relatively
|
// 'If' statements with uniform conditions are relatively
|
||||||
// straightforward. We evaluate the condition and then jump to
|
// straightforward. We evaluate the condition and then jump to
|
||||||
// either the 'then' or 'else' clause depending on its value.
|
// either the 'then' or 'else' clause depending on its value.
|
||||||
llvm::Value *vtest = test->GetValue(ctx);
|
llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
|
||||||
if (vtest != NULL) {
|
llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
|
||||||
llvm::BasicBlock *bthen = ctx->CreateBasicBlock("if_then");
|
llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
|
||||||
llvm::BasicBlock *belse = ctx->CreateBasicBlock("if_else");
|
|
||||||
llvm::BasicBlock *bexit = ctx->CreateBasicBlock("if_exit");
|
|
||||||
|
|
||||||
// Jump to the appropriate basic block based on the value of
|
// Jump to the appropriate basic block based on the value of
|
||||||
// the 'if' test
|
// the 'if' test
|
||||||
ctx->BranchInst(bthen, belse, vtest);
|
ctx->BranchInst(bthen, belse, testValue);
|
||||||
|
|
||||||
// Emit code for the 'true' case
|
// Emit code for the 'true' case
|
||||||
ctx->SetCurrentBasicBlock(bthen);
|
ctx->SetCurrentBasicBlock(bthen);
|
||||||
lEmitIfStatements(ctx, trueStmts, "true");
|
lEmitIfStatements(ctx, trueStmts, "true");
|
||||||
if (ctx->GetCurrentBasicBlock())
|
if (ctx->GetCurrentBasicBlock())
|
||||||
ctx->BranchInst(bexit);
|
ctx->BranchInst(bexit);
|
||||||
|
|
||||||
// Emit code for the 'false' case
|
// Emit code for the 'false' case
|
||||||
ctx->SetCurrentBasicBlock(belse);
|
ctx->SetCurrentBasicBlock(belse);
|
||||||
lEmitIfStatements(ctx, falseStmts, "false");
|
lEmitIfStatements(ctx, falseStmts, "false");
|
||||||
if (ctx->GetCurrentBasicBlock())
|
if (ctx->GetCurrentBasicBlock())
|
||||||
ctx->BranchInst(bexit);
|
ctx->BranchInst(bexit);
|
||||||
|
|
||||||
// Set the active basic block to the newly-created exit block
|
// Set the active basic block to the newly-created exit block
|
||||||
// so that subsequent emitted code starts there.
|
// so that subsequent emitted code starts there.
|
||||||
ctx->SetCurrentBasicBlock(bexit);
|
ctx->SetCurrentBasicBlock(bexit);
|
||||||
}
|
|
||||||
ctx->EndIf();
|
ctx->EndIf();
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
// Code for 'If' statemnts with 'varying' conditions can be
|
emitVaryingIf(ctx, testValue);
|
||||||
// generated in two ways; one takes some care to see if all of the
|
|
||||||
// active program instances want to follow only the 'true' or
|
|
||||||
// 'false' cases, and the other always runs both cases but sets the
|
|
||||||
// mask appropriately. The first case is handled by the
|
|
||||||
// IfStmt::emitCoherentTests() call, and the second is handled by
|
|
||||||
// IfStmt::emitMaskedTrueAndFalse().
|
|
||||||
llvm::Value *testValue = test->GetValue(ctx);
|
|
||||||
if (testValue) {
|
|
||||||
if (doCoherentCheck)
|
|
||||||
emitCoherentTests(ctx, testValue);
|
|
||||||
else {
|
|
||||||
llvm::Value *oldMask = ctx->GetMask();
|
|
||||||
ctx->StartVaryingIf(oldMask);
|
|
||||||
emitMaskedTrueAndFalse(ctx, oldMask, testValue);
|
|
||||||
ctx->EndIf();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -535,9 +538,17 @@ Stmt *IfStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
IfStmt::EstimateCost() const {
|
||||||
|
return ((test ? test->EstimateCost() : 0) +
|
||||||
|
(trueStmts ? trueStmts->EstimateCost() : 0) +
|
||||||
|
(falseStmts ? falseStmts->EstimateCost() : 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
IfStmt::Print(int indent) const {
|
IfStmt::Print(int indent) const {
|
||||||
printf("%*cIf Stmt %s", indent, ' ', doCoherentCheck ? "DO COHERENT CHECK" : "");
|
printf("%*cIf Stmt %s", indent, ' ', doAllCheck ? "DO ALL CHECK" : "");
|
||||||
pos.Print();
|
pos.Print();
|
||||||
printf("\n%*cTest: ", indent+4, ' ');
|
printf("\n%*cTest: ", indent+4, ' ');
|
||||||
test->Print();
|
test->Print();
|
||||||
@@ -554,7 +565,7 @@ IfStmt::Print(int indent) const {
|
|||||||
|
|
||||||
|
|
||||||
/** Emit code to run both the true and false statements for the if test,
|
/** Emit code to run both the true and false statements for the if test,
|
||||||
with the mask set appropriately before runnign each one.
|
with the mask set appropriately before running each one.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||||
@@ -574,11 +585,185 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Similar to the Stmt variant of this function, this conservatively
|
||||||
|
checks to see if it's safe to run the code for the given Expr even if
|
||||||
|
the mask is 'all off'.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lSafeToRunWithAllLanesOff(Expr *expr) {
|
||||||
|
if (expr == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
UnaryExpr *ue;
|
||||||
|
if ((ue = dynamic_cast<UnaryExpr *>(expr)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(ue->expr);
|
||||||
|
|
||||||
|
BinaryExpr *be;
|
||||||
|
if ((be = dynamic_cast<BinaryExpr *>(expr)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(be->arg0) &&
|
||||||
|
lSafeToRunWithAllLanesOff(be->arg1));
|
||||||
|
|
||||||
|
AssignExpr *ae;
|
||||||
|
if ((ae = dynamic_cast<AssignExpr *>(expr)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(ae->lvalue) &&
|
||||||
|
lSafeToRunWithAllLanesOff(ae->rvalue));
|
||||||
|
|
||||||
|
SelectExpr *se;
|
||||||
|
if ((se = dynamic_cast<SelectExpr *>(expr)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(se->test) &&
|
||||||
|
lSafeToRunWithAllLanesOff(se->expr1) &&
|
||||||
|
lSafeToRunWithAllLanesOff(se->expr2));
|
||||||
|
|
||||||
|
ExprList *el;
|
||||||
|
if ((el = dynamic_cast<ExprList *>(expr)) != NULL) {
|
||||||
|
for (unsigned int i = 0; i < el->exprs.size(); ++i)
|
||||||
|
if (!lSafeToRunWithAllLanesOff(el->exprs[i]))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
FunctionCallExpr *fce;
|
||||||
|
if ((fce = dynamic_cast<FunctionCallExpr *>(expr)) != NULL)
|
||||||
|
// FIXME: If we could somehow determine that the function being
|
||||||
|
// called was safe (and all of the args Exprs were safe, then it'd
|
||||||
|
// be nice to be able to return true here. (Consider a call to
|
||||||
|
// e.g. floatbits() in the stdlib.) Unfortunately for now we just
|
||||||
|
// have to be conservative.
|
||||||
|
return false;
|
||||||
|
|
||||||
|
IndexExpr *ie;
|
||||||
|
if ((ie = dynamic_cast<IndexExpr *>(expr)) != NULL) {
|
||||||
|
// If we can determine at compile time the size of the array/vector
|
||||||
|
// and if the indices are compile-time constants, then we may be
|
||||||
|
// able to safely run this under a predicated if statement..
|
||||||
|
if (ie->arrayOrVector == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const Type *type = ie->arrayOrVector->GetType();
|
||||||
|
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||||
|
if (type == NULL || ce == NULL)
|
||||||
|
return false;
|
||||||
|
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||||
|
type = type->GetReferenceTarget();
|
||||||
|
|
||||||
|
const SequentialType *seqType =
|
||||||
|
dynamic_cast<const SequentialType *>(type);
|
||||||
|
assert(seqType != NULL);
|
||||||
|
int nElements = seqType->GetElementCount();
|
||||||
|
if (nElements == 0)
|
||||||
|
// Unsized array, so we can't be sure
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int32_t indices[ISPC_MAX_NVEC];
|
||||||
|
int count = ce->AsInt32(indices);
|
||||||
|
for (int i = 0; i < count; ++i)
|
||||||
|
if (indices[i] < 0 || indices[i] >= nElements)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// All indices are in-bounds
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
MemberExpr *me;
|
||||||
|
if ((me = dynamic_cast<MemberExpr *>(expr)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(me->expr);
|
||||||
|
|
||||||
|
if (dynamic_cast<ConstExpr *>(expr) != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
TypeCastExpr *tce;
|
||||||
|
if ((tce = dynamic_cast<TypeCastExpr *>(expr)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(tce->expr);
|
||||||
|
|
||||||
|
ReferenceExpr *re;
|
||||||
|
if ((re = dynamic_cast<ReferenceExpr *>(expr)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(re->expr);
|
||||||
|
|
||||||
|
DereferenceExpr *dre;
|
||||||
|
if ((dre = dynamic_cast<DereferenceExpr *>(expr)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(dre->expr);
|
||||||
|
|
||||||
|
if (dynamic_cast<SymbolExpr *>(expr) != NULL ||
|
||||||
|
dynamic_cast<FunctionSymbolExpr *>(expr) != NULL ||
|
||||||
|
dynamic_cast<SyncExpr *>(expr) != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
FATAL("Unknown Expr type in lSafeToRunWithAllLanesOff()");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Given an arbitrary statement, this function conservatively tests to see
|
||||||
|
if it's safe to run the code for the statement even if the mask is all
|
||||||
|
off. Here we just need to determine which kind of statement we have
|
||||||
|
and recursively traverse it and/or the expressions inside of it.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
||||||
|
if (stmt == NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
ExprStmt *es;
|
||||||
|
if ((es = dynamic_cast<ExprStmt *>(stmt)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(es->expr);
|
||||||
|
|
||||||
|
DeclStmt *ds;
|
||||||
|
if ((ds = dynamic_cast<DeclStmt *>(stmt)) != NULL) {
|
||||||
|
for (unsigned int i = 0; i < ds->declaration->declarators.size(); ++i)
|
||||||
|
if (!lSafeToRunWithAllLanesOff(ds->declaration->declarators[i]->initExpr))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
IfStmt *is;
|
||||||
|
if ((is = dynamic_cast<IfStmt *>(stmt)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(is->test) &&
|
||||||
|
lSafeToRunWithAllLanesOff(is->trueStmts) &&
|
||||||
|
lSafeToRunWithAllLanesOff(is->falseStmts));
|
||||||
|
|
||||||
|
DoStmt *dos;
|
||||||
|
if ((dos = dynamic_cast<DoStmt *>(stmt)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(dos->testExpr) &&
|
||||||
|
lSafeToRunWithAllLanesOff(dos->bodyStmts));
|
||||||
|
|
||||||
|
ForStmt *fs;
|
||||||
|
if ((fs = dynamic_cast<ForStmt *>(stmt)) != NULL)
|
||||||
|
return (lSafeToRunWithAllLanesOff(fs->init) &&
|
||||||
|
lSafeToRunWithAllLanesOff(fs->test) &&
|
||||||
|
lSafeToRunWithAllLanesOff(fs->step) &&
|
||||||
|
lSafeToRunWithAllLanesOff(fs->stmts));
|
||||||
|
|
||||||
|
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
|
||||||
|
dynamic_cast<ContinueStmt *>(stmt) != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
ReturnStmt *rs;
|
||||||
|
if ((rs = dynamic_cast<ReturnStmt *>(stmt)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(rs->val);
|
||||||
|
|
||||||
|
StmtList *sl;
|
||||||
|
if ((sl = dynamic_cast<StmtList *>(stmt)) != NULL) {
|
||||||
|
const std::vector<Stmt *> &sls = sl->GetStatements();
|
||||||
|
for (unsigned int i = 0; i < sls.size(); ++i)
|
||||||
|
if (!lSafeToRunWithAllLanesOff(sls[i]))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
PrintStmt *ps;
|
||||||
|
if ((ps = dynamic_cast<PrintStmt *>(stmt)) != NULL)
|
||||||
|
return lSafeToRunWithAllLanesOff(ps->values);
|
||||||
|
|
||||||
|
FATAL("Unexpected stmt type in lSafeToRunWithAllLanesOff()");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Emit code for an if test that checks the mask and the test values and
|
/** Emit code for an if test that checks the mask and the test values and
|
||||||
tries to be smart about jumping over code that doesn't need to be run.
|
tries to be smart about jumping over code that doesn't need to be run.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
IfStmt::emitCoherentTests(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||||
llvm::Value *oldMask = ctx->GetMask();
|
llvm::Value *oldMask = ctx->GetMask();
|
||||||
if (oldMask == LLVMMaskAllOn) {
|
if (oldMask == LLVMMaskAllOn) {
|
||||||
// We can tell that the mask is on statically at compile time; just
|
// We can tell that the mask is on statically at compile time; just
|
||||||
@@ -587,7 +772,7 @@ IfStmt::emitCoherentTests(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
|||||||
emitMaskAllOn(ctx, ltest, bDone);
|
emitMaskAllOn(ctx, ltest, bDone);
|
||||||
ctx->SetCurrentBasicBlock(bDone);
|
ctx->SetCurrentBasicBlock(bDone);
|
||||||
}
|
}
|
||||||
else {
|
else if (doAllCheck) {
|
||||||
// We can't tell if the mask going into the if is all on at the
|
// We can't tell if the mask going into the if is all on at the
|
||||||
// compile time. Emit code to check for this and then either run
|
// compile time. Emit code to check for this and then either run
|
||||||
// the code for the 'all on' or the 'mixed' case depending on the
|
// the code for the 'all on' or the 'mixed' case depending on the
|
||||||
@@ -619,6 +804,43 @@ IfStmt::emitCoherentTests(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
|||||||
// paths above jump to when they're done.
|
// paths above jump to when they're done.
|
||||||
ctx->SetCurrentBasicBlock(bDone);
|
ctx->SetCurrentBasicBlock(bDone);
|
||||||
}
|
}
|
||||||
|
else if (trueStmts != NULL || falseStmts != NULL) {
|
||||||
|
// If there is nothing that is potentially unsafe to run with all
|
||||||
|
// lanes off in the true and false statements and if the total
|
||||||
|
// complexity of those two is relatively simple, then we'll go
|
||||||
|
// ahead and emit straightline code that runs both sides, updating
|
||||||
|
// the mask accordingly. This is useful for efficiently compiling
|
||||||
|
// things like:
|
||||||
|
//
|
||||||
|
// if (foo) x = 0;
|
||||||
|
// else ++x;
|
||||||
|
//
|
||||||
|
// Where the overhead of checking if any of the program instances wants
|
||||||
|
// to run one side or the other is more than the actual computation.
|
||||||
|
// The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this
|
||||||
|
// for potentially dangerous code like:
|
||||||
|
//
|
||||||
|
// if (index < count) array[index] = 0;
|
||||||
|
//
|
||||||
|
// where our use of blend for conditional assignments doesn't check
|
||||||
|
// for the 'all lanes' off case.
|
||||||
|
if (lSafeToRunWithAllLanesOff(trueStmts) &&
|
||||||
|
lSafeToRunWithAllLanesOff(falseStmts) &&
|
||||||
|
(((trueStmts ? trueStmts->EstimateCost() : 0) +
|
||||||
|
(falseStmts ? falseStmts->EstimateCost() : 0)) <
|
||||||
|
PREDICATE_SAFE_IF_STATEMENT_COST)) {
|
||||||
|
ctx->StartVaryingIf(oldMask);
|
||||||
|
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
|
||||||
|
assert(ctx->GetCurrentBasicBlock());
|
||||||
|
ctx->EndIf();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(doAnyCheck);
|
||||||
|
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
|
||||||
|
emitMaskMixed(ctx, oldMask, ltest, bDone);
|
||||||
|
ctx->SetCurrentBasicBlock(bDone);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -677,69 +899,50 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Emits code that checks to see if for all of the lanes where the mask is
|
|
||||||
on, the test has the value true.
|
|
||||||
*/
|
|
||||||
static llvm::Value *
|
|
||||||
lTestMatchesMask(FunctionEmitContext *ctx, llvm::Value *test, llvm::Value *mask) {
|
|
||||||
llvm::Value *testAndMask = ctx->BinaryOperator(llvm::Instruction::And, test,
|
|
||||||
mask, "test&mask");
|
|
||||||
return ctx->MasksAllEqual(testAndMask, mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Emit code for an 'if' test where the lane mask is known to be mixed
|
/** Emit code for an 'if' test where the lane mask is known to be mixed
|
||||||
on/off going into it.
|
on/off going into it.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||||
llvm::Value *ltest, llvm::BasicBlock *bDone) const {
|
llvm::Value *ltest, llvm::BasicBlock *bDone) const {
|
||||||
// First, see if, for all of the lanes where the mask is on, if the
|
ctx->StartVaryingIf(oldMask);
|
||||||
// value of the test is on. (i.e. (test&mask) == mask). In this case,
|
llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
|
||||||
// we only need to run the 'true' case code, since the lanes where the
|
if (trueStmts != NULL) {
|
||||||
// test was false aren't supposed to be running here anyway.
|
llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
|
||||||
llvm::Value *testAllEqual = lTestMatchesMask(ctx, ltest, oldMask);
|
ctx->MaskAnd(oldMask, ltest);
|
||||||
llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_mixed_test_all");
|
|
||||||
llvm::BasicBlock *bTestAnyCheck = ctx->CreateBasicBlock("cif_mixed_test_any_check");
|
|
||||||
ctx->BranchInst(bTestAll, bTestAnyCheck, testAllEqual);
|
|
||||||
|
|
||||||
// Emit code for the (test&mask)==mask case. Not only do we only need
|
// Do any of the program instances want to run the 'true'
|
||||||
// to emit code for the true statements, but we don't need to modify
|
// block? If not, jump ahead to bNext.
|
||||||
// the mask's value; it's already correct.
|
llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask());
|
||||||
ctx->SetCurrentBasicBlock(bTestAll);
|
ctx->BranchInst(bRunTrue, bNext, maskAnyQ);
|
||||||
ctx->StartVaryingIf(ctx->GetMask());
|
|
||||||
lEmitIfStatements(ctx, trueStmts, "cif: all running lanes want just true stmts");
|
// Emit statements for true
|
||||||
assert(ctx->GetCurrentBasicBlock());
|
ctx->SetCurrentBasicBlock(bRunTrue);
|
||||||
ctx->EndIf();
|
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||||
|
assert(ctx->GetCurrentBasicBlock());
|
||||||
|
ctx->BranchInst(bNext);
|
||||||
|
ctx->SetCurrentBasicBlock(bNext);
|
||||||
|
}
|
||||||
|
if (falseStmts != NULL) {
|
||||||
|
llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
|
||||||
|
bNext = ctx->CreateBasicBlock("safe_if_after_false");
|
||||||
|
ctx->MaskAndNot(oldMask, ltest);
|
||||||
|
|
||||||
|
// Similarly, check to see if any of the instances want to
|
||||||
|
// run the 'false' block...
|
||||||
|
llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask());
|
||||||
|
ctx->BranchInst(bRunFalse, bNext, maskAnyQ);
|
||||||
|
|
||||||
|
// Emit code for false
|
||||||
|
ctx->SetCurrentBasicBlock(bRunFalse);
|
||||||
|
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||||
|
assert(ctx->GetCurrentBasicBlock());
|
||||||
|
ctx->BranchInst(bNext);
|
||||||
|
ctx->SetCurrentBasicBlock(bNext);
|
||||||
|
}
|
||||||
ctx->BranchInst(bDone);
|
ctx->BranchInst(bDone);
|
||||||
|
ctx->SetCurrentBasicBlock(bDone);
|
||||||
// Next, see if the active lanes only need to run the false case--i.e. if
|
|
||||||
// (~test & mask) == mask.
|
|
||||||
ctx->SetCurrentBasicBlock(bTestAnyCheck);
|
|
||||||
llvm::Value *notTest = ctx->BinaryOperator(llvm::Instruction::Xor, LLVMMaskAllOn,
|
|
||||||
ltest, "~test");
|
|
||||||
llvm::Value *notMatchesMask = lTestMatchesMask(ctx, notTest, oldMask);
|
|
||||||
llvm::BasicBlock *bTestAllNot = ctx->CreateBasicBlock("cif_mixed_test_none");
|
|
||||||
llvm::BasicBlock *bTestMixed = ctx->CreateBasicBlock("cif_mixed_test_mixed");
|
|
||||||
ctx->BranchInst(bTestAllNot, bTestMixed, notMatchesMask);
|
|
||||||
|
|
||||||
// Emit code for the (~test & mask) == mask case. We only need the
|
|
||||||
// 'false' statements and again don't need to modify the value of the
|
|
||||||
// mask.
|
|
||||||
ctx->SetCurrentBasicBlock(bTestAllNot);
|
|
||||||
ctx->StartVaryingIf(ctx->GetMask());
|
|
||||||
lEmitIfStatements(ctx, falseStmts, "cif: all running lanes want just false stmts");
|
|
||||||
assert(ctx->GetCurrentBasicBlock());
|
|
||||||
ctx->EndIf();
|
ctx->EndIf();
|
||||||
ctx->BranchInst(bDone);
|
|
||||||
|
|
||||||
// It's mixed; we need to run both the true and false cases and also do
|
|
||||||
// mask update stuff.
|
|
||||||
ctx->SetCurrentBasicBlock(bTestMixed);
|
|
||||||
ctx->StartVaryingIf(ctx->GetMask());
|
|
||||||
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
|
|
||||||
ctx->EndIf();
|
|
||||||
ctx->BranchInst(bDone);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -955,6 +1158,13 @@ DoStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
DoStmt::EstimateCost() const {
|
||||||
|
return ((testExpr ? testExpr->EstimateCost() : 0) +
|
||||||
|
(bodyStmts ? bodyStmts->EstimateCost() : 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
DoStmt::Print(int indent) const {
|
DoStmt::Print(int indent) const {
|
||||||
printf("%*cDo Stmt", indent, ' ');
|
printf("%*cDo Stmt", indent, ' ');
|
||||||
@@ -1162,6 +1372,20 @@ ForStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ForStmt::EstimateCost() const {
|
||||||
|
bool uniformTest = test ? test->GetType()->IsUniformType() :
|
||||||
|
(!g->opt.disableUniformControlFlow &&
|
||||||
|
!lHasVaryingBreakOrContinue(stmts));
|
||||||
|
|
||||||
|
return ((init ? init->EstimateCost() : 0) +
|
||||||
|
(test ? test->EstimateCost() : 0) +
|
||||||
|
(step ? step->EstimateCost() : 0) +
|
||||||
|
(stmts ? stmts->EstimateCost() : 0) +
|
||||||
|
(uniformTest ? COST_UNIFORM_LOOP : COST_VARYING_LOOP));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ForStmt::Print(int indent) const {
|
ForStmt::Print(int indent) const {
|
||||||
printf("%*cFor Stmt", indent, ' ');
|
printf("%*cFor Stmt", indent, ' ');
|
||||||
@@ -1216,6 +1440,13 @@ BreakStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
BreakStmt::EstimateCost() const {
|
||||||
|
return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE :
|
||||||
|
COST_REGULAR_BREAK_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
BreakStmt::Print(int indent) const {
|
BreakStmt::Print(int indent) const {
|
||||||
printf("%*c%sBreak Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
printf("%*c%sBreak Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
||||||
@@ -1254,6 +1485,13 @@ ContinueStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ContinueStmt::EstimateCost() const {
|
||||||
|
return doCoherenceCheck ? COST_COHERENT_BREAK_CONTINE :
|
||||||
|
COST_REGULAR_BREAK_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ContinueStmt::Print(int indent) const {
|
ContinueStmt::Print(int indent) const {
|
||||||
printf("%*c%sContinue Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
printf("%*c%sContinue Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
||||||
@@ -1300,6 +1538,12 @@ ReturnStmt::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ReturnStmt::EstimateCost() const {
|
||||||
|
return COST_RETURN + (val ? val->EstimateCost() : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ReturnStmt::Print(int indent) const {
|
ReturnStmt::Print(int indent) const {
|
||||||
printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
||||||
@@ -1345,6 +1589,16 @@ StmtList::TypeCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
StmtList::EstimateCost() const {
|
||||||
|
int cost = 0;
|
||||||
|
for (unsigned int i = 0; i < stmts.size(); ++i)
|
||||||
|
if (stmts[i])
|
||||||
|
cost += stmts[i]->EstimateCost();
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
StmtList::Print(int indent) const {
|
StmtList::Print(int indent) const {
|
||||||
printf("%*cStmt List", indent, ' ');
|
printf("%*cStmt List", indent, ' ');
|
||||||
@@ -1545,3 +1799,11 @@ PrintStmt::TypeCheck() {
|
|||||||
values = values->TypeCheck();
|
values = values->TypeCheck();
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PrintStmt::EstimateCost() const {
|
||||||
|
return COST_FUNCALL + (values ? values->EstimateCost() : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
23
stmt.h
23
stmt.h
@@ -75,8 +75,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -92,8 +92,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Declaration *declaration;
|
Declaration *declaration;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -103,13 +103,14 @@ private:
|
|||||||
class IfStmt : public Stmt {
|
class IfStmt : public Stmt {
|
||||||
public:
|
public:
|
||||||
IfStmt(Expr *testExpr, Stmt *trueStmts, Stmt *falseStmts,
|
IfStmt(Expr *testExpr, Stmt *trueStmts, Stmt *falseStmts,
|
||||||
bool doCoherentCheck, SourcePos pos);
|
bool doAllCheck, SourcePos pos);
|
||||||
|
|
||||||
void EmitCode(FunctionEmitContext *ctx) const;
|
void EmitCode(FunctionEmitContext *ctx) const;
|
||||||
void Print(int indent) const;
|
void Print(int indent) const;
|
||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
// @todo these are only public for lHasVaryingBreakOrContinue(); would
|
// @todo these are only public for lHasVaryingBreakOrContinue(); would
|
||||||
// be nice to clean that up...
|
// be nice to clean that up...
|
||||||
@@ -125,11 +126,12 @@ private:
|
|||||||
source and thus, if the emitted code should check to see if all
|
source and thus, if the emitted code should check to see if all
|
||||||
active program instances want to follow just one of the 'true' or
|
active program instances want to follow just one of the 'true' or
|
||||||
'false' blocks. */
|
'false' blocks. */
|
||||||
const bool doCoherentCheck;
|
const bool doAllCheck;
|
||||||
|
const bool doAnyCheck;
|
||||||
|
|
||||||
void emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
void emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||||
llvm::Value *test) const;
|
llvm::Value *test) const;
|
||||||
void emitCoherentTests(FunctionEmitContext *ctx, llvm::Value *test) const;
|
void emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *test) const;
|
||||||
void emitMaskAllOn(FunctionEmitContext *ctx,
|
void emitMaskAllOn(FunctionEmitContext *ctx,
|
||||||
llvm::Value *test, llvm::BasicBlock *bDone) const;
|
llvm::Value *test, llvm::BasicBlock *bDone) const;
|
||||||
void emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
void emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||||
@@ -150,8 +152,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *testExpr;
|
Expr *testExpr;
|
||||||
Stmt *bodyStmts;
|
Stmt *bodyStmts;
|
||||||
const bool doCoherentCheck;
|
const bool doCoherentCheck;
|
||||||
@@ -171,8 +173,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
/** 'for' statement initializer; may be NULL, indicating no initializer */
|
/** 'for' statement initializer; may be NULL, indicating no initializer */
|
||||||
Stmt *init;
|
Stmt *init;
|
||||||
/** expression that returns a value indicating whether the loop should
|
/** expression that returns a value indicating whether the loop should
|
||||||
@@ -198,6 +200,7 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** This indicates whether the generated code will check to see if no
|
/** This indicates whether the generated code will check to see if no
|
||||||
@@ -219,6 +222,7 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** This indicates whether the generated code will check to see if no
|
/** This indicates whether the generated code will check to see if no
|
||||||
@@ -240,8 +244,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
Expr *val;
|
Expr *val;
|
||||||
/** This indicates whether the generated code will check to see if no
|
/** This indicates whether the generated code will check to see if no
|
||||||
more program instances are currently running after the return, in
|
more program instances are currently running after the return, in
|
||||||
@@ -262,6 +266,7 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
void Add(Stmt *s) { if (s) stmts.push_back(s); }
|
void Add(Stmt *s) { if (s) stmts.push_back(s); }
|
||||||
const std::vector<Stmt *> &GetStatements() { return stmts; }
|
const std::vector<Stmt *> &GetStatements() { return stmts; }
|
||||||
@@ -289,8 +294,8 @@ public:
|
|||||||
|
|
||||||
Stmt *Optimize();
|
Stmt *Optimize();
|
||||||
Stmt *TypeCheck();
|
Stmt *TypeCheck();
|
||||||
|
int EstimateCost() const;
|
||||||
|
|
||||||
private:
|
|
||||||
/** Format string for the print() statement. */
|
/** Format string for the print() statement. */
|
||||||
const std::string format;
|
const std::string format;
|
||||||
/** This holds the arguments passed to the print() statement. If more
|
/** This holds the arguments passed to the print() statement. If more
|
||||||
|
|||||||
@@ -31,9 +31,21 @@
|
|||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
extern int width();
|
extern int width();
|
||||||
@@ -48,6 +60,8 @@ extern "C" {
|
|||||||
|
|
||||||
void ISPCLaunch(void *f, void *d);
|
void ISPCLaunch(void *f, void *d);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ISPCLaunch(void *f, void *d) {
|
void ISPCLaunch(void *f, void *d) {
|
||||||
@@ -60,6 +74,37 @@ void ISPCSync() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Allocates `size` bytes at an address that is a multiple of `alignment`.

    `alignment` must be a power of two: it is used as a bit mask in the
    portable path below.  Returns NULL if the underlying allocator fails.
    Memory obtained here must be released with ISPCFree(), never with a
    bare free(), because the portable path returns a pointer into the
    middle of the real allocation. */
void *ISPCMalloc(int64_t size, int32_t alignment) {
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc(size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // OS X and any platform without a dedicated branch: over-allocate by
    // (alignment-1) bytes of slack plus one pointer-sized slot, then hand
    // out the first suitably aligned address after that slot.
    void *mem = malloc(size + (alignment - 1) + sizeof(void*));
    if (mem == NULL)
        return NULL;
    char *amem = ((char*)mem) + sizeof(void*);
    // Padding needed to reach the next aligned address.  The trailing
    // mask turns a padding of `alignment` into 0 when amem is already
    // aligned; without it the user region would extend one byte past the
    // end of the allocation, which reserves only (alignment-1) slack bytes.
    amem += (alignment - (reinterpret_cast<uintptr_t>(amem) & (alignment - 1))) &
            (alignment - 1);
    // Stash the pointer malloc() returned just below the aligned block so
    // that ISPCFree() can recover it.
    ((void**)amem)[-1] = mem;
    return amem;
#endif
}


/** Releases memory obtained from ISPCMalloc().  Passing NULL is a no-op in
    the portable path (the platform-specific calls receive it unchanged). */
void ISPCFree(void *ptr) {
#if defined(ISPC_IS_WINDOWS)
    _aligned_free(ptr);
#elif defined(ISPC_IS_LINUX)
    free(ptr);
#else
    // The original malloc() pointer was stored immediately before the
    // aligned block by ISPCMalloc(); there is nothing to read for NULL.
    if (ptr != NULL)
        free(((void**)ptr)[-1]);
#endif
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int w = width();
|
int w = width();
|
||||||
assert(w <= 16);
|
assert(w <= 16);
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
static float x[2][1];
|
static float x[1][2];
|
||||||
|
|
||||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
float a = aFOO[programIndex];
|
float a = aFOO[programIndex];
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[4]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 0;
|
RET[programIndex] = 0;
|
||||||
RET[3] = 4;
|
RET[3] = 4;
|
||||||
RET[4] = 5;
|
RET[4] = 5;
|
||||||
|
|||||||
@@ -11,5 +11,5 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
|
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 10;
|
RET[programIndex] = max(10, 1 + programIndex);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,10 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
uniform int pc[16] = { 1, 1, 2, 1, 2, 2, 3, 1, 1, 2, 2, 3, 2, 3, 3, 4 };
|
uniform int pc[16] = { 1, 1, 2, 1,
|
||||||
|
2, 2, 3, 1,
|
||||||
|
2, 2, 3, 2,
|
||||||
|
3, 3, 4, 1 };
|
||||||
RET[programIndex] = pc[programIndex];
|
RET[programIndex] = pc[programIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ export void result(uniform float RET[]) {
|
|||||||
uniform int x = -1234;
|
uniform int x = -1234;
|
||||||
if (programCount == 4) x = 10;
|
if (programCount == 4) x = 10;
|
||||||
else if (programCount == 8) x = 36;
|
else if (programCount == 8) x = 36;
|
||||||
else if (programCount == 16) x = 124;
|
else if (programCount == 16) x = 136;
|
||||||
RET[programIndex] = x;
|
RET[programIndex] = x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ export void result(uniform float RET[]) {
|
|||||||
uniform int x = -1234;
|
uniform int x = -1234;
|
||||||
if (programCount == 4) x = 10;
|
if (programCount == 4) x = 10;
|
||||||
else if (programCount == 8) x = 36;
|
else if (programCount == 8) x = 36;
|
||||||
else if (programCount == 16) x = 124;
|
else if (programCount == 16) x = 136;
|
||||||
RET[programIndex] = x;
|
RET[programIndex] = x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,17 +9,6 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform float<5> z = c ? x : y;
|
uniform float<5> z = c ? x : y;
|
||||||
RET[programIndex] = z[programIndex];
|
RET[programIndex] = z[programIndex];
|
||||||
}
|
}
|
||||||
/*CO return x[y];*/
|
|
||||||
|
|
||||||
/*CO int index = aFOO[programIndex];*/
|
|
||||||
/*CO index = min(index, 3);*/
|
|
||||||
/*CO return x[index];*/
|
|
||||||
|
|
||||||
/*CO return x << 1;*/
|
|
||||||
/*CO return c[0] ? 1 : 0;*/
|
|
||||||
/*CO x = b;*/
|
|
||||||
/*CO y = b;*/
|
|
||||||
/*CO return x+y;*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
|
|||||||
17
tests/shuffle-flatten.ispc
Normal file
17
tests/shuffle-flatten.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
// Test kernel from tests/shuffle-flatten.ispc.  Every program instance
// first stores 10 to its RET slot.  Instances with programIndex < 1 then
// overwrite it with aFOO[foo + programIndex], where foo is the uniform
// value extract(tmp1, 0) taken from the shuffle() result computed above.
// NOTE(review): presumably a regression test for how the compiler handles
// the shuffle() result feeding a uniform index -- confirm against the
// commit that added it.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
int tmp1 = shuffle(programIndex, 0, programIndex);
|
||||||
|
|
||||||
|
RET[programIndex] = 10;
|
||||||
|
if (programIndex < 1) {
|
||||||
|
uniform int foo = extract(tmp1, 0);
|
||||||
|
RET[programIndex] = aFOO[foo + programIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 10;
|
||||||
|
RET[0] = 1;
|
||||||
|
}
|
||||||
38
type.cpp
38
type.cpp
@@ -45,9 +45,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <llvm/Value.h>
|
#include <llvm/Value.h>
|
||||||
#include <llvm/Module.h>
|
#include <llvm/Module.h>
|
||||||
#ifndef LLVM_2_8
|
|
||||||
#include <llvm/Analysis/DIBuilder.h>
|
#include <llvm/Analysis/DIBuilder.h>
|
||||||
#endif
|
|
||||||
#include <llvm/Analysis/DebugInfo.h>
|
#include <llvm/Analysis/DebugInfo.h>
|
||||||
#include <llvm/Support/Dwarf.h>
|
#include <llvm/Support/Dwarf.h>
|
||||||
|
|
||||||
@@ -414,10 +412,6 @@ AtomicType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
AtomicType::GetDIType(llvm::DIDescriptor scope) const {
|
AtomicType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
if (isUniform) {
|
if (isUniform) {
|
||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID:
|
case TYPE_VOID:
|
||||||
@@ -484,7 +478,6 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth;
|
uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth;
|
||||||
return m->diBuilder->createVectorType(size, align, unifType, subArray);
|
return m->diBuilder->createVectorType(size, align, unifType, subArray);
|
||||||
}
|
}
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -645,10 +638,6 @@ EnumType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
std::vector<llvm::Value *> enumeratorDescriptors;
|
std::vector<llvm::Value *> enumeratorDescriptors;
|
||||||
for (unsigned int i = 0; i < enumerators.size(); ++i) {
|
for (unsigned int i = 0; i < enumerators.size(); ++i) {
|
||||||
unsigned int enumeratorValue;
|
unsigned int enumeratorValue;
|
||||||
@@ -688,7 +677,6 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
uint64_t size = diType.getSizeInBits() * g->target.vectorWidth;
|
uint64_t size = diType.getSizeInBits() * g->target.vectorWidth;
|
||||||
uint64_t align = diType.getAlignInBits() * g->target.vectorWidth;
|
uint64_t align = diType.getAlignInBits() * g->target.vectorWidth;
|
||||||
return m->diBuilder->createVectorType(size, align, diType, subArray);
|
return m->diBuilder->createVectorType(size, align, diType, subArray);
|
||||||
#endif // !LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -893,10 +881,6 @@ ArrayType::TotalElementCount() const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
ArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
ArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
if (!child)
|
if (!child)
|
||||||
return llvm::DIType();
|
return llvm::DIType();
|
||||||
|
|
||||||
@@ -923,7 +907,6 @@ ArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
uint64_t align = eltType.getAlignInBits();
|
uint64_t align = eltType.getAlignInBits();
|
||||||
|
|
||||||
return m->diBuilder->createArrayType(size, align, eltType, subArray);
|
return m->diBuilder->createArrayType(size, align, eltType, subArray);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1044,16 +1027,11 @@ SOAArrayType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
SOAArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
SOAArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
if (!child)
|
if (!child)
|
||||||
return llvm::DIType();
|
return llvm::DIType();
|
||||||
|
|
||||||
const Type *t = soaType();
|
const Type *t = soaType();
|
||||||
return t->GetDIType(scope);
|
return t->GetDIType(scope);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1217,10 +1195,6 @@ VectorType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
VectorType::GetDIType(llvm::DIDescriptor scope) const {
|
VectorType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
llvm::DIType eltType = base->GetDIType(scope);
|
llvm::DIType eltType = base->GetDIType(scope);
|
||||||
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1);
|
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1);
|
||||||
#ifdef LLVM_2_9
|
#ifdef LLVM_2_9
|
||||||
@@ -1240,7 +1214,6 @@ VectorType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
align = 4 * g->target.nativeVectorWidth;
|
align = 4 * g->target.nativeVectorWidth;
|
||||||
|
|
||||||
return m->diBuilder->createVectorType(sizeBits, align, eltType, subArray);
|
return m->diBuilder->createVectorType(sizeBits, align, eltType, subArray);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1443,10 +1416,6 @@ StructType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
StructType::GetDIType(llvm::DIDescriptor scope) const {
|
StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
uint64_t currentSize = 0, align = 0;
|
uint64_t currentSize = 0, align = 0;
|
||||||
|
|
||||||
std::vector<llvm::Value *> elementLLVMTypes;
|
std::vector<llvm::Value *> elementLLVMTypes;
|
||||||
@@ -1500,7 +1469,6 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
llvm::DIFile diFile = pos.GetDIFile();
|
llvm::DIFile diFile = pos.GetDIFile();
|
||||||
return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize,
|
return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize,
|
||||||
align, 0, elements);
|
align, 0, elements);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1698,13 +1666,8 @@ ReferenceType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
|
|
||||||
llvm::DIType
|
llvm::DIType
|
||||||
ReferenceType::GetDIType(llvm::DIDescriptor scope) const {
|
ReferenceType::GetDIType(llvm::DIDescriptor scope) const {
|
||||||
#ifdef LLVM_2_8
|
|
||||||
FATAL("debug info not supported in llvm 2.8");
|
|
||||||
return llvm::DIType();
|
|
||||||
#else
|
|
||||||
llvm::DIType diTargetType = targetType->GetDIType(scope);
|
llvm::DIType diTargetType = targetType->GetDIType(scope);
|
||||||
return m->diBuilder->createReferenceType(diTargetType);
|
return m->diBuilder->createReferenceType(diTargetType);
|
||||||
#endif // LLVM_2_8
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1870,6 +1833,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
|
|||||||
for (unsigned int i = 0; i < argTypes.size(); ++i) {
|
for (unsigned int i = 0; i < argTypes.size(); ++i) {
|
||||||
if (!argTypes[i])
|
if (!argTypes[i])
|
||||||
return NULL;
|
return NULL;
|
||||||
|
assert(argTypes[i] != AtomicType::Void);
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::Type *t = argTypes[i]->LLVMType(ctx);
|
LLVM_TYPE_CONST llvm::Type *t = argTypes[i]->LLVMType(ctx);
|
||||||
if (!t)
|
if (!t)
|
||||||
|
|||||||
4
util.cpp
4
util.cpp
@@ -344,6 +344,10 @@ StringEditDistance(const std::string &str1, const std::string &str2, int maxDist
|
|||||||
|
|
||||||
std::vector<std::string>
|
std::vector<std::string>
|
||||||
MatchStrings(const std::string &str, const std::vector<std::string> &options) {
|
MatchStrings(const std::string &str, const std::vector<std::string> &options) {
|
||||||
|
if (str.size() == 0 || (str.size() == 1 && !isalpha(str[0])))
|
||||||
|
// don't even try...
|
||||||
|
return std::vector<std::string>();
|
||||||
|
|
||||||
const int maxDelta = 2;
|
const int maxDelta = 2;
|
||||||
std::vector<std::string> matches[maxDelta+1];
|
std::vector<std::string> matches[maxDelta+1];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user