Update examples for multi-target compilation.
Makefile and vcxproj file updates. Also modified vcxproj files so that the various files ispc generates go into $(TargetDir), not the current directory. Modified the ray tracer example to not have uniform short-vector types in its app-visible datatypes (these are laid out differently on SSE vs AVX); there was an existing lurking bug in the way this was done before.
This commit is contained in:
@@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \
|
||||
objs/rt_ispc_sse4.o objs/rt_ispc_avx.o
|
||||
|
||||
default: rt
|
||||
|
||||
@@ -20,8 +23,8 @@ dirs:
|
||||
clean:
|
||||
/bin/rm -rf objs *~ rt
|
||||
|
||||
rt: dirs objs/rt.o objs/rt_serial.o objs/rt_ispc.o $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/rt.o objs/rt_ispc.o objs/rt_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
|
||||
rt: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@@ -31,5 +34,5 @@ objs/%.o: ../%.cpp
|
||||
|
||||
objs/rt.o: objs/rt_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
@@ -168,12 +168,12 @@ int main(int argc, char *argv[]) {
|
||||
// of node, the total number of int it if a leaf node, etc.
|
||||
float b[6];
|
||||
READ(b[0], 6);
|
||||
nodes[i].bounds[0].v[0] = b[0];
|
||||
nodes[i].bounds[0].v[1] = b[1];
|
||||
nodes[i].bounds[0].v[2] = b[2];
|
||||
nodes[i].bounds[1].v[0] = b[3];
|
||||
nodes[i].bounds[1].v[1] = b[4];
|
||||
nodes[i].bounds[1].v[2] = b[5];
|
||||
nodes[i].bounds[0][0] = b[0];
|
||||
nodes[i].bounds[0][1] = b[1];
|
||||
nodes[i].bounds[0][2] = b[2];
|
||||
nodes[i].bounds[1][0] = b[3];
|
||||
nodes[i].bounds[1][1] = b[4];
|
||||
nodes[i].bounds[1][2] = b[5];
|
||||
READ(nodes[i].offset, 1);
|
||||
READ(nodes[i].nPrimitives, 1);
|
||||
READ(nodes[i].splitAxis, 1);
|
||||
@@ -190,9 +190,9 @@ int main(int argc, char *argv[]) {
|
||||
READ(v[0], 9);
|
||||
float *vp = v;
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
triangles[i].p[j].v[0] = *vp++;
|
||||
triangles[i].p[j].v[1] = *vp++;
|
||||
triangles[i].p[j].v[2] = *vp++;
|
||||
triangles[i].p[j][0] = *vp++;
|
||||
triangles[i].p[j][1] = *vp++;
|
||||
triangles[i].p[j][2] = *vp++;
|
||||
}
|
||||
// And create an object id
|
||||
triangles[i].id = i+1;
|
||||
|
||||
@@ -43,12 +43,13 @@ struct Ray {
|
||||
};
|
||||
|
||||
struct Triangle {
|
||||
uniform float3 p[3];
|
||||
uniform float p[3][4];
|
||||
uniform int id;
|
||||
uniform int pad[3];
|
||||
};
|
||||
|
||||
struct LinearBVHNode {
|
||||
uniform float3 bounds[2];
|
||||
uniform float bounds[2][3];
|
||||
uniform unsigned int offset; // num primitives for leaf, second child for interior
|
||||
uniform unsigned int8 nPrimitives;
|
||||
uniform unsigned int8 splitAxis;
|
||||
@@ -103,14 +104,16 @@ static void generateRay(uniform const float raster2camera[4][4],
|
||||
}
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
|
||||
static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const reference Ray ray) {
|
||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
|
||||
// Check all three axis-aligned slabs. Don't try to early out; it's
|
||||
// not worth the trouble
|
||||
float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
|
||||
float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
|
||||
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
|
||||
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
|
||||
if (tNear.x > tFar.x) {
|
||||
float tmp = tNear.x;
|
||||
tNear.x = tFar.x;
|
||||
@@ -141,8 +144,11 @@ static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
|
||||
|
||||
|
||||
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) {
|
||||
uniform float3 e1 = tri.p[1] - tri.p[0];
|
||||
uniform float3 e2 = tri.p[2] - tri.p[0];
|
||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
uniform float3 e1 = p1 - p0;
|
||||
uniform float3 e2 = p2 - p0;
|
||||
|
||||
float3 s1 = Cross(ray.dir, e2);
|
||||
float divisor = Dot(s1, e1);
|
||||
@@ -153,7 +159,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray)
|
||||
float invDivisor = 1.f / divisor;
|
||||
|
||||
// Compute first barycentric coordinate
|
||||
float3 d = ray.origin - tri.p[0];
|
||||
float3 d = ray.origin - p0;
|
||||
float b1 = Dot(d, s1) * invDivisor;
|
||||
if (b1 < 0. || b1 > 1.)
|
||||
hit = false;
|
||||
|
||||
@@ -81,6 +81,7 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
@@ -96,6 +97,7 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
@@ -113,6 +115,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@@ -131,6 +134,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@@ -144,21 +148,21 @@
|
||||
<CustomBuild Include="rt.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
||||
@@ -75,12 +75,13 @@ struct Ray {
|
||||
// Declare these in a namespace so the mangling matches
|
||||
namespace ispc {
|
||||
struct Triangle {
|
||||
float3 p[3];
|
||||
float p[3][4]; // extra float pad after each vertex
|
||||
int32_t id;
|
||||
int32_t pad[3]; // make 16 x 32-bits
|
||||
};
|
||||
|
||||
struct LinearBVHNode {
|
||||
float3 bounds[2];
|
||||
float bounds[2][3];
|
||||
int32_t offset; // primitives for leaf, second child for interior
|
||||
uint8_t nPrimitives;
|
||||
uint8_t splitAxis;
|
||||
@@ -140,12 +141,14 @@ static void generateRay(const float raster2camera[4][4],
|
||||
}
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const float3 bounds[2],
|
||||
static inline bool BBoxIntersect(const float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
float3 bounds0(bounds[0][0], bounds[0][1], bounds[0][2]);
|
||||
float3 bounds1(bounds[1][0], bounds[1][1], bounds[1][2]);
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
|
||||
float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
|
||||
float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
|
||||
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
|
||||
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
|
||||
if (tNear.x > tFar.x) {
|
||||
float tmp = tNear.x;
|
||||
tNear.x = tFar.x;
|
||||
@@ -176,8 +179,11 @@ static inline bool BBoxIntersect(const float3 bounds[2],
|
||||
|
||||
|
||||
inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
float3 e1 = tri.p[1] - tri.p[0];
|
||||
float3 e2 = tri.p[2] - tri.p[0];
|
||||
float3 p0(tri.p[0][0], tri.p[0][1], tri.p[0][2]);
|
||||
float3 p1(tri.p[1][0], tri.p[1][1], tri.p[1][2]);
|
||||
float3 p2(tri.p[2][0], tri.p[2][1], tri.p[2][2]);
|
||||
float3 e1 = p1 - p0;
|
||||
float3 e2 = p2 - p0;
|
||||
|
||||
float3 s1 = Cross(ray.dir, e2);
|
||||
float divisor = Dot(s1, e1);
|
||||
@@ -187,7 +193,7 @@ inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
float invDivisor = 1.f / divisor;
|
||||
|
||||
// Compute first barycentric coordinate
|
||||
float3 d = ray.origin - tri.p[0];
|
||||
float3 d = ray.origin - p0;
|
||||
float b1 = Dot(d, s1) * invDivisor;
|
||||
if (b1 < 0. || b1 > 1.)
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user