Update examples for multi-target compilation.

Makefile and vcxproj file updates.
Also modified vcxproj files so that the various files ispc generates go into $(TargetDir),
  not the current directory.
Modified the ray tracer example so that its app-visible datatypes no longer contain uniform
  short-vector types (these are laid out differently on SSE vs. AVX); the previous approach
  had a lurking bug.
This commit is contained in:
Matt Pharr
2011-10-04 15:56:34 -07:00
parent a68d137df6
commit bedaec2295
26 changed files with 242 additions and 157 deletions

View File

@@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \
objs/rt_ispc_sse4.o objs/rt_ispc_avx.o
default: rt
@@ -20,8 +23,8 @@ dirs:
clean:
/bin/rm -rf objs *~ rt
rt: dirs objs/rt.o objs/rt_serial.o objs/rt_ispc.o $(TASK_OBJ)
$(CXX) $(CXXFLAGS) -o $@ objs/rt.o objs/rt_ispc.o objs/rt_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
rt: dirs $(OBJS)
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +34,5 @@ objs/%.o: ../%.cpp
objs/rt.o: objs/rt_ispc.h
objs/%_ispc.h objs/%_ispc.o: %.ispc
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h

View File

@@ -168,12 +168,12 @@ int main(int argc, char *argv[]) {
// of node, the total number of primitives in it if a leaf node, etc.
float b[6];
READ(b[0], 6);
nodes[i].bounds[0].v[0] = b[0];
nodes[i].bounds[0].v[1] = b[1];
nodes[i].bounds[0].v[2] = b[2];
nodes[i].bounds[1].v[0] = b[3];
nodes[i].bounds[1].v[1] = b[4];
nodes[i].bounds[1].v[2] = b[5];
nodes[i].bounds[0][0] = b[0];
nodes[i].bounds[0][1] = b[1];
nodes[i].bounds[0][2] = b[2];
nodes[i].bounds[1][0] = b[3];
nodes[i].bounds[1][1] = b[4];
nodes[i].bounds[1][2] = b[5];
READ(nodes[i].offset, 1);
READ(nodes[i].nPrimitives, 1);
READ(nodes[i].splitAxis, 1);
@@ -190,9 +190,9 @@ int main(int argc, char *argv[]) {
READ(v[0], 9);
float *vp = v;
for (int j = 0; j < 3; ++j) {
triangles[i].p[j].v[0] = *vp++;
triangles[i].p[j].v[1] = *vp++;
triangles[i].p[j].v[2] = *vp++;
triangles[i].p[j][0] = *vp++;
triangles[i].p[j][1] = *vp++;
triangles[i].p[j][2] = *vp++;
}
// And create an object id
triangles[i].id = i+1;

View File

@@ -43,12 +43,13 @@ struct Ray {
};
struct Triangle {
uniform float3 p[3];
uniform float p[3][4];
uniform int id;
uniform int pad[3];
};
struct LinearBVHNode {
uniform float3 bounds[2];
uniform float bounds[2][3];
uniform unsigned int offset; // num primitives for leaf, second child for interior
uniform unsigned int8 nPrimitives;
uniform unsigned int8 splitAxis;
@@ -103,14 +104,16 @@ static void generateRay(uniform const float raster2camera[4][4],
}
static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
static inline bool BBoxIntersect(const uniform float bounds[2][3],
const reference Ray ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt;
// Check all three axis-aligned slabs. Don't try to early out; it's
// not worth the trouble
float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
@@ -141,8 +144,11 @@ static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) {
uniform float3 e1 = tri.p[1] - tri.p[0];
uniform float3 e2 = tri.p[2] - tri.p[0];
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
uniform float3 e1 = p1 - p0;
uniform float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
@@ -153,7 +159,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray)
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
float3 d = ray.origin - tri.p[0];
float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
hit = false;

View File

@@ -81,6 +81,7 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
@@ -96,6 +97,7 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
@@ -113,6 +115,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
@@ -131,6 +134,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
@@ -144,21 +148,21 @@
<CustomBuild Include="rt.ispc">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
</CustomBuild>
</ItemGroup>
<ItemGroup>

View File

@@ -75,12 +75,13 @@ struct Ray {
// Declare these in a namespace so the mangling matches
namespace ispc {
struct Triangle {
float3 p[3];
float p[3][4]; // extra float pad after each vertex
int32_t id;
int32_t pad[3]; // make 16 x 32-bits
};
struct LinearBVHNode {
float3 bounds[2];
float bounds[2][3];
int32_t offset; // primitives for leaf, second child for interior
uint8_t nPrimitives;
uint8_t splitAxis;
@@ -140,12 +141,14 @@ static void generateRay(const float raster2camera[4][4],
}
static inline bool BBoxIntersect(const float3 bounds[2],
static inline bool BBoxIntersect(const float bounds[2][3],
const Ray &ray) {
float3 bounds0(bounds[0][0], bounds[0][1], bounds[0][2]);
float3 bounds1(bounds[1][0], bounds[1][1], bounds[1][2]);
float t0 = ray.mint, t1 = ray.maxt;
float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
@@ -176,8 +179,11 @@ static inline bool BBoxIntersect(const float3 bounds[2],
inline bool TriIntersect(const Triangle &tri, Ray &ray) {
float3 e1 = tri.p[1] - tri.p[0];
float3 e2 = tri.p[2] - tri.p[0];
float3 p0(tri.p[0][0], tri.p[0][1], tri.p[0][2]);
float3 p1(tri.p[1][0], tri.p[1][1], tri.p[1][2]);
float3 p2(tri.p[2][0], tri.p[2][1], tri.p[2][2]);
float3 e1 = p1 - p0;
float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
@@ -187,7 +193,7 @@ inline bool TriIntersect(const Triangle &tri, Ray &ray) {
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
float3 d = ray.origin - tri.p[0];
float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
return false;