Add volume rendering example. (~2.3x speedup from SIMD vs serial code.)
This commit is contained in:
@@ -93,3 +93,17 @@ Simple
|
||||
This is a simple "hello world" type program that shows a ~10 line
|
||||
application program calling out to a ~5 line ispc program to do a simple
|
||||
computation.
|
||||
|
||||
Volume
|
||||
======
|
||||
|
||||
Ray-marching volume rendering, with single scattering lighting model. To
|
||||
run it, specify a camera parameter file and a volume density file, e.g.:
|
||||
|
||||
volume camera.dat density_highres.vol
|
||||
|
||||
(See, e.g. Chapters 11 and 16 of "Physically Based Rendering" for
|
||||
information about the algorithm implemented here.) The volume data set
|
||||
included here was generated by the example implementation of the "Wavelet
|
||||
Turbulence for Fluid Simulation" SIGGRAPH 2008 paper by Kim et
|
||||
al. (http://www.cs.cornell.edu/~tedkim/WTURB/)
|
||||
|
||||
@@ -17,6 +17,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "aobench_instrumented", "aob
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "noise", "noise\noise.vcxproj", "{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "volume", "volume_rendering\volume.vcxproj", "{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}"
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stencil", "stencil\stencil.vcxproj", "{2EF070A1-F62F-4E6A-944B-88D140945C3C}"
|
||||
EndProject
|
||||
Global
|
||||
@@ -91,6 +92,14 @@ Global
|
||||
{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|Win32.Build.0 = Release|Win32
|
||||
{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|x64.ActiveCfg = Release|x64
|
||||
{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|x64.Build.0 = Release|x64
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Debug|x64.Build.0 = Debug|x64
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Release|Win32.Build.0 = Release|Win32
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Release|x64.ActiveCfg = Release|x64
|
||||
{DEE5733A-E93E-449D-9114-9BFFCAEB4DF9}.Release|x64.Build.0 = Release|x64
|
||||
{2EF070A1-F62F-4E6A-944B-88D140945C3C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{2EF070A1-F62F-4E6A-944B-88D140945C3C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{2EF070A1-F62F-4E6A-944B-88D140945C3C}.Debug|x64.ActiveCfg = Debug|x64
|
||||
|
||||
2
examples/volume_rendering/.gitignore
vendored
Normal file
2
examples/volume_rendering/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
mandelbrot
|
||||
*.ppm
|
||||
38
examples/volume_rendering/Makefile
Normal file
38
examples/volume_rendering/Makefile
Normal file
@@ -0,0 +1,38 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
|
||||
TASK_CXX=tasks_pthreads.cpp
|
||||
TASK_LIB=-lpthread
|
||||
|
||||
ifeq ($(ARCH), Darwin)
|
||||
TASK_CXX=tasks_gcd.cpp
|
||||
TASK_LIB=
|
||||
endif
|
||||
|
||||
TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o))
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||
|
||||
default: volume
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ volume
|
||||
|
||||
volume: dirs objs/volume.o objs/volume_serial.o objs/volume_ispc.o $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/volume.o objs/volume_ispc.o objs/volume_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/volume.o: objs/volume_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
11
examples/volume_rendering/camera.dat
Normal file
11
examples/volume_rendering/camera.dat
Normal file
@@ -0,0 +1,11 @@
|
||||
896 1184
|
||||
|
||||
0.000155 0.000000 0.000000 -0.069927
|
||||
0.000000 -0.000155 0.000000 0.093236
|
||||
0.000000 0.000000 0.000000 1.000000
|
||||
0.000000 0.000000 -99.999001 100.000000
|
||||
|
||||
1.000000 0.000000 0.000000 1.000000
|
||||
0.000000 0.980129 -0.198360 2.900000
|
||||
0.000000 0.198360 0.980129 -10.500000
|
||||
0.000000 0.000000 0.000000 1.000000
|
||||
5
examples/volume_rendering/density_highres.vol
Normal file
5
examples/volume_rendering/density_highres.vol
Normal file
File diff suppressed because one or more lines are too long
4
examples/volume_rendering/density_lowres.vol
Normal file
4
examples/volume_rendering/density_lowres.vol
Normal file
File diff suppressed because one or more lines are too long
141
examples/volume_rendering/tasks_concrt.cpp
Normal file
141
examples/volume_rendering/tasks_concrt.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Simple task system implementation for ispc based on Microsoft's
|
||||
Concurrency Runtime. */
|
||||
|
||||
#include <windows.h>
|
||||
#include <concrt.h>
|
||||
using namespace Concurrency;
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||
void ISPCFree(void *ptr);
|
||||
}
|
||||
|
||||
typedef void (*TaskFuncType)(void *, int, int);
|
||||
|
||||
struct TaskInfo {
|
||||
TaskFuncType ispcFunc;
|
||||
void *ispcData;
|
||||
};
|
||||
|
||||
// This is a simple implementation that just aborts if more than MAX_TASKS
|
||||
// are launched. It could easily be extended to be more general...
|
||||
|
||||
#define MAX_TASKS 32768
|
||||
static int taskOffset;
|
||||
static TaskInfo taskInfo[MAX_TASKS];
|
||||
static event *events[MAX_TASKS];
|
||||
static CRITICAL_SECTION criticalSection;
|
||||
static bool initialized = false;
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
InitializeCriticalSection(&criticalSection);
|
||||
for (int i = 0; i < MAX_TASKS; ++i)
|
||||
events[i] = new event;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
|
||||
void __cdecl
|
||||
lRunTask(LPVOID param) {
|
||||
TaskInfo *ti = (TaskInfo *)param;
|
||||
|
||||
// Actually run the task.
|
||||
// FIXME: like the tasks_gcd.cpp implementation, this is passing bogus
|
||||
// values for the threadIndex and threadCount builtins, which in turn
|
||||
// will cause bugs in code that uses those. FWIW this example doesn't
|
||||
// use them...
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
ti->ispcFunc(ti->ispcData, threadIndex, threadCount);
|
||||
|
||||
// Signal the event that this task is done
|
||||
int taskNum = ti - &taskInfo[0];
|
||||
events[taskNum]->set();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ISPCLaunch(void *func, void *data) {
|
||||
if (!initialized) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Get a TaskInfo struct for this task
|
||||
EnterCriticalSection(&criticalSection);
|
||||
TaskInfo *ti = &taskInfo[taskOffset++];
|
||||
assert(taskOffset < MAX_TASKS);
|
||||
LeaveCriticalSection(&criticalSection);
|
||||
|
||||
// And pass it on to the Concurrency Runtime...
|
||||
ti->ispcFunc = (TaskFuncType)func;
|
||||
ti->ispcData = data;
|
||||
CurrentScheduler::ScheduleTask(lRunTask, ti);
|
||||
}
|
||||
|
||||
|
||||
void ISPCSync() {
|
||||
if (!initialized) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
event::wait_for_multiple(&events[0], taskOffset, true,
|
||||
COOPERATIVE_TIMEOUT_INFINITE);
|
||||
|
||||
for (int i = 0; i < taskOffset; ++i)
|
||||
events[i]->reset();
|
||||
|
||||
taskOffset = 0;
|
||||
}
|
||||
|
||||
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||
return _aligned_malloc(size, alignment);
|
||||
}
|
||||
|
||||
|
||||
void ISPCFree(void *ptr) {
|
||||
_aligned_free(ptr);
|
||||
}
|
||||
103
examples/volume_rendering/tasks_gcd.cpp
Normal file
103
examples/volume_rendering/tasks_gcd.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* A simple task system for ispc programs based on Apple's Grand Central
|
||||
Dispatch. */
|
||||
|
||||
#include <dispatch/dispatch.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static bool initialized = false;
|
||||
static dispatch_queue_t gcdQueue;
|
||||
static dispatch_group_t gcdGroup;
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
struct TaskInfo {
|
||||
void *func;
|
||||
void *data;
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
gcdQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||
gcdGroup = dispatch_group_create();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lRunTask(void *ti) {
|
||||
typedef void (*TaskFuncType)(void *, int, int);
|
||||
TaskInfo *taskInfo = (TaskInfo *)ti;
|
||||
|
||||
TaskFuncType func = (TaskFuncType)(taskInfo->func);
|
||||
|
||||
// FIXME: these are bogus values; may cause bugs in code that depends
|
||||
// on them having unique values in different threads.
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
// Actually run the task
|
||||
func(taskInfo->data, threadIndex, threadCount);
|
||||
|
||||
// FIXME: taskInfo leaks...
|
||||
}
|
||||
|
||||
|
||||
void ISPCLaunch(void *func, void *data) {
|
||||
if (!initialized) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
TaskInfo *ti = new TaskInfo;
|
||||
ti->func = func;
|
||||
ti->data = data;
|
||||
dispatch_group_async_f(gcdGroup, gcdQueue, ti, lRunTask);
|
||||
}
|
||||
|
||||
|
||||
void ISPCSync() {
|
||||
if (!initialized) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Wait for all of the tasks in the group to complete before returning
|
||||
dispatch_group_wait(gcdGroup, DISPATCH_TIME_FOREVER);
|
||||
}
|
||||
295
examples/volume_rendering/tasks_pthreads.cpp
Normal file
295
examples/volume_rendering/tasks_pthreads.cpp
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <vector>
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
|
||||
static int nThreads;
|
||||
static pthread_t *threads;
|
||||
static pthread_mutex_t taskQueueMutex;
|
||||
static std::vector<std::pair<void *, void *> > taskQueue;
|
||||
static sem_t *workerSemaphore;
|
||||
static uint32_t numUnfinishedTasks;
|
||||
static pthread_mutex_t tasksRunningConditionMutex;
|
||||
static pthread_cond_t tasksRunningCondition;
|
||||
|
||||
static void *lTaskEntry(void *arg);
|
||||
|
||||
/** Figure out how many CPU cores there are in the system
|
||||
*/
|
||||
static int
|
||||
lNumCPUCores() {
|
||||
#if defined(__linux__)
|
||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#else
|
||||
// Mac
|
||||
int mib[2];
|
||||
mib[0] = CTL_HW;
|
||||
size_t length = 2;
|
||||
if (sysctlnametomib("hw.logicalcpu", mib, &length) == -1) {
|
||||
fprintf(stderr, "sysctlnametomib() filed. Guessing 2 cores.");
|
||||
return 2;
|
||||
}
|
||||
assert(length == 2);
|
||||
|
||||
int nCores = 0;
|
||||
size_t size = sizeof(nCores);
|
||||
|
||||
if (sysctl(mib, 2, &nCores, &size, NULL, 0) == -1) {
|
||||
fprintf(stderr, "sysctl() to find number of cores present failed. Guessing 2.");
|
||||
return 2;
|
||||
}
|
||||
return nCores;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
nThreads = lNumCPUCores();
|
||||
|
||||
threads = new pthread_t[nThreads];
|
||||
|
||||
int err;
|
||||
if ((err = pthread_mutex_init(&taskQueueMutex, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
char name[32];
|
||||
sprintf(name, "ispc_task.%d", (int)getpid());
|
||||
workerSemaphore = sem_open(name, O_CREAT, S_IRUSR|S_IWUSR, 0);
|
||||
if (!workerSemaphore) {
|
||||
fprintf(stderr, "Error creating semaphore: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((err = pthread_cond_init(&tasksRunningCondition, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating condition variable: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((err = pthread_mutex_init(&tasksRunningConditionMutex, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < nThreads; ++i) {
|
||||
err = pthread_create(&threads[i], NULL, &lTaskEntry, reinterpret_cast<void *>(i));
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "Error creating pthread %d: %s\n", i, strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ISPCLaunch(void *f, void *d) {
|
||||
if (threads == NULL) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Acquire mutex, add task
|
||||
//
|
||||
int err;
|
||||
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
taskQueue.push_back(std::make_pair(f, d));
|
||||
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Update count of number of tasks left to run
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
++numUnfinishedTasks;
|
||||
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Post to the worker semaphore to wake up worker threads that are
|
||||
// sleeping waiting for tasks to show up
|
||||
//
|
||||
if ((err = sem_post(workerSemaphore)) != 0) {
|
||||
fprintf(stderr, "Error from sem_post: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void *
|
||||
lTaskEntry(void *arg) {
|
||||
int threadIndex = int(reinterpret_cast<int64_t>(arg));
|
||||
int threadCount = nThreads;
|
||||
|
||||
while (true) {
|
||||
int err;
|
||||
if ((err = sem_wait(workerSemaphore)) != 0) {
|
||||
fprintf(stderr, "Error from sem_wait: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::pair<void *, void *> myTask;
|
||||
//
|
||||
// Acquire mutex, get task
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
if (taskQueue.size() == 0) {
|
||||
//
|
||||
// Task queue is empty, go back and wait on the semaphore
|
||||
//
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
myTask = taskQueue.back();
|
||||
taskQueue.pop_back();
|
||||
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Do work for _myTask_
|
||||
//
|
||||
typedef void (*TaskFunType)(void *, int, int);
|
||||
TaskFunType func = (TaskFunType)myTask.first;
|
||||
func(myTask.second, threadIndex, threadCount);
|
||||
|
||||
//
|
||||
// Decrement the number of unfinished tasks counter
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int unfinished = --numUnfinishedTasks;
|
||||
if (unfinished == 0) {
|
||||
//
|
||||
// Signal the "no more tasks are running" condition if all of
|
||||
// them are done.
|
||||
//
|
||||
int err;
|
||||
if ((err = pthread_cond_signal(&tasksRunningCondition)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_cond_signal: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_exit(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void ISPCSync() {
|
||||
if (threads == NULL) {
|
||||
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int err;
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// As long as there are tasks running, wait on the condition variable;
|
||||
// doing so causes this thread to go to sleep until someone signals on
|
||||
// the tasksRunningCondition condition variable.
|
||||
while (numUnfinishedTasks > 0) {
|
||||
if ((err = pthread_cond_wait(&tasksRunningCondition,
|
||||
&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_cond_wait: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// We acquire ownership of the condition variable mutex when the above
|
||||
// pthread_cond_wait returns.
|
||||
// FIXME: is there a lurking issue here if numUnfinishedTasks gets back
|
||||
// to zero by the time we get to ISPCSync() and thence we're trying to
|
||||
// unlock a mutex we don't have a lock on?
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
251
examples/volume_rendering/volume.cpp
Normal file
251
examples/volume_rendering/volume.cpp
Normal file
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include "../timing.h"
|
||||
#include "../cpuid.h"
|
||||
#include "volume_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
extern void volume_serial(float density[], int nVoxels[3],
|
||||
const float raster2camera[4][4],
|
||||
const float camera2world[4][4],
|
||||
int width, int height, float image[]);
|
||||
|
||||
/* Write a PPM image file with the image */
|
||||
static void
|
||||
writePPM(float *buf, int width, int height, const char *fn) {
|
||||
FILE *fp = fopen(fn, "wb");
|
||||
fprintf(fp, "P6\n");
|
||||
fprintf(fp, "%d %d\n", width, height);
|
||||
fprintf(fp, "255\n");
|
||||
for (int i = 0; i < width*height; ++i) {
|
||||
float v = buf[i] * 255.f;
|
||||
if (v < 0.f) v = 0.f;
|
||||
else if (v > 255.f) v = 255.f;
|
||||
unsigned char c = (unsigned char)v;
|
||||
for (int j = 0; j < 3; ++j)
|
||||
fputc(c, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
printf("Wrote image file %s\n", fn);
|
||||
}
|
||||
|
||||
|
||||
// Make sure that the vector ISA used during compilation is supported by
|
||||
// the processor. The ISPC_TARGET_* macro is set in the ispc-generated
|
||||
// header file that we include above.
|
||||
static void
|
||||
ensureTargetISAIsSupported() {
|
||||
#if defined(ISPC_TARGET_SSE2)
|
||||
bool isaSupported = CPUSupportsSSE2();
|
||||
const char *target = "SSE2";
|
||||
#elif defined(ISPC_TARGET_SSE4)
|
||||
bool isaSupported = CPUSupportsSSE4();
|
||||
const char *target = "SSE4";
|
||||
#elif defined(ISPC_TARGET_AVX)
|
||||
bool isaSupported = CPUSupportsAVX();
|
||||
const char *target = "AVX";
|
||||
#else
|
||||
#error "Unknown ISPC_TARGET_* value"
|
||||
#endif
|
||||
if (!isaSupported) {
|
||||
fprintf(stderr, "***\n*** Error: the ispc-compiled code uses the %s instruction "
|
||||
"set, which isn't\n*** supported by this computer's CPU!\n", target);
|
||||
fprintf(stderr, "***\n*** Please modify the "
|
||||
#ifdef _MSC_VER
|
||||
"MSVC project file "
|
||||
#else
|
||||
"Makefile "
|
||||
#endif
|
||||
"to select another target (e.g. sse2)\n***\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Load image and viewing parameters from a camera data file.
|
||||
FIXME: we should add support to be able to specify viewing parameters
|
||||
in the program here directly. */
|
||||
static void
|
||||
loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
|
||||
float camera2world[4][4]) {
|
||||
FILE *f = fopen(fn, "r");
|
||||
if (!f) {
|
||||
perror(fn);
|
||||
exit(1);
|
||||
}
|
||||
if (fscanf(f, "%d %d", width, height) != 2) {
|
||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
if (fscanf(f, "%f", &raster2camera[i][j]) != 1) {
|
||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
|
||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
|
||||
/* Load a volume density file. Expects the number of x, y, and z samples
|
||||
as the first three values (as integer strings), then x*y*z
|
||||
floating-point values (also as strings) to give the densities. */
|
||||
static float *
|
||||
loadVolume(const char *fn, int n[3]) {
|
||||
FILE *f = fopen(fn, "r");
|
||||
if (!f) {
|
||||
perror(fn);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) {
|
||||
fprintf(stderr, "Couldn't find resolution at start of density file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int count = n[0] * n[1] * n[2];
|
||||
float *v = new float[count];
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (fscanf(f, "%f", &v[i]) != 1) {
|
||||
fprintf(stderr, "Unexpected end of file at %d'th density value\n", i);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ensureTargetISAIsSupported();
|
||||
|
||||
extern void TasksInit();
|
||||
TasksInit();
|
||||
|
||||
//
|
||||
// Load viewing data and the volume density data
|
||||
//
|
||||
int width, height;
|
||||
float raster2camera[4][4], camera2world[4][4];
|
||||
loadCamera(argv[1], &width, &height, raster2camera, camera2world);
|
||||
float *image = new float[width*height];
|
||||
|
||||
int n[3];
|
||||
float *density = loadVolume(argv[2], n);
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation; report the minimum
|
||||
// time of three runs.
|
||||
//
|
||||
double minISPC = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
volume_ispc(density, n, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minISPC = std::min(minISPC, dt);
|
||||
}
|
||||
|
||||
printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC);
|
||||
writePPM(image, width, height, "volume-ispc-1core.ppm");
|
||||
|
||||
// Clear out the buffer
|
||||
for (int i = 0; i < width * height; ++i)
|
||||
image[i] = 0.;
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation that also uses
|
||||
// tasks; report the minimum time of three runs.
|
||||
//
|
||||
double minISPCtasks = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
volume_ispc_tasks(density, n, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minISPCtasks = std::min(minISPCtasks, dt);
|
||||
}
|
||||
|
||||
printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks);
|
||||
writePPM(image, width, height, "volume-ispc-tasks.ppm");
|
||||
|
||||
// Clear out the buffer
|
||||
for (int i = 0; i < width * height; ++i)
|
||||
image[i] = 0.;
|
||||
|
||||
//
|
||||
// And run the serial implementation 3 times, again reporting the
|
||||
// minimum time.
|
||||
//
|
||||
double minSerial = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
volume_serial(density, n, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minSerial = std::min(minSerial, dt);
|
||||
}
|
||||
|
||||
printf("[volume serial]:\t\t[%.3f] millon cycles\n", minSerial);
|
||||
writePPM(image, width, height, "volume-serial.ppm");
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC serial, %.2fx from ISPC+tasks)\n",
|
||||
minSerial/minISPC, minSerial / minISPCtasks);
|
||||
|
||||
return 0;
|
||||
}
|
||||
345
examples/volume_rendering/volume.ispc
Normal file
345
examples/volume_rendering/volume.ispc
Normal file
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
typedef float<3> float3;
|
||||
|
||||
struct Ray {
|
||||
float3 origin, dir;
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
generateRay(const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
float x, float y, reference Ray ray) {
|
||||
// transform raster coordinate (x, y, 0) to camera space
|
||||
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
|
||||
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
|
||||
float camz = raster2camera[2][3];
|
||||
float camw = raster2camera[3][3];
|
||||
camx /= camw;
|
||||
camy /= camw;
|
||||
camz /= camw;
|
||||
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
|
||||
|
||||
ray.origin.x = camera2world[0][3] / camera2world[3][3];
|
||||
ray.origin.y = camera2world[1][3] / camera2world[3][3];
|
||||
ray.origin.z = camera2world[2][3] / camera2world[3][3];
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
Inside(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p.x >= pMin.x && p.x <= pMax.x &&
|
||||
p.y >= pMin.y && p.y <= pMax.y &&
|
||||
p.z >= pMin.z && p.z <= pMax.z);
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
IntersectP(Ray ray, float3 pMin, float3 pMax, reference float hit0, reference float hit1) {
|
||||
float t0 = -1e30, t1 = 1e30;
|
||||
|
||||
float3 tNear = (pMin - ray.origin) / ray.dir;
|
||||
float3 tFar = (pMax - ray.origin) / ray.dir;
|
||||
if (tNear.x > tFar.x) {
|
||||
float tmp = tNear.x;
|
||||
tNear.x = tFar.x;
|
||||
tFar.x = tmp;
|
||||
}
|
||||
t0 = max(tNear.x, t0);
|
||||
t1 = min(tFar.x, t1);
|
||||
|
||||
if (tNear.y > tFar.y) {
|
||||
float tmp = tNear.y;
|
||||
tNear.y = tFar.y;
|
||||
tFar.y = tmp;
|
||||
}
|
||||
t0 = max(tNear.y, t0);
|
||||
t1 = min(tFar.y, t1);
|
||||
|
||||
if (tNear.z > tFar.z) {
|
||||
float tmp = tNear.z;
|
||||
tNear.z = tFar.z;
|
||||
tFar.z = tmp;
|
||||
}
|
||||
t0 = max(tNear.z, t0);
|
||||
t1 = min(tFar.z, t1);
|
||||
|
||||
if (t0 <= t1) {
|
||||
hit0 = t0;
|
||||
hit1 = t1;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static inline float Lerp(float t, float a, float b) {
|
||||
return (1.f - t) * a + t * b;
|
||||
}
|
||||
|
||||
|
||||
static inline float D(int x, int y, int z, uniform int nVoxels[3],
|
||||
uniform float density[]) {
|
||||
x = clamp(x, 0, nVoxels[0]-1);
|
||||
y = clamp(y, 0, nVoxels[1]-1);
|
||||
z = clamp(z, 0, nVoxels[2]-1);
|
||||
|
||||
#if 0
|
||||
uniform int ux, uy, uz;
|
||||
if (reduce_equal(x, ux) && reduce_equal(y, uy) && reduce_equal(z, uz))
|
||||
return density[uz*nVoxels[0]*nVoxels[1] + uy*nVoxels[0] + ux];
|
||||
else
|
||||
#endif
|
||||
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
|
||||
}
|
||||
|
||||
|
||||
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p - pMin) / (pMax - pMin);
|
||||
}
|
||||
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3]) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
float3 vox = Offset(Pobj, pMin, pMax);
|
||||
vox.x = vox.x * nVoxels[0] - .5f;
|
||||
vox.y = vox.y * nVoxels[1] - .5f;
|
||||
vox.z = vox.z * nVoxels[2] - .5f;
|
||||
int vx = (int)(vox.x), vy = (int)(vox.y), vz = (int)(vox.z);
|
||||
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
|
||||
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
float d0 = Lerp(dy, d00, d10);
|
||||
float d1 = Lerp(dy, d01, d11);
|
||||
return Lerp(dz, d0, d1);
|
||||
}
|
||||
|
||||
|
||||
/* Returns the transmittance between two points p0 and p1, in a volume
|
||||
with extent (pMin,pMax) with transmittance coefficient sigma_t,
|
||||
defined by nVoxels[3] voxels in each dimension in the given density
|
||||
array. */
|
||||
static float
|
||||
transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
||||
uniform float3 pMax, uniform float sigma_t,
|
||||
uniform float density[], uniform int nVoxels[3]) {
|
||||
float rayT0, rayT1;
|
||||
Ray ray;
|
||||
ray.origin = p1;
|
||||
ray.dir = p0 - p1;
|
||||
|
||||
// Find the parametric t range along the ray that is inside the volume.
|
||||
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
|
||||
return 1.;
|
||||
|
||||
rayT0 = max(rayT0, 0.f);
|
||||
|
||||
// Accumulate beam transmittance in tau
|
||||
float tau = 0;
|
||||
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||
ray.dir.z * ray.dir.z);
|
||||
uniform float stepDist = 0.2;
|
||||
float stepT = stepDist / rayLength;
|
||||
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
while (t < rayT1) {
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
|
||||
return exp(-tau);
|
||||
}
|
||||
|
||||
|
||||
static inline float
|
||||
distanceSquared(float3 a, float3 b) {
|
||||
float3 d = a-b;
|
||||
return d.x*d.x + d.y*d.y + d.z*d.z;
|
||||
}
|
||||
|
||||
|
||||
static float
|
||||
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
float rayT0, rayT1;
|
||||
uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
|
||||
uniform float3 lightPos = { -1, 4, 1.5 };
|
||||
|
||||
cif (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
|
||||
return 0.;
|
||||
|
||||
rayT0 = max(rayT0, 0.f);
|
||||
|
||||
// Parameters that define the volume scattering characteristics and
|
||||
// sampling rate for raymarching
|
||||
uniform float Le = .25; // Emission coefficient
|
||||
uniform float sigma_a = 10; // Absorption coefficient
|
||||
uniform float sigma_s = 10; // Scattering coefficient
|
||||
uniform float stepDist = 0.025; // Ray step amount
|
||||
uniform float lightIntensity = 40; // Light source intensity
|
||||
|
||||
float tau = 0.f; // accumulated beam transmittance
|
||||
float L = 0; // radiance along the ray
|
||||
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||
ray.dir.z * ray.dir.z);
|
||||
float stepT = stepDist / rayLength;
|
||||
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
cwhile (t < rayT1) {
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels);
|
||||
|
||||
// terminate once attenuation is high
|
||||
float atten = exp(-tau);
|
||||
if (atten < .005)
|
||||
cbreak;
|
||||
|
||||
// direct lighting
|
||||
float Li = lightIntensity / distanceSquared(lightPos, pos) *
|
||||
transmittance(lightPos, pos, pMin, pMax, sigma_a + sigma_s,
|
||||
density, nVoxels);
|
||||
L += stepDist * atten * d * sigma_s * (Li + Le);
|
||||
|
||||
// update beam transmittance
|
||||
tau += stepDist * (sigma_a + sigma_s) * d;
|
||||
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
|
||||
// Gamma correction
|
||||
return pow(L, 1.f / 2.2f);
|
||||
}
|
||||
|
||||
|
||||
/* Utility routine used by both the task-based and the single-core entrypoints.
|
||||
Renders a tile of the image, covering [x0,x0) * [y0, y1), storing the
|
||||
result into the image[] array.
|
||||
*/
|
||||
static void
|
||||
volume_tile(uniform int x0, uniform int y0, uniform int x1,
|
||||
uniform int y1, uniform float density[], uniform int nVoxels[3],
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform int width, uniform int height, uniform float image[]) {
|
||||
// Work on 4x4=16 pixel big tiles of the image. This function thus
|
||||
// implicitly assumes that both (x1-x0) and (y1-y0) are evenly divisble
|
||||
// by 4.
|
||||
for (uniform int y = y0; y < y1; y += 4) {
|
||||
for (uniform int x = x0; x < x1; x += 4) {
|
||||
// For each such tile, process programCount pixels at a time,
|
||||
// until we've done all 16 of them. Thus, we're also assuming
|
||||
// that programCount <= 16 and that 16 is evenly dividible by
|
||||
// programCount.
|
||||
for (uniform int o = 0; o < 16; o += programCount) {
|
||||
// These two arrays encode the mapping from [0,15] to
|
||||
// offsets within the 4x4 pixel block so that we render
|
||||
// each pixel inside the block
|
||||
const uniform int xoffsets[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
|
||||
0, 1, 0, 1, 2, 3, 2, 3 };
|
||||
const uniform int yoffsets[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
|
||||
2, 2, 3, 3, 2, 2, 3, 3 };
|
||||
|
||||
// Figure out the pixel to render for this program instance
|
||||
int xo = x + xoffsets[o + programIndex];
|
||||
int yo = y + yoffsets[o + programIndex];
|
||||
|
||||
// Use viewing parameters to compute the corresponding ray
|
||||
// for the pixel
|
||||
Ray ray;
|
||||
generateRay(raster2camera, camera2world, xo, yo, ray);
|
||||
|
||||
// And raymarch through the volume to compute the pixel's
|
||||
// value
|
||||
int offset = yo * width + xo;
|
||||
image[offset] = raymarch(density, nVoxels, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
task void
|
||||
volume_task(uniform int x0, uniform int y0, uniform int x1,
|
||||
uniform int y1, uniform float density[], uniform int nVoxels[3],
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform int width, uniform int height, uniform float image[]) {
|
||||
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
|
||||
camera2world, width, height, image);
|
||||
}
|
||||
|
||||
|
||||
export void
|
||||
volume_ispc(uniform float density[], uniform int nVoxels[3],
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform int width, uniform int height, uniform float image[]) {
|
||||
volume_tile(0, 0, width, height, density, nVoxels, raster2camera,
|
||||
camera2world, width, height, image);
|
||||
}
|
||||
|
||||
|
||||
export void
|
||||
volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform int width, uniform int height, uniform float image[]) {
|
||||
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
||||
uniform int dx = 8, dy = 8;
|
||||
for (uniform int y = 0; y < height; y += dy)
|
||||
for (uniform int x = 0; x < width; x += dx)
|
||||
launch < volume_task(x, y, x+dx, y+dy, density, nVoxels,
|
||||
raster2camera, camera2world, width, height,
|
||||
image) >;
|
||||
}
|
||||
168
examples/volume_rendering/volume.vcxproj
Executable file
168
examples/volume_rendering/volume.vcxproj
Executable file
@@ -0,0 +1,168 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>volume</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="volume.cpp" />
|
||||
<ClCompile Include="volume_serial.cpp" />
|
||||
<ClCompile Include="tasks_concrt.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="volume.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
305
examples/volume_rendering/volume_serial.cpp
Normal file
305
examples/volume_rendering/volume_serial.cpp
Normal file
@@ -0,0 +1,305 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
// Just enough of a float3 class to do what we need in this file.
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(16))
|
||||
#endif
|
||||
struct float3 {
|
||||
float3() { }
|
||||
float3(float xx, float yy, float zz) { x = xx; y = yy; z = zz; }
|
||||
|
||||
float3 operator*(float f) const { return float3(x*f, y*f, z*f); }
|
||||
float3 operator-(const float3 &f2) const {
|
||||
return float3(x-f2.x, y-f2.y, z-f2.z);
|
||||
}
|
||||
float3 operator*(const float3 &f2) const {
|
||||
return float3(x*f2.x, y*f2.y, z*f2.z);
|
||||
}
|
||||
float3 operator+(const float3 &f2) const {
|
||||
return float3(x+f2.x, y+f2.y, z+f2.z);
|
||||
}
|
||||
float3 operator/(const float3 &f2) const {
|
||||
return float3(x/f2.x, y/f2.y, z/f2.z);
|
||||
}
|
||||
float operator[](int i) const { return (&x)[i]; }
|
||||
float &operator[](int i) { return (&x)[i]; }
|
||||
|
||||
float x, y, z;
|
||||
float pad; // match padding/alignment of ispc version
|
||||
}
|
||||
#ifndef _MSC_VER
|
||||
__attribute__ ((aligned(16)))
|
||||
#endif
|
||||
;
|
||||
|
||||
struct Ray {
|
||||
float3 origin, dir;
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
generateRay(const float raster2camera[4][4], const float camera2world[4][4],
|
||||
float x, float y, Ray &ray) {
|
||||
// transform raster coordinate (x, y, 0) to camera space
|
||||
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
|
||||
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
|
||||
float camz = raster2camera[2][3];
|
||||
float camw = raster2camera[3][3];
|
||||
camx /= camw;
|
||||
camy /= camw;
|
||||
camz /= camw;
|
||||
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
|
||||
|
||||
ray.origin.x = camera2world[0][3] / camera2world[3][3];
|
||||
ray.origin.y = camera2world[1][3] / camera2world[3][3];
|
||||
ray.origin.z = camera2world[2][3] / camera2world[3][3];
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
Inside(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p.x >= pMin.x && p.x <= pMax.x &&
|
||||
p.y >= pMin.y && p.y <= pMax.y &&
|
||||
p.z >= pMin.z && p.z <= pMax.z);
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
IntersectP(const Ray &ray, float3 pMin, float3 pMax, float *hit0, float *hit1) {
|
||||
float t0 = -1e30, t1 = 1e30;
|
||||
|
||||
float3 tNear = (pMin - ray.origin) / ray.dir;
|
||||
float3 tFar = (pMax - ray.origin) / ray.dir;
|
||||
if (tNear.x > tFar.x) {
|
||||
float tmp = tNear.x;
|
||||
tNear.x = tFar.x;
|
||||
tFar.x = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.x, t0);
|
||||
t1 = std::min(tFar.x, t1);
|
||||
|
||||
if (tNear.y > tFar.y) {
|
||||
float tmp = tNear.y;
|
||||
tNear.y = tFar.y;
|
||||
tFar.y = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.y, t0);
|
||||
t1 = std::min(tFar.y, t1);
|
||||
|
||||
if (tNear.z > tFar.z) {
|
||||
float tmp = tNear.z;
|
||||
tNear.z = tFar.z;
|
||||
tFar.z = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.z, t0);
|
||||
t1 = std::min(tFar.z, t1);
|
||||
|
||||
if (t0 <= t1) {
|
||||
*hit0 = t0;
|
||||
*hit1 = t1;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static inline float Lerp(float t, float a, float b) {
|
||||
return (1.f - t) * a + t * b;
|
||||
}
|
||||
|
||||
|
||||
static inline int Clamp(int v, int low, int high) {
|
||||
return std::min(std::max(v, low), high);
|
||||
}
|
||||
|
||||
|
||||
static inline float D(int x, int y, int z, int nVoxels[3], float density[]) {
|
||||
x = Clamp(x, 0, nVoxels[0]-1);
|
||||
y = Clamp(y, 0, nVoxels[1]-1);
|
||||
z = Clamp(z, 0, nVoxels[2]-1);
|
||||
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
|
||||
}
|
||||
|
||||
|
||||
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
return float3((p.x - pMin.x) / (pMax.x - pMin.x),
|
||||
(p.y - pMin.y) / (pMax.y - pMin.y),
|
||||
(p.z - pMin.z) / (pMax.z - pMin.z));
|
||||
}
|
||||
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
float density[], int nVoxels[3]) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
float3 vox = Offset(Pobj, pMin, pMax);
|
||||
vox.x = vox.x * nVoxels[0] - .5f;
|
||||
vox.y = vox.y * nVoxels[1] - .5f;
|
||||
vox.z = vox.z * nVoxels[2] - .5f;
|
||||
int vx = (int)(vox.x), vy = (int)(vox.y), vz = (int)(vox.z);
|
||||
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
|
||||
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
float d0 = Lerp(dy, d00, d10);
|
||||
float d1 = Lerp(dy, d01, d11);
|
||||
return Lerp(dz, d0, d1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static float
|
||||
transmittance(float3 p0, float3 p1, float3 pMin,
|
||||
float3 pMax, float sigma_t, float density[], int nVoxels[3]) {
|
||||
float rayT0, rayT1;
|
||||
Ray ray;
|
||||
ray.origin = p1;
|
||||
ray.dir = p0 - p1;
|
||||
|
||||
// Find the parametric t range along the ray that is inside the volume.
|
||||
if (!IntersectP(ray, pMin, pMax, &rayT0, &rayT1))
|
||||
return 1.;
|
||||
|
||||
rayT0 = std::max(rayT0, 0.f);
|
||||
|
||||
// Accumulate beam transmittance in tau
|
||||
float tau = 0;
|
||||
float rayLength = sqrtf(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||
ray.dir.z * ray.dir.z);
|
||||
float stepDist = 0.2;
|
||||
float stepT = stepDist / rayLength;
|
||||
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
while (t < rayT1) {
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
|
||||
return expf(-tau);
|
||||
}
|
||||
|
||||
|
||||
static float
|
||||
distanceSquared(float3 a, float3 b) {
|
||||
float3 d = a-b;
|
||||
return d.x*d.x + d.y*d.y + d.z*d.z;
|
||||
}
|
||||
|
||||
|
||||
static float
|
||||
raymarch(float density[], int nVoxels[3], const Ray &ray) {
|
||||
float rayT0, rayT1;
|
||||
float3 pMin(.3, -.2, .3), pMax(1.8, 2.3, 1.8);
|
||||
float3 lightPos(-1, 4, 1.5);
|
||||
|
||||
if (!IntersectP(ray, pMin, pMax, &rayT0, &rayT1))
|
||||
return 0.;
|
||||
|
||||
rayT0 = std::max(rayT0, 0.f);
|
||||
|
||||
// Parameters that define the volume scattering characteristics and
|
||||
// sampling rate for raymarching
|
||||
float Le = .25; // Emission coefficient
|
||||
float sigma_a = 10; // Absorption coefficient
|
||||
float sigma_s = 10; // Scattering coefficient
|
||||
float stepDist = 0.025; // Ray step amount
|
||||
float lightIntensity = 40; // Light source intensity
|
||||
|
||||
float tau = 0.f; // accumulated beam transmittance
|
||||
float L = 0; // radiance along the ray
|
||||
float rayLength = sqrtf(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||
ray.dir.z * ray.dir.z);
|
||||
float stepT = stepDist / rayLength;
|
||||
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
while (t < rayT1) {
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels);
|
||||
|
||||
// terminate once attenuation is high
|
||||
float atten = expf(-tau);
|
||||
if (atten < .005)
|
||||
break;
|
||||
|
||||
// direct lighting
|
||||
float Li = lightIntensity / distanceSquared(lightPos, pos) *
|
||||
transmittance(lightPos, pos, pMin, pMax, sigma_a + sigma_s,
|
||||
density, nVoxels);
|
||||
L += stepDist * atten * d * sigma_s * (Li + Le);
|
||||
|
||||
// update beam transmittance
|
||||
tau += stepDist * (sigma_a + sigma_s) * d;
|
||||
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
|
||||
// Gamma correction
|
||||
return powf(L, 1.f / 2.2f);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
volume_serial(float density[], int nVoxels[3], const float raster2camera[4][4],
|
||||
const float camera2world[4][4],
|
||||
int width, int height, float image[]) {
|
||||
int offset = 0;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x, ++offset) {
|
||||
Ray ray;
|
||||
generateRay(raster2camera, camera2world, x, y, ray);
|
||||
image[offset] = raymarch(density, nVoxels, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user