From a4bb6b552050bbc4f3991105d2d98bda93e47f26 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 1 Aug 2011 10:33:18 +0100 Subject: [PATCH] Add new example with implementation of Perlin Noise ~4.2x speedup versus serial on OSX / gcc. ~2.9x speedup versus serial on Windows / MSVC. --- builtins.m4 | 16 ++- examples/README.txt | 7 ++ examples/cpuid.h | 6 +- examples/examples.sln | 10 ++ examples/noise/.gitignore | 3 + examples/noise/Makefile | 26 +++++ examples/noise/noise.cpp | 150 ++++++++++++++++++++++++++++ examples/noise/noise.ispc | 164 ++++++++++++++++++++++++++++++ examples/noise/noise.vcxproj | 167 +++++++++++++++++++++++++++++++ examples/noise/noise_serial.cpp | 170 ++++++++++++++++++++++++++++++++ 10 files changed, 707 insertions(+), 12 deletions(-) create mode 100644 examples/noise/.gitignore create mode 100644 examples/noise/Makefile create mode 100644 examples/noise/noise.cpp create mode 100644 examples/noise/noise.ispc create mode 100755 examples/noise/noise.vcxproj create mode 100644 examples/noise/noise_serial.cpp diff --git a/builtins.m4 b/builtins.m4 index 28daa97c..661d9ba7 100644 --- a/builtins.m4 +++ b/builtins.m4 @@ -1431,26 +1431,24 @@ pl_done: define(`gen_gather', ` ;; Define the utility function to do the gather operation for a single element ;; of the type -define internal <$1 x $2> @__gather_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %ret, +define internal <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret, i32 %lane) nounwind readonly alwaysinline { ; compute address for this one from the base %offset32 = extractelement <$1 x i32> %offsets, i32 %lane - %offset64 = zext i32 %offset32 to i64 - %ptrdelta = add i64 %ptr64, %offset64 - %ptr = inttoptr i64 %ptrdelta to $2 * + %ptroffset = getelementptr i8 * %ptr, i32 %offset32 + %ptrcast = bitcast i8 * %ptroffset to $2 * ; load value and insert into returned value - %val = load $2 *%ptr + %val = load $2 *%ptrcast %updatedret = insertelement <$1 x $2> %ret, $2 
%val, i32 %lane ret <$1 x $2> %updatedret } -define <$1 x $2> @__gather_base_offsets_$2(i8*, <$1 x i32> %offsets, +define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x i32> %vecmask) nounwind readonly alwaysinline { entry: %mask = call i32 @__movmsk(<$1 x i32> %vecmask) - %ptr64 = ptrtoint i8 * %0 to i64 %maskKnown = call i1 @__is_compile_time_constant_mask(<$1 x i32> %vecmask) br i1 %maskKnown, label %known_mask, label %unknown_mask @@ -1474,10 +1472,10 @@ unknown_mask: <$1 x i32> %vecmask) %newOffsets = load <$1 x i32> * %offsetsPtr - %ret0 = call <$1 x $2> @__gather_elt_$2(i64 %ptr64, <$1 x i32> %newOffsets, + %ret0 = call <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %newOffsets, <$1 x $2> undef, i32 0) forloop(lane, 1, eval($1-1), - `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt_$2(i64 %ptr64, + `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %newOffsets, <$1 x $2> %retPREV, i32 LANE) ', `LANE', lane), `PREV', eval(lane-1))') ret <$1 x $2> %ret`'eval($1-1) diff --git a/examples/README.txt b/examples/README.txt index 7ef078db..03915b27 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -57,6 +57,13 @@ Linux, a pthreads-based task system is used (tasks_pthreads.cpp). When using tasks with ispc, no task system is mandated; the user is free to plug in any task system they want, for ease of interoperating with existing task systems. + +Noise +===== + +This example has an implementation of Ken Perlin's procedural "noise" +function, as described in his 2002 "Improving Noise" SIGGRAPH paper. 
+ Options ======= diff --git a/examples/cpuid.h b/examples/cpuid.h index f7ab70a8..81a25200 100644 --- a/examples/cpuid.h +++ b/examples/cpuid.h @@ -48,19 +48,19 @@ static void __cpuid(int info[4], int infoType) { inline bool CPUSupportsSSE2() { int info[4]; __cpuid(info, 1); - return (info[3] & (1 << 26)); + return (info[3] & (1 << 26)) != 0; } inline bool CPUSupportsSSE4() { int info[4]; __cpuid(info, 1); - return (info[2] & (1 << 19)); + return (info[2] & (1 << 19)) != 0; } inline bool CPUSupportsAVX() { int info[4]; __cpuid(info, 1); - return (info[2] & (1 << 28)); + return (info[2] & (1 << 28)) != 0; } #endif // ISPC_CPUID_H diff --git a/examples/examples.sln b/examples/examples.sln index 5e8de17a..e952716e 100755 --- a/examples/examples.sln +++ b/examples/examples.sln @@ -15,6 +15,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mandelbrot_tasks", "mandelb EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "aobench_instrumented", "aobench_instrumented\aobench_instrumented.vcxproj", "{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "noise", "noise\noise.vcxproj", "{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -79,6 +81,14 @@ Global {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|Win32.Build.0 = Release|Win32 {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|x64.ActiveCfg = Release|x64 {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|x64.Build.0 = Release|x64 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Debug|Win32.ActiveCfg = Debug|Win32 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Debug|Win32.Build.0 = Debug|Win32 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Debug|x64.ActiveCfg = Debug|x64 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Debug|x64.Build.0 = Debug|x64 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|Win32.ActiveCfg = Release|Win32 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|Win32.Build.0 = 
Release|Win32 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|x64.ActiveCfg = Release|x64 + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/examples/noise/.gitignore b/examples/noise/.gitignore new file mode 100644 index 00000000..db3af314 --- /dev/null +++ b/examples/noise/.gitignore @@ -0,0 +1,3 @@ +noise +*.ppm +objs diff --git a/examples/noise/Makefile b/examples/noise/Makefile new file mode 100644 index 00000000..66f4401d --- /dev/null +++ b/examples/noise/Makefile @@ -0,0 +1,26 @@ + +CXX=g++ -m64 +CXXFLAGS=-Iobjs/ -O3 -Wall +ISPC=ispc +ISPCFLAGS=-O2 --target=sse4 --arch=x86-64 + +default: noise + +.PHONY: dirs clean + +dirs: + /bin/mkdir -p objs/ + +clean: + /bin/rm -rf objs *~ noise + +noise: dirs objs/noise.o objs/noise_serial.o objs/noise_ispc.o + $(CXX) $(CXXFLAGS) -o $@ objs/noise.o objs/noise_ispc.o objs/noise_serial.o -lm + +objs/%.o: %.cpp + $(CXX) $< $(CXXFLAGS) -c -o $@ + +objs/noise.o: objs/noise_ispc.h + +objs/%_ispc.h objs/%_ispc.o: %.ispc + $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/noise/noise.cpp b/examples/noise/noise.cpp new file mode 100644 index 00000000..b3f3920d --- /dev/null +++ b/examples/noise/noise.cpp @@ -0,0 +1,150 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#define NOMINMAX
+#pragma warning (disable: 4244)
+#pragma warning (disable: 4305)
+#endif
+
+#include <stdio.h>
+#include <algorithm>
+#include "../timing.h"
+#include "../cpuid.h"
+#include "noise_ispc.h"
+using namespace ispc;
+
+extern void noise_serial(float x0, float y0, float x1, float y1,
+                         int width, int height, float output[]);
+
+/* Write buf (width*height values) to fn as a binary gray PPM; report and return if fn can't be opened. */
+static void
+writePPM(float *buf, int width, int height, const char *fn) {
+    FILE *fp = fopen(fn, "wb");
+    if (fp == NULL) { perror(fn); return; }  // never fprintf through a NULL stream
+    fprintf(fp, "P6\n%d %d\n255\n", width, height);  // magic, dimensions, max sample value
+    for (int i = 0; i < width*height; ++i) {
+        // Scale by 255 and clamp to the 0..255 sample range.
+        float v = buf[i] * 255.f;
+        if (v < 0) v = 0;
+        if (v > 255) v = 255;
+        for (int j = 0; j < 3; ++j)  // same sample for R, G and B
+            fputc((int)v, fp);  // int, not char: 128..255 does not fit a signed char
+    }
+    fclose(fp);
+}
+
+
+// Make sure that the vector ISA used during compilation is supported by
+// the processor. The ISPC_TARGET_* macro is set in the ispc-generated
+// header file that we include above.
+static void
+ensureTargetISAIsSupported() {
+#if defined(ISPC_TARGET_SSE2)
+    bool isaSupported = CPUSupportsSSE2();
+    const char *target = "SSE2";
+#elif defined(ISPC_TARGET_SSE4)
+    bool isaSupported = CPUSupportsSSE4();
+    const char *target = "SSE4";
+#elif defined(ISPC_TARGET_AVX)
+    bool isaSupported = CPUSupportsAVX();
+    const char *target = "AVX";
+#else
+#error "Unknown ISPC_TARGET_* value"
+#endif
+    if (!isaSupported) {
+        fprintf(stderr, "***\n*** Error: the ispc-compiled code uses the %s instruction "
+                "set, which isn't\n*** supported by this computer's CPU!\n", target);
+        fprintf(stderr, "***\n*** Please modify the "
+#ifdef _MSC_VER
+                "MSVC project file "
+#else
+                "Makefile "
+#endif
+                "to select another target (e.g. sse2)\n***\n");
+        exit(1);  // the ispc-compiled kernel cannot run on this CPU
+    }
+}
+
+
+int main() {
+    unsigned int width = 768;
+    unsigned int height = 768;
+    float x0 = -10;
+    float x1 = 10;
+    float y0 = -10;
+    float y1 = 10;
+
+    float *buf = new float[width*height];  // one float per pixel; released before returning
+
+    ensureTargetISAIsSupported();
+
+    //
+    // Compute the image using the ispc implementation; report the minimum
+    // time of three runs.
+    //
+    double minISPC = 1e30;
+    for (int i = 0; i < 3; ++i) {
+        reset_and_start_timer();
+        noise_ispc(x0, y0, x1, y1, width, height, buf);
+        double dt = get_elapsed_mcycles();
+        minISPC = std::min(minISPC, dt);
+    }
+
+    printf("[noise ispc]:\t\t\t[%.3f] million cycles\n", minISPC);
+    writePPM(buf, width, height, "noise-ispc.ppm");
+
+    // Clear out the buffer
+    for (unsigned int i = 0; i < width * height; ++i)
+        buf[i] = 0;
+
+    //
+    // And run the serial implementation 3 times, again reporting the
+    // minimum time.
+    //
+    double minSerial = 1e30;
+    for (int i = 0; i < 3; ++i) {
+        reset_and_start_timer();
+        noise_serial(x0, y0, x1, y1, width, height, buf);
+        double dt = get_elapsed_mcycles();
+        minSerial = std::min(minSerial, dt);
+    }
+
+    printf("[noise serial]:\t\t\t[%.3f] million cycles\n", minSerial);
+    writePPM(buf, width, height, "noise-serial.ppm");
+
+    printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minSerial/minISPC);
+    delete[] buf;
+    return 0;
+}
diff --git a/examples/noise/noise.ispc b/examples/noise/noise.ispc
new file mode 100644
index 00000000..cff2dd5e
--- /dev/null
+++ b/examples/noise/noise.ispc
@@ -0,0 +1,164 @@
+/*
+  Copyright (c) 2010-2011, Intel Corporation
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+   PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define NOISE_PERM_SIZE 256
+// Permutation table; its second half repeats the first, so chained lookups in Grad() need no wrap.
+static uniform int NoisePerm[2 * NOISE_PERM_SIZE] = {
+    151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140,
+    36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148, 247, 120,
+    234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32, 57, 177, 33,
+    88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71,
+    134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133,
+    230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54, 65, 25, 63, 161,
+    1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196, 135, 130,
+    116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250,
+    124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212, 207, 206, 59, 227,
+    47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213, 119, 248, 152, 2, 44,
+    154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9, 129, 22, 39, 253, 19,
+    98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104, 218, 246, 97, 228, 251,
+    34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51, 145, 235, 249,
+    14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115,
+    121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72,
+    243, 141, 128, 195, 78, 66, 215, 61, 156, 180, 151, 160, 137, 91, 90, 15,
+    131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142, 8, 99,
+    37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252,
+    219, 203, 117, 35, 11, 32, 57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125,
+    136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158,
+    231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245,
+    40, 244, 102, 143, 54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187,
+    208, 89, 18, 169, 200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109,
+    198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118,
+    126, 255, 82, 85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42,
+    223, 183, 170, 213, 119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155,
+    167, 43, 172, 9, 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232,
+    178, 185, 112, 104, 218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144,
+    12, 191, 179, 162, 241, 81, 51, 145, 235, 249, 14, 239, 107, 49, 192, 214,
+    31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150,
+    254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78,
+    66, 215, 61, 156, 180
+};
+
+// Smooth cubic ramp: 0 at value <= low, 1 at value >= high, v*v*(3-2v) in between.
+inline float SmoothStep(float low, float high, float value) {
+    float v = clamp((value - low) / (high - low), 0.f, 1.f);
+    return v * v * (-2.f * v + 3.f);
+}
+
+// Largest integer not greater than val.
+inline int Floor2Int(float val) {
+    return (int)floor(val);
+}
+
+// Hash corner (x,y,z) through NoisePerm; the low 4 hash bits pick and sign two of dx/dy/dz to sum.
+inline float Grad(int x, int y, int z, float dx, float dy, float dz) {
+    int h = NoisePerm[NoisePerm[NoisePerm[x]+y]+z];
+    h &= 15;
+    float u = h<8 || h==12 || h==13 ? dx : dy;
+    float v = h<4 || h==12 || h==13 ? dy : dz;
+    return ((h&1) ? -u : u) + ((h&2) ? -v : v);
+}
+
+// Quintic fade curve 6t^5 - 15t^4 + 10t^3.
+inline float NoiseWeight(float t) {
+    float t3 = t*t*t;
+    float t4 = t3*t;
+    return 6.f*t4*t - 15.f*t4 + 10.f*t3;
+}
+
+// Linear blend: low at t == 0, high at t == 1.
+inline float Lerp(float t, float low, float high) {
+    return (1. - t) * low + t * high;
+}
+
+// Noise value at (x,y,z): Grad() at the 8 corners of the enclosing cell, blended trilinearly.
+static float Noise(float x, float y, float z) {
+    // Compute noise cell coordinates and offsets
+    int ix = Floor2Int(x), iy = Floor2Int(y), iz = Floor2Int(z);
+    float dx = x - ix, dy = y - iy, dz = z - iz;
+
+    // Compute gradient weights
+    ix &= (NOISE_PERM_SIZE-1);
+    iy &= (NOISE_PERM_SIZE-1);
+    iz &= (NOISE_PERM_SIZE-1);
+    float w000 = Grad(ix, iy, iz, dx, dy, dz);
+    float w100 = Grad(ix+1, iy, iz, dx-1, dy, dz);
+    float w010 = Grad(ix, iy+1, iz, dx, dy-1, dz);
+    float w110 = Grad(ix+1, iy+1, iz, dx-1, dy-1, dz);
+    float w001 = Grad(ix, iy, iz+1, dx, dy, dz-1);
+    float w101 = Grad(ix+1, iy, iz+1, dx-1, dy, dz-1);
+    float w011 = Grad(ix, iy+1, iz+1, dx, dy-1, dz-1);
+    float w111 = Grad(ix+1, iy+1, iz+1, dx-1, dy-1, dz-1);
+
+    // Compute trilinear interpolation of weights
+    float wx = NoiseWeight(dx), wy = NoiseWeight(dy), wz = NoiseWeight(dz);
+    float x00 = Lerp(wx, w000, w100);
+    float x10 = Lerp(wx, w010, w110);
+    float x01 = Lerp(wx, w001, w101);
+    float x11 = Lerp(wx, w011, w111);
+    float y0 = Lerp(wy, x00, x10);
+    float y1 = Lerp(wy, x01, x11);
+    return Lerp(wz, y0, y1);
+}
+
+// Half the sum of |Noise| over `octaves` octaves; each octave scales frequency by 1.99 and amplitude by omega.
+static float Turbulence(float x, float y, float z, int octaves) {
+    float omega = 0.6;
+
+    float sum = 0., lambda = 1., o = 1.;
+    for (int i = 0; i < octaves; ++i) {
+        sum += abs(o * Noise(lambda * x, lambda * y, lambda * z));
+        lambda *= 1.99f;
+        o *= omega;
+    }
+    return sum * 0.5;
+}
+
+// Fill row-major output[width*height] with Turbulence at z = 0.6 sampled over [x0,x1) x [y0,y1).
+export void noise_ispc(uniform float x0, uniform float y0, uniform float x1,
+                       uniform float y1, uniform int width, uniform int height,
+                       uniform float output[])
+{
+    uniform float dx = (x1 - x0) / width;
+    uniform float dy = (y1 - y0) / height;
+
+    for (uniform int j = 0; j < height; j++) {
+        for (uniform int i = 0; i < width; i += programCount) {
+            float x = x0 + (i + programIndex) * dx;
+            float y = y0 + j * dy;
+            float t = Turbulence(x, y, 0.6, 8);
+            // When width % programCount != 0 the last step has lanes past the row end: skip their store.
+            if (i + programIndex < width) output[j * width + i + programIndex] = t;
+        }
+    }
+}
+
+
diff --git a/examples/noise/noise.vcxproj
b/examples/noise/noise.vcxproj new file mode 100755 index 00000000..d5bf3109 --- /dev/null +++ b/examples/noise/noise.vcxproj @@ -0,0 +1,167 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} + Win32Proj + noise + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + Fast + + + Console + true + true + true + + + + + + + + + Document + ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4 + + ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4 + + %(Filename).obj;%(Filename)_ispc.h + %(Filename).obj;%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4 + + ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4 + + %(Filename).obj;%(Filename)_ispc.h + %(Filename).obj;%(Filename)_ispc.h + + + + + + \ No newline at end of file diff --git a/examples/noise/noise_serial.cpp b/examples/noise/noise_serial.cpp new file mode 100644 index 00000000..a988bc1a --- /dev/null +++ b/examples/noise/noise_serial.cpp @@ -0,0 +1,170 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#include <math.h>
+
+#define NOISE_PERM_SIZE 256
+// Permutation table; its second half repeats the first, so chained lookups in Grad() need no wrap.
+static int NoisePerm[2 * NOISE_PERM_SIZE] = {
+    151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140,
+    36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148, 247, 120,
+    234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32, 57, 177, 33,
+    88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71,
+    134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133,
+    230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54, 65, 25, 63, 161,
+    1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196, 135, 130,
+    116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250,
+    124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212, 207, 206, 59, 227,
+    47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213, 119, 248, 152, 2, 44,
+    154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9, 129, 22, 39, 253, 19,
+    98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104, 218, 246, 97, 228, 251,
+    34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51, 145, 235, 249,
+    14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115,
+    121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72,
+    243, 141, 128, 195, 78, 66, 215, 61, 156, 180, 151, 160, 137, 91, 90, 15,
+    131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142, 8, 99,
+    37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252,
+    219, 203, 117, 35, 11, 32, 57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125,
+    136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158,
+    231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245,
+    40, 244, 102, 143, 54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187,
+    208, 89, 18, 169, 200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109,
+    198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118,
+    126, 255, 82, 85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42,
+    223, 183, 170, 213, 119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155,
+    167, 43, 172, 9, 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232,
+    178, 185, 112, 104, 218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144,
+    12, 191, 179, 162, 241, 81, 51, 145, 235, 249, 14, 239, 107, 49, 192, 214,
+    31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150,
+    254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78,
+    66, 215, 61, 156, 180
+};
+
+// Limit v to the range [low, high].
+inline float Clamp(float v, float low, float high) {
+    return v < low ? low : ((v > high) ? high : v);
+}
+
+// Smooth cubic ramp: 0 at value <= low, 1 at value >= high, v*v*(3-2v) in between.
+inline float SmoothStep(float low, float high, float value) {
+    float v = Clamp((value - low) / (high - low), 0.f, 1.f);
+    return v * v * (-2.f * v + 3.f);
+}
+
+// Largest integer not greater than val.
+inline int Floor2Int(float val) {
+    return (int)floorf(val);
+}
+
+// Hash corner (x,y,z) through NoisePerm; the low 4 hash bits pick and sign two of dx/dy/dz to sum.
+inline float Grad(int x, int y, int z, float dx, float dy, float dz) {
+    int h = NoisePerm[NoisePerm[NoisePerm[x]+y]+z];
+    h &= 15;
+    float u = h<8 || h==12 || h==13 ? dx : dy;
+    float v = h<4 || h==12 || h==13 ? dy : dz;
+    return ((h&1) ? -u : u) + ((h&2) ? -v : v);
+}
+
+// Quintic fade curve 6t^5 - 15t^4 + 10t^3.
+inline float NoiseWeight(float t) {
+    float t3 = t*t*t;
+    float t4 = t3*t;
+    return 6.f*t4*t - 15.f*t4 + 10.f*t3;
+}
+
+// Linear blend: low at t == 0, high at t == 1; kept in single precision like the ispc kernel.
+inline float Lerp(float t, float low, float high) {
+    return (1.f - t) * low + t * high;
+}
+
+// Noise value at (x,y,z): Grad() at the 8 corners of the enclosing cell, blended trilinearly.
+static float Noise(float x, float y, float z) {
+    // Compute noise cell coordinates and offsets
+    int ix = Floor2Int(x), iy = Floor2Int(y), iz = Floor2Int(z);
+    float dx = x - ix, dy = y - iy, dz = z - iz;
+
+    // Compute gradient weights
+    ix &= (NOISE_PERM_SIZE-1);
+    iy &= (NOISE_PERM_SIZE-1);
+    iz &= (NOISE_PERM_SIZE-1);
+    float w000 = Grad(ix, iy, iz, dx, dy, dz);
+    float w100 = Grad(ix+1, iy, iz, dx-1, dy, dz);
+    float w010 = Grad(ix, iy+1, iz, dx, dy-1, dz);
+    float w110 = Grad(ix+1, iy+1, iz, dx-1, dy-1, dz);
+    float w001 = Grad(ix, iy, iz+1, dx, dy, dz-1);
+    float w101 = Grad(ix+1, iy, iz+1, dx-1, dy, dz-1);
+    float w011 = Grad(ix, iy+1, iz+1, dx, dy-1, dz-1);
+    float w111 = Grad(ix+1, iy+1, iz+1, dx-1, dy-1, dz-1);
+
+    // Compute trilinear interpolation of weights
+    float wx = NoiseWeight(dx), wy = NoiseWeight(dy), wz = NoiseWeight(dz);
+    float x00 = Lerp(wx, w000, w100);
+    float x10 = Lerp(wx, w010, w110);
+    float x01 = Lerp(wx, w001, w101);
+    float x11 = Lerp(wx, w011, w111);
+    float y0 = Lerp(wy, x00, x10);
+    float y1 = Lerp(wy, x01, x11);
+    return Lerp(wz, y0, y1);
+}
+
+// Half the sum of |Noise| over `octaves` octaves; each octave scales frequency by 1.99 and amplitude by omega.
+static float Turbulence(float x, float y, float z, int octaves) {
+    float omega = 0.6;
+
+    float sum = 0., lambda = 1., o = 1.;
+    for (int i = 0; i < octaves; ++i) {
+        sum += fabsf(o * Noise(lambda * x, lambda * y, lambda * z));
+        lambda *= 1.99f;
+        o *= omega;
+    }
+    return sum * 0.5;
+}
+
+// Fill row-major output[width*height] with Turbulence at z = 0.6 sampled over [x0,x1) x [y0,y1).
+void noise_serial(float x0, float y0, float x1, float y1,
+                  int width, int height, float output[])
+{
+    float dx = (x1 - x0) / width;
+    float dy = (y1 - y0) / height;
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; ++i) {
+            float x = x0 + i * dx;
+            float y = y0 + j * dy;
+
+            int index = (j * width + i);
+            output[index] = Turbulence(x, y, 0.6, 8);
+        }
+    }
+}
+