fixes
This commit is contained in:
17
examples_ptx/ispc_malloc.h
Normal file
17
examples_ptx/ispc_malloc.h
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _CUDA_
|
||||||
|
extern void ispc_malloc(void **ptr, const size_t size);
|
||||||
|
extern void ispc_free(void *ptr);
|
||||||
|
#else
|
||||||
|
#include <cstdlib>
|
||||||
|
/*
 * Host-side allocator used on the non-_CUDA_ branch of this header.
 *
 * Allocates `size` bytes with malloc() and stores the result in `*ptr`.
 * malloc() failure is not treated specially: `*ptr` simply receives NULL,
 * so callers must check it before use.  A NULL `ptr` is ignored rather
 * than dereferenced.
 */
static inline void ispc_malloc(void **ptr, const size_t size)
{
    if (ptr == NULL) {
        return;
    }
    *ptr = malloc(size);
}
|
||||||
|
/*
 * Host-side release routine: forwards `ptr` unchanged to free().
 */
static inline void ispc_free(void *ptr)
{
    void *const block = ptr;

    free(block);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -40,11 +40,11 @@
|
|||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string.h>
|
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
|
#include "../ispc_malloc.h"
|
||||||
#include "mandelbrot_tasks_ispc.h"
|
#include "mandelbrot_tasks_ispc.h"
|
||||||
using namespace ispc;
|
|
||||||
|
|
||||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||||
int width, int height, int maxIterations,
|
int width, int height, int maxIterations,
|
||||||
@@ -103,25 +103,28 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
|
|
||||||
int maxIterations = 512;
|
int maxIterations = 512;
|
||||||
int *buf = new int[width*height];
|
int *buf;
|
||||||
|
ispc_malloc(&buf, n*widht*height);
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
|
buf[i] = 0;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Compute the image using the ispc implementation; report the minimum
|
// Compute the image using the ispc implementation; report the minimum
|
||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (int i = 0; i < test_iterations[0]; ++i)
|
||||||
|
{
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
|
||||||
buf[i] = 0;
|
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
ispc::mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_msec();
|
||||||
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt);
|
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec\n", dt);
|
||||||
minISPC = std::min(minISPC, dt);
|
minISPC = std::min(minISPC, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
printf("[mandelbrot ispc+tasks]:\t[%.3f] msec\n", minISPC);
|
||||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||||
|
|
||||||
|
|
||||||
@@ -129,22 +132,26 @@ int main(int argc, char *argv[]) {
|
|||||||
// And run the serial implementation 3 times, again reporting the
|
// And run the serial implementation 3 times, again reporting the
|
||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
|
|
||||||
|
double minSerial = 1e30;
|
||||||
|
for (int i = 0; i < test_iterations[1]; ++i)
|
||||||
|
{
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt);
|
printf("@time of serial run:\t\t\t[%.3f] msec\n", dt);
|
||||||
minSerial = std::min(minSerial, dt);
|
minSerial = std::min(minSerial, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
|
printf("[mandelbrot serial]:\t\t[%.3f] msec\n", minSerial);
|
||||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||||
|
|
||||||
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
||||||
|
|
||||||
|
ispc_free(buf);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
110
examples_ptx/timing.h
Normal file
110
examples_ptx/timing.h
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
#include <stdint.h>
#include <sys/time.h>
|
||||||
|
/*
 * Returns the current time of day in seconds, as reported by
 * gettimeofday(): whole seconds plus the microsecond part scaled by 1e-6.
 *
 * The return value of gettimeofday() is not checked; if the call leaves
 * the timeval untouched the result is 0.0 because the struct starts zeroed.
 */
static inline double rtc(void)
{
    struct timeval now = {0, 0};

    /* The timezone argument is obsolete and POSIX leaves a non-NULL value
       unspecified, so only the timeval is requested. */
    gettimeofday(&now, NULL);

    return (double) now.tv_sec + 1.e-6 * (double) now.tv_usec;
}
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
#include <sys/time.h>
|
||||||
|
// There's no easy way to get a hardware clock counter on ARM, so instead
// we pretend it's a 1GHz processor and compute pretend cycles from the
// elapsed time reported by gettimeofday().
//
// The first call records the reference time and returns 0.  Every later
// call returns the microseconds elapsed since that first call, multiplied
// by 1000 (i.e. one pretend cycle per nanosecond).
//
// Declared static: the function keeps modifiable function-local statics,
// which a non-static inline definition may not contain in C, and static
// linkage matches the other helpers in this header.
static __inline__ uint64_t rdtsc(void) {
    static int have_origin = 0;
    static struct timeval origin;
    struct timeval now;
    int64_t elapsed_usec;

    if (!have_origin) {
        gettimeofday(&origin, NULL);
        have_origin = 1;
        return 0;
    }

    gettimeofday(&now, NULL);

    // tv_usec of `now` may be smaller than that of `origin`; doing the
    // subtraction in signed 64-bit arithmetic makes the borrow explicit.
    elapsed_usec = (int64_t)(now.tv_sec - origin.tv_sec) * 1000000
                 + (int64_t)(now.tv_usec - origin.tv_usec);

    return (uint64_t)elapsed_usec * 1000ull;
}
|
||||||
|
|
||||||
|
#else // __arm__
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#define rdtsc __rdtsc
|
||||||
|
#else // WIN32
|
||||||
|
/*
 * Reads the x86 time-stamp counter and returns it as a 64-bit value
 * (EDX in the upper 32 bits, EAX in the lower 32 bits).
 *
 * A CPUID with EAX cleared is executed immediately before RDTSC.
 * NOTE(review): presumably this is used as a serializing barrier so the
 * counter is not sampled early -- confirm against the Intel SDM.
 *
 * Declared static so that, like the other helpers in this header, each
 * translation unit gets its own copy and no external definition is needed.
 */
static __inline__ uint64_t rdtsc(void) {
    uint32_t low, high;

#ifdef __x86_64
    __asm__ __volatile__ ("xorl %%eax,%%eax \n cpuid"
                          ::: "%rax", "%rbx", "%rcx", "%rdx" );
#else
    __asm__ __volatile__ ("xorl %%eax,%%eax \n cpuid"
                          ::: "%eax", "%ebx", "%ecx", "%edx" );
#endif
    __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));

    return ((uint64_t)high << 32) | low;
}
|
||||||
|
#endif // !WIN32
|
||||||
|
#endif // !__arm__
|
||||||
|
|
||||||
|
/* rdtsc() samples: `start` is written by reset_and_start_timer(),
   `end` by get_elapsed_mcycles(). */
static uint64_t start;
static uint64_t end;

/* rtc() samples (seconds) taken at the same two points. */
static double tstart;
static double tend;
|
||||||
|
|
||||||
|
static inline void reset_and_start_timer()
|
||||||
|
{
|
||||||
|
start = rdtsc();
|
||||||
|
tstart = rtc();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the number of millions of elapsed processor cycles since the
|
||||||
|
last reset_and_start_timer() call. */
|
||||||
|
static inline double get_elapsed_mcycles()
|
||||||
|
{
|
||||||
|
end = rdtsc();
|
||||||
|
tend = rtc();
|
||||||
|
#if 0
|
||||||
|
return (end-start) / (1024. * 1024.);
|
||||||
|
#else
|
||||||
|
return (tend - tstart)*1e3;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user