fixes
This commit is contained in:
17
examples_ptx/ispc_malloc.h
Normal file
17
examples_ptx/ispc_malloc.h
Normal file
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef _CUDA_
|
||||
extern void ispc_malloc(void **ptr, const size_t size);
|
||||
extern void ispc_free(void *ptr);
|
||||
#else
|
||||
#include <cstdlib>
|
||||
/*
 * Allocator used when _CUDA_ is not defined: asks malloc() for `size`
 * bytes and stores the pointer it returns in *ptr.  No failure check is
 * made here, so *ptr receives whatever malloc() produced.
 */
static inline void ispc_malloc(void **ptr, const size_t size)
{
    void *const block = malloc(size);

    *ptr = block;
}
|
||||
/*
 * Release routine used when _CUDA_ is not defined: hands `ptr` directly
 * to free().
 */
static inline void ispc_free(void *ptr)
{
    free(ptr);
}
|
||||
|
||||
#endif
|
||||
@@ -40,11 +40,11 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include "../timing.h"
|
||||
#include "../ispc_malloc.h"
|
||||
#include "mandelbrot_tasks_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
@@ -103,25 +103,28 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
|
||||
int maxIterations = 512;
|
||||
int *buf = new int[width*height];
|
||||
int *buf;
|
||||
ispc_malloc(&buf, n*widht*height);
|
||||
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation; report the minimum
|
||||
// time of three runs.
|
||||
//
|
||||
double minISPC = 1e30;
|
||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
reset_and_start_timer();
|
||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt);
|
||||
minISPC = std::min(minISPC, dt);
|
||||
for (int i = 0; i < test_iterations[0]; ++i)
|
||||
{
|
||||
// Clear out the buffer
|
||||
reset_and_start_timer();
|
||||
ispc::mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_msec();
|
||||
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec\n", dt);
|
||||
minISPC = std::min(minISPC, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] msec\n", minISPC);
|
||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||
|
||||
|
||||
@@ -129,22 +132,26 @@ int main(int argc, char *argv[]) {
|
||||
// And run the serial implementation 3 times, again reporting the
|
||||
// minimum time.
|
||||
//
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
|
||||
double minSerial = 1e30;
|
||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
reset_and_start_timer();
|
||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt);
|
||||
minSerial = std::min(minSerial, dt);
|
||||
for (int i = 0; i < test_iterations[1]; ++i)
|
||||
{
|
||||
reset_and_start_timer();
|
||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
printf("@time of serial run:\t\t\t[%.3f] msec\n", dt);
|
||||
minSerial = std::min(minSerial, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
|
||||
printf("[mandelbrot serial]:\t\t[%.3f] msec\n", minSerial);
|
||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
||||
|
||||
ispc_free(buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
110
examples_ptx/timing.h
Normal file
110
examples_ptx/timing.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
#include <sys/time.h>
|
||||
/*
 * Returns the current time of day in seconds, as reported by
 * gettimeofday(), with the microsecond field folded into the fractional
 * part.  Returns 0.0 if gettimeofday() fails.
 */
static inline double rtc(void)
{
    /* Zero-initialised so a failed call cannot expose indeterminate data. */
    struct timeval now = { 0, 0 };

    /* POSIX leaves the behaviour unspecified when the timezone argument is
       non-NULL, and the zone was never read here, so none is requested. */
    if (gettimeofday(&now, NULL) != 0) {
        return 0.0;
    }

    return (double)now.tv_sec + 1.e-6 * (double)now.tv_usec;
}
|
||||
|
||||
#ifdef __arm__
|
||||
#include <sys/time.h>
|
||||
// There's no easy way to get a hardware clock counter on ARM, so instead
|
||||
// we'll pretend it's a 1GHz processor and then compute pretend cycles
|
||||
// based on elapsed time from gettimeofday().
|
||||
/*
 * Pseudo cycle counter for ARM builds.  The first call records the current
 * gettimeofday() reading as the origin and returns 0; every later call
 * returns the microseconds elapsed since that origin multiplied by 1000.
 *
 * Declared static, like the other helpers in this header: a plain `inline`
 * definition gives C99/C11 no external definition to link against when the
 * call is not inlined, and may not contain static locals.
 */
static __inline__ uint64_t rdtsc() {
    static int have_origin = 0;     /* int flag: no <stdbool.h> needed in C */
    static struct timeval origin;
    struct timeval now;

    if (!have_origin) {
        gettimeofday(&origin, NULL);
        have_origin = 1;
        return 0;
    }

    gettimeofday(&now, NULL);

    /* The microsecond difference can be negative; once converted to
       unsigned long long for the sum, the wrap-around cancels and the
       total is still the elapsed microsecond count. */
    return (1000000ull * (now.tv_sec - origin.tv_sec) +
            (now.tv_usec - origin.tv_usec)) * 1000ull;
}
|
||||
|
||||
#else // __arm__
|
||||
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#define rdtsc __rdtsc
|
||||
#else // WIN32
|
||||
/*
 * Reads the x86 time-stamp counter and returns it as a 64-bit value
 * (EDX in the high half, EAX in the low half).
 *
 * CPUID with EAX = 0 is executed immediately before RDTSC; presumably this
 * is there to serialise the instruction stream before the counter is
 * sampled.
 *
 * Declared static, like the other helpers in this header: a plain `inline`
 * definition gives C99/C11 no external definition to link against when the
 * call is not inlined.
 */
static __inline__ uint64_t rdtsc() {
    uint32_t low, high;

    /* The clobber list names the full-width registers CPUID overwrites. */
#ifdef __x86_64
    __asm__ __volatile__ ("xorl %%eax,%%eax \n cpuid"
                          ::: "%rax", "%rbx", "%rcx", "%rdx" );
#else
    __asm__ __volatile__ ("xorl %%eax,%%eax \n cpuid"
                          ::: "%eax", "%ebx", "%ecx", "%edx" );
#endif
    __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));

    return ((uint64_t)high << 32) | low;
}
|
||||
#endif // !WIN32
|
||||
#endif // !__arm__
|
||||
|
||||
/* rdtsc() readings: `start` is stored by reset_and_start_timer(), `end` is
   refreshed whenever the elapsed time is queried. */
static uint64_t start, end;
|
||||
/* rtc() readings: `tstart` is stored by reset_and_start_timer(), `tend` is
   refreshed whenever the elapsed time is queried. */
static double tstart, tend;
|
||||
|
||||
static inline void reset_and_start_timer()
|
||||
{
|
||||
start = rdtsc();
|
||||
tstart = rtc();
|
||||
}
|
||||
|
||||
/* Returns the number of millions of elapsed processor cycles since the
|
||||
last reset_and_start_timer() call. */
|
||||
static inline double get_elapsed_mcycles()
|
||||
{
|
||||
end = rdtsc();
|
||||
tend = rtc();
|
||||
#if 0
|
||||
return (end-start) / (1024. * 1024.);
|
||||
#else
|
||||
return (tend - tstart)*1e3;
|
||||
#endif
|
||||
}
|
||||
Reference in New Issue
Block a user