diff --git a/ptxtools/Makefile b/ptxtools/Makefile index 66a108c6..1cb3a8d2 100644 --- a/ptxtools/Makefile +++ b/ptxtools/Makefile @@ -1,3 +1,35 @@ +# +# Copyright (c) 2014, Evghenii Gaburov +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + all: ptxcc ptxgen CXX=clang++ diff --git a/ptxtools/PTXLexer.h b/ptxtools/PTXLexer.h index 1136fe4d..dd1f8504 100644 --- a/ptxtools/PTXLexer.h +++ b/ptxtools/PTXLexer.h @@ -1,3 +1,40 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on GPU Ocelot PTX parser : https://code.google.com/p/gpuocelot/ + */ + #pragma once #include diff --git a/ptxtools/PTXParser.h b/ptxtools/PTXParser.h index d221d739..6f8d1a81 100644 --- a/ptxtools/PTXParser.h +++ b/ptxtools/PTXParser.h @@ -1,3 +1,40 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on GPU Ocelot PTX parser : https://code.google.com/p/gpuocelot/ + */ + #pragma once #undef yyFlexLexer diff --git a/ptxtools/main.cpp b/ptxtools/main.cpp deleted file mode 100644 index f5de981d..00000000 --- a/ptxtools/main.cpp +++ /dev/null @@ -1,1458 +0,0 @@ -/* - -Bonsai V2: A parallel GPU N-body gravitational Tree-code - -(c) 2010-2012: -Jeroen Bedorf -Evghenii Gaburov -Simon Portegies Zwart - -Leiden Observatory, Leiden University - -http://castle.strw.leidenuniv.nl -http://github.com/treecode/Bonsai - -*/ - -#ifdef WIN32 -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include -#include -#define M_PI 3.14159265358979323846264338328 - -#include -#include -void srand48(const long seed) -{ - srand(seed); -} -//JB This is not a proper work around but just to get things compiled... -double drand48() -{ - return double(rand())/RAND_MAX; -} - - -#endif - - -#ifdef USE_MPI - #include - #include -#endif - -#include -#include -#include -#include -#include -#include "log.h" -#include "anyoption.h" -#include "renderloop.h" -#include "plummer.h" -#include "disk_shuffle.h" -#ifdef GALACTICS -#include "galactics.h" -#endif - - -#if ENABLE_LOG - bool ENABLE_RUNTIME_LOG; - bool PREPEND_RANK; - int PREPEND_RANK_PROCID; - int PREPEND_RANK_NPROCS; -#endif - -using namespace std; - -#include "../profiling/bonsai_timing.h" - -int devID; -int renderDevID; - -extern void initTimers() -{ -#ifndef CUXTIMER_DISABLE - // Set up the profiling timing info - build_tree_init(); - compute_propertiesD_init(); - dev_approximate_gravity_init(); - parallel_init(); - sortKernels_init(); - timestep_init(); -#endif -} - -extern void displayTimers() -{ -#ifndef CUXTIMER_DISABLE - // Display all timing info on the way out - build_tree_display(); - compute_propertiesD_display(); - //dev_approximate_gravity_display(); - //parallel_display(); - //sortKernels_display(); - //timestep_display(); -#endif -} - -#include "octree.h" - -#ifdef USE_OPENGL -#include "renderloop.h" -#include -#endif - -void read_dumbp_file_parallel(vector &bodyPositions, vector &bodyVelocities, vector &bodiesIDs, float eps2, - string fileName, int rank, int procs, int &NTotal2, int &NFirst, int &NSecond, int &NThird, octree *tree, int reduce_bodies_factor) -{ - //Process 0 does the file reading and sends the data - //to the other processes - - //Now we have different types of files, try to determine which one is used - /***** - If individual softening is on there is only one option: - Header is formatted as follows: - N # # # - so read the first number and compute how particles should be distributed - - If individual softening is NOT enabled, i can be anything, but for ease I assume standard dumbp files: - no Header - ID mass x y z vx vy vz - now the next step is risky, we assume mass adds up to 1, so number of particles will be : 1 / mass - use this as initial particle distribution - - */ - - - char fullFileName[256]; - sprintf(fullFileName, "%s", fileName.c_str()); - - LOG("Trying to read file: %s \n", fullFileName); - - ifstream inputFile(fullFileName, ios::in); - - if(!inputFile.is_open()) - { - LOG("Can't open input file \n"); - exit(0); - } - - int NTotal; - int idummy; - real4 positions; - real4 velocity; - - #ifndef INDSOFT - inputFile >> idummy >> positions.w; - inputFile.seekg(0, ios::beg); //Reset file pointer - NTotal = (int)(1 / positions.w); - #else - //Read the Ntotal from the file header - inputFile >> NTotal >> NFirst >> NSecond >> NThird; - #endif - - - - //Rough divide - uint perProc = NTotal / procs; - bodyPositions.reserve(perProc+10); - bodyVelocities.reserve(perProc+10); - bodiesIDs.reserve(perProc+10); - perProc -= 1; - - //Start reading - int particleCount = 0; - int procCntr = 1; - - int globalParticleCount = 0; - - while(!inputFile.eof()) { - - inputFile >> idummy - >> positions.w >> positions.x >> positions.y >> positions.z - >> velocity.x >> velocity.y >> velocity.z; - - globalParticleCount++; - - if( globalParticleCount % reduce_bodies_factor == 0 ) - positions.w *= reduce_bodies_factor; - - if( globalParticleCount % reduce_bodies_factor != 0 ) - continue; - - #ifndef INDSOFT - velocity.w = sqrt(eps2); - #else - inputFile >> velocity.w; //Read the softening from the input file - #endif - - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - - #ifndef INDSOFT - idummy = particleCount; - #endif - - bodiesIDs.push_back(idummy); - - particleCount++; - - - if(bodyPositions.size() > perProc && procCntr != procs) - { - tree->ICSend(procCntr, &bodyPositions[0], &bodyVelocities[0], &bodiesIDs[0], (int)bodyPositions.size()); - procCntr++; - - bodyPositions.clear(); - bodyVelocities.clear(); - bodiesIDs.clear(); - } - }//end while - - inputFile.close(); - - //Clear the last one since its double - bodyPositions.resize(bodyPositions.size()-1); - NTotal2 = particleCount-1; - - LOGF(stderr, "NTotal: %d\tper proc: %d\tFor ourself: %d \n", NTotal, perProc, (int)bodiesIDs.size()); -} - -void read_tipsy_file_parallel(vector &bodyPositions, vector &bodyVelocities, - vector &bodiesIDs, float eps2, string fileName, - int rank, int procs, int &NTotal2, int &NFirst, - int &NSecond, int &NThird, octree *tree, - vector &dustPositions, vector &dustVelocities, - vector &dustIDs, int reduce_bodies_factor, - int reduce_dust_factor) -{ - //Process 0 does the file reading and sends the data - //to the other processes - /* - - Read in our custom version of the tipsy file format. - Most important change is that we store particle id on the - location where previously the potential was stored. - */ - - - char fullFileName[256]; - sprintf(fullFileName, "%s", fileName.c_str()); - - LOG("Trying to read file: %s \n", fullFileName); - - - - ifstream inputFile(fullFileName, ios::in | ios::binary); - if(!inputFile.is_open()) - { - LOG("Can't open input file \n"); - exit(0); - } - - dump h; - inputFile.read((char*)&h, sizeof(h)); - - int NTotal; - int idummy; - real4 positions; - real4 velocity; - - - //Read tipsy header - NTotal = h.nbodies; - NFirst = h.ndark; - NSecond = h.nstar; - NThird = h.nsph; - - tree->set_t_current((float) h.time); - - //Rough divide - uint perProc = (NTotal / procs) /reduce_bodies_factor; - bodyPositions.reserve(perProc+10); - bodyVelocities.reserve(perProc+10); - bodiesIDs.reserve(perProc+10); - perProc -= 1; - - //Start reading - int particleCount = 0; - int procCntr = 1; - - dark_particle d; - star_particle s; - - int globalParticleCount = 0; - int bodyCount = 0; - int dustCount = 0; - - for(int i=0; i < NTotal; i++) - { - if(i < NFirst) - { - inputFile.read((char*)&d, sizeof(d)); - velocity.w = d.eps; - positions.w = d.mass; - positions.x = d.pos[0]; - positions.y = d.pos[1]; - positions.z = d.pos[2]; - velocity.x = d.vel[0]; - velocity.y = d.vel[1]; - velocity.z = d.vel[2]; - idummy = d.phi; - } - else - { - inputFile.read((char*)&s, sizeof(s)); - velocity.w = s.eps; - positions.w = s.mass; - positions.x = s.pos[0]; - positions.y = s.pos[1]; - positions.z = s.pos[2]; - velocity.x = s.vel[0]; - velocity.y = s.vel[1]; - velocity.z = s.vel[2]; - idummy = s.phi; - } - - - if(positions.z < -10e10) - { - fprintf(stderr," Removing particle %d because of Z is: %f \n", globalParticleCount, positions.z); - continue; - } - - - globalParticleCount++; - - #ifdef USE_DUST - if(idummy >= 50000000 && idummy < 100000000) - { - dustCount++; - if( dustCount % reduce_dust_factor == 0 ) - positions.w *= reduce_dust_factor; - - if( dustCount % reduce_dust_factor != 0 ) - continue; - dustPositions.push_back(positions); - dustVelocities.push_back(velocity); - dustIDs.push_back(idummy); - } - else - { - bodyCount++; - if( bodyCount % reduce_bodies_factor == 0 ) - positions.w *= reduce_bodies_factor; - - if( bodyCount % reduce_bodies_factor != 0 ) - continue; - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - bodiesIDs.push_back(idummy); - } - - - #else - if( globalParticleCount % reduce_bodies_factor == 0 ) - positions.w *= reduce_bodies_factor; - - if( globalParticleCount % reduce_bodies_factor != 0 ) - continue; - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - bodiesIDs.push_back(idummy); - #endif - - particleCount++; - - - if(bodyPositions.size() > perProc && procCntr != procs) - { - tree->ICSend(procCntr, &bodyPositions[0], &bodyVelocities[0], &bodiesIDs[0], (int)bodyPositions.size()); - procCntr++; - - bodyPositions.clear(); - bodyVelocities.clear(); - bodiesIDs.clear(); - } - }//end while - - inputFile.close(); - - //Clear the last one since its double -// bodyPositions.resize(bodyPositions.size()-1); -// NTotal2 = particleCount-1; - NTotal2 = particleCount; - LOGF(stderr,"NTotal: %d\tper proc: %d\tFor ourself: %d \tNDust: %d \n", - NTotal, perProc, (int)bodiesIDs.size(), (int)dustPositions.size()); -} - - -void read_generate_cube(vector &bodyPositions, vector &bodyVelocities, - vector &bodiesIDs, float eps2, string fileName, - int rank, int procs, int &NTotal2, int &NFirst, - int &NSecond, int &NThird, octree *tree, - vector &dustPositions, vector &dustVelocities, - vector &dustIDs, int reduce_bodies_factor, - int reduce_dust_factor) -{ - //Process 0 does the file reading and sends the data - //to the other processes - /* - - Read in our custom version of the tipsy file format. - Most important change is that we store particle id on the - location where previously the potential was stored. - */ - - - int NTotal; - int idummy; - real4 positions; - real4 velocity; - - - //Read tipsy header - NTotal = (int)std::pow(2.0, 22); - NFirst = NTotal; - NSecond = 0; - NThird = 0; - - fprintf(stderr,"Going to generate a random cube , number of particles: %d \n", NTotal); - - tree->set_t_current((float) 0); - - //Rough divide - uint perProc = NTotal / procs; - bodyPositions.reserve(perProc+10); - bodyVelocities.reserve(perProc+10); - bodiesIDs.reserve(perProc+10); - perProc -= 1; - - //Start reading - int particleCount = 0; - int procCntr = 1; - - - int globalParticleCount = 0; - - float mass = 1.0 / NTotal; - - for(int i=0; i < NTotal; i++) - { - velocity.w = 0; - positions.w = mass; - positions.x = drand48(); - positions.y = drand48(); - positions.z = drand48(); - velocity.x = 0.001*drand48(); - velocity.y = 0.001*drand48(); - velocity.z = 0.001*drand48(); - - globalParticleCount++; - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - bodiesIDs.push_back(globalParticleCount); - - if(bodyPositions.size() > perProc && procCntr != procs) - { - tree->ICSend(procCntr, &bodyPositions[0], &bodyVelocities[0], &bodiesIDs[0], (int)bodyPositions.size()); - procCntr++; - - bodyPositions.clear(); - bodyVelocities.clear(); - bodiesIDs.clear(); - } - }//end while - - //Clear the last one since its double -// bodyPositions.resize(bodyPositions.size()-1); -// NTotal2 = particleCount-1; - NTotal2 = NTotal; - LOGF(stderr,"NTotal: %d\tper proc: %d\tFor ourself: %d \tNDust: %d \n", - NTotal, perProc, (int)bodiesIDs.size(), (int)dustPositions.size()); -} - - -double rot[3][3]; - -void rotmat(double i,double w) -{ - rot[0][0] = cos(w); - rot[0][1] = -cos(i)*sin(w); - rot[0][2] = -sin(i)*sin(w); - rot[1][0] = sin(w); - rot[1][1] = cos(i)*cos(w); - rot[1][2] = sin(i)*cos(w); - rot[2][0] = 0.0; - rot[2][1] = -sin(i); - rot[2][2] = cos(i); - fprintf(stderr,"%g %g %g\n",rot[0][0], rot[0][1], rot[0][2]); - fprintf(stderr,"%g %g %g\n",rot[1][0], rot[1][1], rot[1][2]); - fprintf(stderr,"%g %g %g\n",rot[2][0], rot[2][1], rot[2][2]); -} - -void rotate(double rot[3][3],float *vin) -{ - static double vout[3]; - - for(int i=0; i<3; i++) { - vout[i] = 0; - for(int j=0; j<3; j++) - vout[i] += rot[i][j] * vin[j]; - /* Remember the rotation matrix is the transpose of rot */ - } - for(int i=0; i<3; i++) - vin[i] = (float) vout[i]; -} - -void euler(vector &bodyPositions, - vector &bodyVelocities, - double inc, double omega) -{ - rotmat(inc,omega); - size_t nobj = bodyPositions.size(); - for(uint i=0; i < nobj; i++) - { - float r[3], v[3]; - r[0] = bodyPositions[i].x; - r[1] = bodyPositions[i].y; - r[2] = bodyPositions[i].z; - v[0] = bodyVelocities[i].x; - v[1] = bodyVelocities[i].y; - v[2] = bodyVelocities[i].z; - - rotate(rot,r); - rotate(rot,v); - - bodyPositions[i].x = r[0]; - bodyPositions[i].y = r[1]; - bodyPositions[i].z = r[2]; - bodyVelocities[i].x = v[0]; - bodyVelocities[i].y = v[1]; - bodyVelocities[i].z = v[2]; - } -} - - - -double centerGalaxy(vector &bodyPositions, - vector &bodyVelocities) -{ - size_t nobj = bodyPositions.size(); - float xc, yc, zc, vxc, vyc, vzc, mtot; - - - mtot = 0; - xc = yc = zc = vxc = vyc = vzc = 0; - for(uint i=0; i< nobj; i++) { - xc += bodyPositions[i].w*bodyPositions[i].x; - yc += bodyPositions[i].w*bodyPositions[i].y; - zc += bodyPositions[i].w*bodyPositions[i].z; - vxc += bodyPositions[i].w*bodyVelocities[i].x; - vyc += bodyPositions[i].w*bodyVelocities[i].y; - vzc += bodyPositions[i].w*bodyVelocities[i].z; - mtot += bodyPositions[i].w; - } - xc /= mtot; - yc /= mtot; - zc /= mtot; - vxc /= mtot; - vyc /= mtot; - vzc /= mtot; - for(uint i=0; i< nobj; i++) - { - bodyPositions[i].x -= xc; - bodyPositions[i].y -= yc; - bodyPositions[i].z -= zc; - bodyVelocities[i].x -= vxc; - bodyVelocities[i].y -= vyc; - bodyVelocities[i].z -= vzc; - } - - return mtot; -} - - - - -int setupMergerModel(vector &bodyPositions1, - vector &bodyVelocities1, - vector &bodyIDs1, - vector &bodyPositions2, - vector &bodyVelocities2, - vector &bodyIDs2){ - uint i; - double ds=1.0, vs, ms=1.0; - double mu1, mu2, vp; - double b=1.0, rsep=10.0; - double x, y, vx, vy, x1, y1, vx1, vy1 , x2, y2, vx2, vy2; - double theta, tcoll; - double inc1=0, omega1=0; - double inc2=0, omega2=0; - - - ds = 1.52; - ms = 1.0; - b = 10; - rsep = 168; - inc1 = 0; - omega1 = 0; - inc2 = 180; - omega2 = 0; - - - if(ds < 0) - { - cout << "Enter size ratio (for gal2): "; - cin >> ds; - cout << "Enter mass ratio (for gal2): "; - cin >> ms; - cout << "Enter relative impact parameter: "; - cin >> b; - - cout << "Enter initial separation: "; - cin >> rsep; - cout << "Enter Euler angles for first galaxy:\n"; - cout << "Enter inclination: "; - cin >> inc1; - cout << "Enter omega: "; - cin >> omega1; - cout << "Enter Euler angles for second galaxy:\n"; - cout << "Enter inclination: "; - cin >> inc2; - cout << "Enter omega: "; - cin >> omega2; - } - - - double inc1_inp, inc2_inp, om2_inp, om1_inp; - - inc1_inp = inc1; - inc2_inp = inc2; - om1_inp = omega1; - om2_inp = omega1; - - - inc1 *= M_PI/180.; - inc2 *= M_PI/180.; - omega1 *= M_PI/180.; - omega2 *= M_PI/180.; - omega1 += M_PI; - - fprintf(stderr,"Size ratio: %f Mass ratio: %f \n", ds, ms); - fprintf(stderr,"Relative impact par: %f Initial sep: %f \n", b, rsep); - fprintf(stderr,"Euler angles first: %f %f Second: %f %f \n", - inc1, omega1,inc2,omega2); - - vs = sqrt(ms/ds); /* adjustment for internal velocities */ - - - //Center everything in galaxy 1 and galaxy 2 - double galaxyMass1 = centerGalaxy(bodyPositions1, bodyVelocities1); - double galaxyMass2 = centerGalaxy(bodyPositions2, bodyVelocities2); - - - galaxyMass2 = ms*galaxyMass2; //Adjust total mass - - mu1 = galaxyMass2/(galaxyMass1 + galaxyMass2); - mu2 = -galaxyMass1/(galaxyMass1 + galaxyMass2); - - double m1 = galaxyMass1; - double m2 = galaxyMass2; - - - /* Relative Parabolic orbit - anti-clockwise */ - if( b > 0 ) { - vp = sqrt(2.0*(m1 + m2)/b); - x = 2*b - rsep; y = -2*sqrt(b*(rsep-b)); - vx = sqrt(b*(rsep-b))*vp/rsep; vy = b*vp/rsep; - } - else { - b = 0; - x = - rsep; y = 0.0; - vx = sqrt(2.0*(m1 + m2)/rsep); vy = 0.0; - } - - /* Calculate collison time */ - if( b > 0 ) { - theta = atan2(y,x); - tcoll = (0.5*tan(0.5*theta) + pow(tan(0.5*theta),3.0)/6.)*4*b/vp; - fprintf(stderr,"Collision time is t=%g\n",tcoll); - } - else { - tcoll = -pow(rsep,1.5)/(1.5*sqrt(2.0*(m1+m2))); - fprintf(stderr,"Collision time is t=%g\n",tcoll); - } - - /* These are the orbital adjustments for a parabolic encounter */ - /* Change to centre of mass frame */ - x1 = mu1*x; x2 = mu2*x; - y1 = mu1*y; y2 = mu2*y; - vx1 = mu1*vx; vx2 = mu2*vx; - vy1 = mu1*vy; vy2 = mu2*vy; - - - /* Rotate the galaxies */ - euler(bodyPositions1, bodyVelocities1, inc1,omega1); - euler(bodyPositions2, bodyVelocities2, inc2,omega2); - - for(i=0; i< bodyPositions1.size(); i++) { - bodyPositions1[i].x = (float) (bodyPositions1[i].x + x1); - bodyPositions1[i].y = (float) (bodyPositions1[i].y + y1); - bodyVelocities1[i].x = (float) (bodyVelocities1[i].x + vx1); - bodyVelocities1[i].y = (float) (bodyVelocities1[i].y + vy1); - } - /* Rescale and reset the second galaxy */ - for(i=0; i< bodyPositions2.size(); i++) { - bodyPositions2[i].w = (float) ms*bodyPositions2[i].w; - bodyPositions2[i].x = (float) (ds*bodyPositions2[i].x + x2); - bodyPositions2[i].y = (float) (ds*bodyPositions2[i].y + y2); - bodyPositions2[i].z = (float) ds*bodyPositions2[i].z; - bodyVelocities2[i].x = (float) (vs*bodyVelocities2[i].x + vx2); - bodyVelocities2[i].y = (float) (vs*bodyVelocities2[i].y + vy2); - bodyVelocities2[i].z = (float) vs*bodyVelocities2[i].z; - } - - - //Put them into one - bodyPositions1.insert(bodyPositions1.end(), bodyPositions2.begin(), bodyPositions2.end()); - bodyVelocities1.insert(bodyVelocities1.end(), bodyVelocities2.begin(), bodyVelocities2.end()); - bodyIDs1.insert(bodyIDs1.end(), bodyIDs2.begin(), bodyIDs2.end()); - - - return 0; -} - - -long long my_dev::base_mem::currentMemUsage; -long long my_dev::base_mem::maxMemUsage; - -int main(int argc, char** argv) -{ - my_dev::base_mem::currentMemUsage = 0; - my_dev::base_mem::maxMemUsage = 0; - - vector bodyPositions; - vector bodyVelocities; - vector bodyIDs; - - vector dustPositions; - vector dustVelocities; - vector dustIDs; - - - float eps = 0.05f; - float theta = 0.75f; - float timeStep = 1.0f / 16.0f; - float tEnd = 1; - int iterEnd = (1 << 30); - devID = 0; - renderDevID = 0; - - string fileName = ""; - string logFileName = "gpuLog.log"; - string snapshotFile = "snapshot_"; - float snapshotIter = -1; - float remoDistance = -1.0; - int snapShotAdd = 0; - int rebuild_tree_rate = 2; - int reduce_bodies_factor = 1; - int reduce_dust_factor = 1; - string fullScreenMode = ""; - bool direct = false; - bool fullscreen = false; - bool displayFPS = false; - bool diskmode = false; - bool stereo = false; - -#if ENABLE_LOG - ENABLE_RUNTIME_LOG = false; - PREPEND_RANK = false; -#endif - -#ifdef USE_OPENGL - TstartGlow = 0.0; - dTstartGlow = 1.0; -#endif - - int nPlummer = -1; - int nSphere = -1; - int nMilkyWay = -1; - int nMWfork = 4; - /************** beg - command line arguments ********/ -#if 1 - { - AnyOption opt; - -#define ADDUSAGE(line) {{std::stringstream oss; oss << line; opt.addUsage(oss.str());}} - - ADDUSAGE(" "); - ADDUSAGE("Usage"); - ADDUSAGE(" "); - ADDUSAGE(" -h --help Prints this help "); - ADDUSAGE(" -i --infile # Input snapshot filename "); - ADDUSAGE(" --logfile # Log filename [" << logFileName << "]"); - ADDUSAGE(" --dev # Device ID [" << devID << "]"); - ADDUSAGE(" --renderdev # Rendering Device ID [" << renderDevID << "]"); - ADDUSAGE(" -t --dt # time step [" << timeStep << "]"); - ADDUSAGE(" -T --tend # N-body end time [" << tEnd << "]"); - ADDUSAGE(" -I --iend # N-body end iteration [" << iterEnd << "]"); - ADDUSAGE(" -e --eps # softening (will be squared) [" << eps << "]"); - ADDUSAGE(" -o --theta # opening angle (theta) [" <get_time(); - - //Get parallel processing information - int procId = tree->mpiGetRank(); - int nProcs = tree->mpiGetNProcs(); - - if (procId == 0) - { - //NOte cant use LOGF here since MPI isnt initialized yet - cerr << "[INIT]\tUsed settings: \n"; - cerr << "[INIT]\tInput filename " << fileName << endl; - cerr << "[INIT]\tLog filename " << logFileName << endl; - cerr << "[INIT]\tTheta: \t\t" << theta << "\t\teps: \t\t" << eps << endl; - cerr << "[INIT]\tTimestep: \t" << timeStep << "\t\ttEnd: \t\t" << tEnd << endl; - cerr << "[INIT]\titerEnd: \t" << iterEnd << endl; - cerr << "[INIT]\tsnapshotFile: \t" << snapshotFile << "\tsnapshotIter: \t" << snapshotIter << endl; - cerr << "[INIT]\tInput file: \t" << fileName << "\t\tdevID: \t\t" << devID << endl; - cerr << "[INIT]\tRemove dist: \t" << remoDistance << endl; - cerr << "[INIT]\tSnapshot Addition: \t" << snapShotAdd << endl; - cerr << "[INIT]\tRebuild tree every " << rebuild_tree_rate << " timestep\n"; - - - if( reduce_bodies_factor > 1 ) - cout << "[INIT]\tReduce number of non-dust bodies by " << reduce_bodies_factor << " \n"; - if( reduce_dust_factor > 1 ) - cout << "[INIT]\tReduce number of dust bodies by " << reduce_dust_factor << " \n"; - -#if ENABLE_LOG - if (ENABLE_RUNTIME_LOG) - cerr << "[INIT]\tRuntime logging is ENABLED \n"; - else - cerr << "[INIT]\tRuntime logging is DISABLED \n"; -#endif - cerr << "[INIT]\tDirect gravitation is " << (direct ? "ENABLED" : "DISABLED") << endl; -#if USE_OPENGL - cerr << "[INIT]\tTglow = " << TstartGlow << endl; - cerr << "[INIT]\tdTglow = " << dTstartGlow << endl; - cerr << "[INIT]\tstereo = " << stereo << endl; -#endif -#ifdef USE_MPI - cerr << "[INIT]\tCode is built WITH MPI Support \n"; -#else - cerr << "[INIT]\tCode is built WITHOUT MPI Support \n"; -#endif - } - -#ifdef USE_MPI -#if 1 - omp_set_num_threads(16); -#pragma omp parallel - { - int tid = omp_get_thread_num(); - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - pthread_getaffinity_np(pthread_self() , sizeof( cpu_set_t ), &cpuset ); - - - int num_cores = sysconf(_SC_NPROCESSORS_ONLN); - - int i, set=-1; - for (i = 0; i < CPU_SETSIZE; i++) - if (CPU_ISSET(i, &cpuset)) - set = i; - // fprintf(stderr,"[Proc: %d ] Thread %d bound to: %d Total cores: %d\n", - // procId, tid, set, num_cores); - } -#endif - - - - -#if 0 - omp_set_num_threads(4); - //default - // int cpulist[] = {0,1,2,3,8,9,10,11}; - int cpulist[] = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15}; //HA-PACS - //int cpulist[] = {0,1,2,3,4,5,6,7}; - //int cpulist[] = {0,2,4,6, 8,10,12,14}; - //int cpulist[] = {1,3,5,7, 9,11,13,15}; - //int cpulist[] = {1,9,5,11, 3,7,13,15}; - //int cpulist[] = {1,15,3,13, 2,4,6,8}; - //int cpulist[] = {1,1,1,1, 1,1,1,1}; - - -#pragma omp parallel - { - int tid = omp_get_thread_num(); - //int core_id = procId*4+tid; - int core_id = (procId%4)*4+tid; - core_id = cpulist[core_id]; - - int num_cores = sysconf(_SC_NPROCESSORS_ONLN); - // if (core_id >= num_cores) - - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(core_id, &cpuset); - pthread_t current_thread = pthread_self(); - int return_val = pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); - - CPU_ZERO(&cpuset); - pthread_getaffinity_np(pthread_self() , sizeof( cpu_set_t ), &cpuset ); - - int i, set=-1; - for (i = 0; i < CPU_SETSIZE; i++) - if (CPU_ISSET(i, &cpuset)) - set = i; - //printf("CPU2: CPU %d\n", i); - - - fprintf(stderr,"Binding thread: %d of rank: %d to cpu: %d CHECK: %d Total cores: %d\n", - tid, procId, core_id, set, num_cores); - - } -#endif -#endif - - - - - -#if ENABLE_LOG -#ifdef USE_MPI - PREPEND_RANK_PROCID = procId; - PREPEND_RANK_NPROCS = nProcs; -#endif -#endif - - - if(nProcs > 1) - { - logFileName.append("-"); - - char buff[16]; - sprintf(buff,"%d-%d", nProcs, procId); - logFileName.append(buff); - } - - ofstream logFile(logFileName.c_str()); - - tree->set_context(logFile, false); //Do logging to file and enable timing (false = enabled) - - if (nPlummer == -1 && nSphere == -1 && !diskmode && nMilkyWay == -1) - { - if(procId == 0) - { -#ifdef TIPSYOUTPUT - read_tipsy_file_parallel(bodyPositions, bodyVelocities, bodyIDs, eps, fileName, - procId, nProcs, NTotal, NFirst, NSecond, NThird, tree, - dustPositions, dustVelocities, dustIDs, reduce_bodies_factor, reduce_dust_factor); - - // read_generate_cube(bodyPositions, bodyVelocities, bodyIDs, eps, fileName, - // procId, nProcs, NTotal, NFirst, NSecond, NThird, tree, - // dustPositions, dustVelocities, dustIDs, reduce_bodies_factor, reduce_dust_factor); - -#else - read_dumbp_file_parallel(bodyPositions, bodyVelocities, bodyIDs, eps, fileName, procId, nProcs, NTotal, NFirst, NSecond, NThird, tree, reduce_bodies_factor); -#endif - } - else - { - tree->ICRecv(0, bodyPositions, bodyVelocities, bodyIDs); - } - } - else if(nMilkyWay >= 0) - { -#ifdef GALACTICS - if (procId == 0) printf("Using MilkyWay model with n= %d per proc, forked %d times \n", nMilkyWay, nMWfork); - assert(nMilkyWay > 0); - assert(nMWfork > 0); - - -#if 0 /* in this setup all particles will be of equal mass (exact number are galactic-depednant) */ - const float fdisk = 15.1; - const float fbulge = 5.1; - const float fhalo = 242.31; -#else /* here, bulge & mw particles have the same mass, but halo particles is 32x heavier */ - const float fdisk = 15.1; - const float fbulge = 5.1; - const float fhalo = 7.5; -#endif - - const float fsum = fdisk + fhalo + fbulge; - - const int ndisk = (int)(nMilkyWay * fdisk/fsum); - const int nbulge = (int)(nMilkyWay * fbulge/fsum); - const int nhalo = (int)(nMilkyWay * fhalo/fsum); - - assert(ndisk > 0); - assert(nbulge > 0); - assert(nhalo > 0); - - const double t0 = tree->get_time(); - const Galactics g(procId, nProcs, ndisk, nbulge, nhalo, nMWfork); - const double dt = tree->get_time() - t0; - if (procId == 0) - printf(" ndisk= %d nbulge= %d nhalo= %d :: ntotal= %d in %g sec\n", - g.get_ndisk(), g.get_nbulge(), g.get_nhalo(), g.get_ntot(), dt); - - const int ntot = g.get_ntot(); - bodyPositions.resize(ntot); - bodyVelocities.resize(ntot); - bodyIDs.resize(ntot); - for (int i= 0; i < ntot; i++) - { - assert(!std::isnan(g[i].x)); - assert(!std::isnan(g[i].y)); - assert(!std::isnan(g[i].z)); - assert(g[i].mass > 0.0); - bodyIDs[i] = g[i].id; - - bodyPositions[i].x = g[i].x; - bodyPositions[i].y = g[i].y; - bodyPositions[i].z = g[i].z; - bodyPositions[i].w = g[i].mass * 1.0/(double)nProcs; - - assert(!std::isnan(g[i].vx)); - assert(!std::isnan(g[i].vy)); - assert(!std::isnan(g[i].vz)); - - bodyVelocities[i].x = g[i].vx; - bodyVelocities[i].y = g[i].vy; - bodyVelocities[i].z = g[i].vz; - bodyVelocities[i].w = 0.0; - } -#else - assert(0); -#endif - } - else if(nPlummer >= 0) - { - if (procId == 0) printf("Using plummer model with n= %d per proc \n", nPlummer); - assert(nPlummer > 0); - const int seed = 19810614 + procId; - const Plummer m(nPlummer, procId, seed); - bodyPositions.resize(nPlummer); - bodyVelocities.resize(nPlummer); - bodyIDs.resize(nPlummer); - for (int i= 0; i < nPlummer; i++) - { - - assert(!std::isnan(m.pos[i].x)); - assert(!std::isnan(m.pos[i].y)); - assert(!std::isnan(m.pos[i].z)); - assert(m.mass[i] > 0.0); - bodyIDs[i] = nPlummer*procId + i; - - bodyPositions[i].x = m.pos[i].x; - bodyPositions[i].y = m.pos[i].y; - bodyPositions[i].z = m.pos[i].z; - bodyPositions[i].w = m.mass[i] * 1.0/nProcs; - - bodyVelocities[i].x = m.vel[i].x; - bodyVelocities[i].y = m.vel[i].y; - bodyVelocities[i].z = m.vel[i].z; - bodyVelocities[i].w = 0; - } - } - else if (nSphere >= 0) - { - //Sphere - if (procId == 0) printf("Using Spherical model with n= %d per proc \n", nSphere); - assert(nSphere >= 0); - bodyPositions.resize(nSphere); - bodyVelocities.resize(nSphere); - bodyIDs.resize(nSphere); - - srand48(procId+19840501); - - /* generate uniform sphere */ - int np = 0; - while (np < nSphere) - { - const double x = 2.0*drand48()-1.0; - const double y = 2.0*drand48()-1.0; - const double z = 2.0*drand48()-1.0; - const double r2 = x*x+y*y+z*z; - if (r2 < 1) - { - bodyIDs[np] = nSphere*procId + np; - - bodyPositions[np].x = x; - bodyPositions[np].y = y; - bodyPositions[np].z = z; - bodyPositions[np].w = (1.0/nSphere) * 1.0/nProcs; - - bodyVelocities[np].x = 0; - bodyVelocities[np].y = 0; - bodyVelocities[np].z = 0; - bodyVelocities[np].w = 0; - np++; - }//if - }//while - }//else - else if (diskmode) - { - if (procId == 0) printf("Using diskmode with filename %s\n", fileName.c_str()); - const int seed = procId+19840501; - srand48(seed); - const DiskShuffle disk(fileName); - const int np = disk.get_ntot(); - bodyPositions.resize(np); - bodyVelocities.resize(np); - bodyIDs.resize(np); - for (int i= 0; i < np; i++) - { - bodyIDs[i] = np*procId + i; - - bodyPositions[i].x = disk.pos(i).x; - bodyPositions[i].y = disk.pos (i).y; - bodyPositions[i].z = disk.pos (i).z; - bodyPositions[i].w = disk.mass(i) * 1.0/nProcs; - - bodyVelocities[i].x = disk.vel(i).x; - bodyVelocities[i].y = disk.vel(i).y; - bodyVelocities[i].z = disk.vel(i).z; - bodyVelocities[i].w = 0; - } - } - else - assert(0); - - tree->mpiSync(); - - -#ifdef TIPSYOUTPUT - LOGF(stderr, " t_current = %g\n", tree->get_t_current()); -#endif - - - //#define SETUP_MERGER -#ifdef SETUP_MERGER - vector bodyPositions2; - vector bodyVelocities2; - vector bodyIDs2; - - bodyPositions2.insert(bodyPositions2.begin(), bodyPositions.begin(), bodyPositions.end()); - bodyVelocities2.insert(bodyVelocities2.begin(), bodyVelocities.begin(), bodyVelocities.end()); - bodyIDs2.insert(bodyIDs2.begin(), bodyIDs.begin(), bodyIDs.end()); - - - setupMergerModel(bodyPositions, bodyVelocities, bodyIDs, - bodyPositions2, bodyVelocities2, bodyIDs2); - - NTotal *= 2; - NFirst *= 2; - NSecond *= 2; - NThird *= 2; -#endif - - - //Set the properties of the data set, it only is really used by process 0, which does the - //actual file I/O - tree->setDataSetProperties(NTotal, NFirst, NSecond, NThird); - - if(procId == 0) - LOG("Dataset particle information: Ntotal: %d\tNFirst: %d\tNSecond: %d\tNThird: %d \n", - NTotal, NFirst, NSecond, NThird); - - - //Sanity check for standard plummer spheres - double mass = 0, totalMass; - for(unsigned int i=0; i < bodyPositions.size(); i++) - { - mass += bodyPositions[i].w; - } - - tree->load_kernels(); - -#ifdef USE_MPI - MPI_Reduce(&mass,&totalMass,1, MPI_DOUBLE, MPI_SUM,0, MPI_COMM_WORLD); -#else - totalMass = mass; -#endif - - if(procId == 0) LOGF(stderr, "Combined Mass: %f \tNTotal: %d \n", totalMass, NTotal); - - LOG("Starting! Bootup time: %lg \n", tree->get_time()-tStartup); - - - double t0 = tree->get_time(); - - tree->localTree.setN((int)bodyPositions.size()); - tree->allocateParticleMemory(tree->localTree); - - //Load data onto the device - for(uint i=0; i < bodyPositions.size(); i++) - { - tree->localTree.bodies_pos[i] = bodyPositions[i]; - tree->localTree.bodies_vel[i] = bodyVelocities[i]; - tree->localTree.bodies_ids[i] = bodyIDs[i]; - - tree->localTree.bodies_Ppos[i] = bodyPositions[i]; - tree->localTree.bodies_Pvel[i] = bodyVelocities[i]; - tree->localTree.bodies_time[i] = make_float2(tree->get_t_current(), tree->get_t_current()); - } - - tree->localTree.bodies_time.h2d(); - tree->localTree.bodies_pos.h2d(); - tree->localTree.bodies_vel.h2d(); - tree->localTree.bodies_Ppos.h2d(); - tree->localTree.bodies_Pvel.h2d(); - tree->localTree.bodies_ids.h2d(); - - //fprintf(stderr,"Send data to device proc: %d \n", procId); - // tree->devContext.writeLogEvent("Send data to device\n"); - - - -#ifdef USE_MPI - //Use sampling particles, determine frequency - tree->mpiSumParticleCount(tree->localTree.n); //Determine initial frequency -#endif - - - //If required set the dust particles -#ifdef USE_DUST - if( (int)dustPositions.size() > 0) - { - LOGF(stderr, "Allocating dust properties for %d dust particles \n", - (int)dustPositions.size()); - tree->localTree.setNDust((int)dustPositions.size()); - tree->allocateDustMemory(tree->localTree); - - //Load dust data onto the device - for(uint i=0; i < dustPositions.size(); i++) - { - tree->localTree.dust_pos[i] = dustPositions[i]; - tree->localTree.dust_vel[i] = dustVelocities[i]; - tree->localTree.dust_ids[i] = dustIDs[i]; - } - - tree->localTree.dust_pos.h2d(); - tree->localTree.dust_vel.h2d(); - tree->localTree.dust_ids.h2d(); - } -#endif //ifdef USE_DUST - - -#ifdef USE_MPI - //Startup the OMP threads - omp_set_num_threads(4); -#endif - - - //Start the integration -#ifdef USE_OPENGL - octree::IterationData idata; - initAppRenderer(argc, argv, tree, idata, displayFPS, stereo); - LOG("Finished!!! Took in total: %lg sec\n", tree->get_time()-t0); -#else - tree->mpiSync(); - if (procId==0) - fprintf(stderr, " Starting iterating\n"); - tree->mpiSync(); - tree->iterate(); - - LOG("Finished!!! Took in total: %lg sec\n", tree->get_time()-t0); - - logFile.close(); - -#ifdef USE_MPI - MPI_Finalize(); -#endif - - if(tree->procId == 0) - { - LOGF(stderr, "TOTAL: Time spent between the start of 'iterate' and the final time-step (very first step is not accounted)\n",0); - LOGF(stderr, "Grav: Time spent to compute gravity, including communication (wall-clock time)\n",0); - LOGF(stderr, "GPUgrav: Time spent ON the GPU to compute local and LET gravity\n",0); - LOGF(stderr, "LET Com: Time spent in exchanging and building LET data\n",0); - LOGF(stderr, "Build: Time spent in constructing the tree (incl sorting, making groups, etc.)\n",0); - LOGF(stderr, "Domain: Time spent in computing new domain decomposition and exchanging particles between nodes.\n",0); - LOGF(stderr, "Wait: Time spent in waiting on other processes after the gravity part.\n",0); - } - - - delete tree; - tree = NULL; -#endif - - displayTimers(); - return 0; -} diff --git a/ptxtools/ptx.ll b/ptxtools/ptx.ll index 0c3a0fd4..56e45ae9 100644 --- a/ptxtools/ptx.ll +++ b/ptxtools/ptx.ll @@ -1,3 +1,39 @@ +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on GPU Ocelot PTX parser : https://code.google.com/p/gpuocelot/ + */ + %option yylineno %option noyywrap %option yyclass="parser::PTXLexer" diff --git a/ptxtools/ptxcc.cpp b/ptxtools/ptxcc.cpp index dad7452d..dfdad2d1 100644 --- a/ptxtools/ptxcc.cpp +++ b/ptxtools/ptxcc.cpp @@ -1,3 +1,40 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on GPU Ocelot PTX parser : https://code.google.com/p/gpuocelot/ + */ + #include #include #include diff --git a/ptxtools/ptxgen.cpp b/ptxtools/ptxgen.cpp index a05124b6..0dc64194 100644 --- a/ptxtools/ptxgen.cpp +++ b/ptxtools/ptxgen.cpp @@ -1,3 +1,39 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on "ptxgen" NVVM example from CUDA Toolkit + */ #include #include #include diff --git a/ptxtools/ptxgrammar.cpp b/ptxtools/ptxgrammar.cpp deleted file mode 100644 index 2419e654..00000000 --- a/ptxtools/ptxgrammar.cpp +++ /dev/null @@ -1,1929 +0,0 @@ - -/* A Bison parser, made by GNU Bison 2.4.1. */ - -/* Skeleton implementation for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.4.1" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 1 - -/* Push parsers. */ -#define YYPUSH 0 - -/* Pull parsers. */ -#define YYPULL 1 - -/* Using locations. */ -#define YYLSP_NEEDED 1 - - - -/* Copy the first part of user declarations. */ - -/* Line 189 of yacc.c */ -#line 3 "ptxgrammar.yy" - - #include - #include "PTXParser.h" - #include "PTXLexer.h" - #include - #include - #include - - #define YYERROR_VERBOSE 1 - - #ifdef REPORT_BASE - #undef REPORT_BASE - #endif - - #define REPORT_BASE 0 - - namespace ptx - { - - int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer, - parser::PTXParser& state ); - void yyerror( YYLTYPE* location, parser::PTXLexer& lexer, - parser::PTXParser& state, char const* message ); - - std::string yyTypeToString( int ); - - - -/* Line 189 of yacc.c */ -#line 102 "ptxgrammar.cc" - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 1 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - TOKEN_VERSION = 258, - TOKEN_TARGET = 259, - TOKEN_ADDRESS_SIZE = 260, - TOKEN_VISIBLE = 261, - TOKEN_FUNC = 262, - TOKEN_ENTRY = 263, - TOKEN_PARAM = 264, - TOKEN_ALIGN = 265, - TOKEN_GLOBAL = 266, - TOKEN_B8 = 267, - TOKEN_B16 = 268, - TOKEN_B32 = 269, - TOKEN_B64 = 270, - TOKEN_U8 = 271, - TOKEN_U16 = 272, - TOKEN_U32 = 273, - TOKEN_U64 = 274, - TOKEN_S8 = 275, - TOKEN_S16 = 276, - TOKEN_S32 = 277, - TOKEN_S64 = 278, - TOKEN_F32 = 279, - TOKEN_F64 = 280, - TOKEN_INT = 281, - TOKEN_FLOAT = 282, - TOKEN_STRING = 283 - }; -#endif - - - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -{ - -/* Line 214 of yacc.c */ -#line 32 "ptxgrammar.yy" - - char svalue[1024]; - double fvalue; - int ivalue; - unsigned int uvalue; - - - -/* Line 214 of yacc.c */ -#line 175 "ptxgrammar.cc" -} YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -#endif - -#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED -typedef struct YYLTYPE -{ - int first_line; - int first_column; - int last_line; - int last_column; -} YYLTYPE; -# define yyltype YYLTYPE /* obsolescent; will be withdrawn */ -# define YYLTYPE_IS_DECLARED 1 -# define YYLTYPE_IS_TRIVIAL 1 -#endif - - -/* Copy the second part of user declarations. */ - - -/* Line 264 of yacc.c */ -#line 200 "ptxgrammar.cc" - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if YYENABLE_NLS -# if ENABLE_NLS -# include /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. */ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int yyi) -#else -static int -YYID (yyi) - int yyi; -#endif -{ - return yyi; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined _STDLIB_H \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL \ - && defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss_alloc; - YYSTYPE yyvs_alloc; - YYLTYPE yyls_alloc; -}; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE) + sizeof (YYLTYPE)) \ - + 2 * YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ - Stack = &yyptr->Stack_alloc; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 6 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 97 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 34 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 28 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 66 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 92 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 283 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 31, 32, 2, 2, 33, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 29, 2, 30, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. */ -static const yytype_uint8 yyprhs[] = -{ - 0, 0, 3, 6, 10, 13, 16, 19, 21, 23, - 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, - 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, - 65, 67, 69, 71, 73, 75, 77, 80, 82, 85, - 87, 90, 92, 95, 97, 101, 102, 104, 106, 108, - 111, 113, 116, 121, 123, 125, 127, 128, 132, 136, - 141, 143, 145, 149, 151, 152, 158 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const yytype_int8 yyrhs[] = -{ - 35, 0, -1, 36, 43, -1, 37, 38, 39, -1, - 3, 27, -1, 4, 28, -1, 5, 26, -1, 16, - -1, 17, -1, 18, -1, 19, -1, 20, -1, 21, - -1, 22, -1, 23, -1, 12, -1, 13, -1, 14, - -1, 15, -1, 24, -1, 25, -1, 40, -1, 10, - -1, 9, -1, 40, -1, 28, -1, 27, -1, 26, - -1, 7, -1, 8, -1, 11, -1, 29, -1, 30, - -1, 31, -1, 32, -1, 33, -1, 43, 60, -1, - 60, -1, 43, 55, -1, 55, -1, 43, 61, -1, - 61, -1, 43, 42, -1, 42, -1, 29, 26, 30, - -1, -1, 44, -1, 28, -1, 9, -1, 10, 26, - -1, 41, -1, 48, 41, -1, 47, 49, 46, 45, - -1, 31, -1, 32, -1, 50, -1, -1, 53, 33, - 50, -1, 51, 53, 52, -1, 6, 8, 46, 54, - -1, 31, -1, 32, -1, 56, 53, 57, -1, 58, - -1, -1, 6, 7, 59, 46, 54, -1, 6, 11, - 49, 46, 44, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint8 yyrline[] = -{ - 0, 71, 71, 74, 81, 83, 85, 89, 89, 89, - 89, 90, 90, 90, 90, 91, 91, 91, 91, 92, - 92, 94, 97, 98, 99, 100, 100, 100, 101, 101, - 102, 103, 104, 105, 106, 107, 110, 110, 111, 111, - 112, 112, 113, 113, 117, 120, 121, 123, 124, 126, - 127, 128, 130, 136, 137, 138, 139, 140, 141, 143, - 148, 149, 150, 151, 151, 152, 158 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. */ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "TOKEN_VERSION", "TOKEN_TARGET", - "TOKEN_ADDRESS_SIZE", "TOKEN_VISIBLE", "TOKEN_FUNC", "TOKEN_ENTRY", - "TOKEN_PARAM", "TOKEN_ALIGN", "TOKEN_GLOBAL", "TOKEN_B8", "TOKEN_B16", - "TOKEN_B32", "TOKEN_B64", "TOKEN_U8", "TOKEN_U16", "TOKEN_U32", - "TOKEN_U64", "TOKEN_S8", "TOKEN_S16", "TOKEN_S32", "TOKEN_S64", - "TOKEN_F32", "TOKEN_F64", "TOKEN_INT", "TOKEN_FLOAT", "TOKEN_STRING", - "'['", "']'", "'('", "')'", "','", "$accept", "ptxsource", "header", - "version", "target", "address_size", "dataTypeId", "dataType", - "anytoken", "ptxbody", "arrayDimensionSet", "arrayDimensions", - "identifier", "parameter", "alignment", "addressableVariablePrefix", - "argumentDeclaration", "argumentListBegin", "argumentListEnd", - "argumentListBody", "argumentList", "visibleEntryDeclaration", - "returnArgumentListBegin", "returnArgumentListEnd", "returnArgumentList", - "optionalReturnArgumentList", "visibleFunctionDeclaration", - "visibleInitializableDeclaration", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 281, 282, 283, 91, - 93, 40, 41, 44 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 34, 35, 36, 37, 38, 39, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 41, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, - 43, 43, 43, 43, 44, 45, 45, 46, 47, 48, - 49, 49, 50, 51, 52, 53, 53, 53, 54, 55, - 56, 57, 58, 59, 59, 60, 61 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 2, 3, 2, 2, 2, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, - 2, 1, 2, 1, 3, 0, 1, 1, 1, 2, - 1, 2, 4, 1, 1, 1, 0, 3, 3, 4, - 1, 1, 3, 1, 0, 5, 5 -}; - -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 0, 0, 0, 0, 0, 4, 1, 0, 28, 29, - 23, 22, 30, 15, 16, 17, 18, 7, 8, 9, - 10, 11, 12, 13, 14, 19, 20, 27, 26, 25, - 31, 32, 33, 34, 35, 24, 43, 2, 39, 37, - 41, 0, 0, 64, 0, 0, 42, 38, 36, 40, - 5, 0, 3, 60, 56, 63, 0, 47, 0, 0, - 21, 50, 0, 0, 6, 48, 0, 55, 0, 0, - 53, 56, 59, 49, 51, 0, 0, 61, 0, 62, - 65, 0, 0, 66, 45, 57, 54, 58, 0, 46, - 52, 44 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 2, 3, 4, 42, 52, 60, 61, 36, 37, - 83, 90, 58, 66, 62, 63, 67, 71, 87, 68, - 72, 38, 54, 79, 55, 56, 39, 40 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -28 -static const yytype_int8 yypact[] = -{ - 27, 6, 41, -5, 39, -28, -28, 24, -28, -28, - -28, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, -28, -28, -28, -5, -28, -28, - -28, 14, 40, 13, 18, 38, -28, -28, -28, -28, - -28, 21, -28, -28, 69, -28, 18, -28, 48, 54, - -28, -28, 52, 18, -28, -28, 38, -28, 5, 48, - -28, 69, -28, -28, -28, 53, 18, -28, 69, -28, - -28, 7, 55, -28, 53, -28, -28, -28, 56, -28, - -28, -28 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = -{ - -28, -28, -28, -28, -28, -28, -3, 22, 46, -28, - 1, -28, -27, -28, -28, 23, 9, -28, -28, 17, - 25, 58, -28, -28, -28, -28, 59, 60 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. */ -#define YYTABLE_NINF -1 -static const yytype_uint8 yytable[] = -{ - 35, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 69, - 1, 43, 44, 5, 35, 45, 75, 77, 78, 86, - 78, 6, 50, 41, 53, 51, 57, 64, 59, 84, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 65, 70, - 73, 88, 82, 46, 74, 89, 91, 85, 81, 76, - 0, 0, 0, 0, 80, 47, 48, 49 -}; - -static const yytype_int8 yycheck[] = -{ - 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 56, - 3, 7, 8, 27, 37, 11, 63, 32, 33, 32, - 33, 0, 28, 4, 31, 5, 28, 26, 10, 76, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 9, 31, - 26, 26, 29, 37, 62, 84, 30, 78, 71, 66, - -1, -1, -1, -1, 69, 37, 37, 37 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const yytype_uint8 yystos[] = -{ - 0, 3, 35, 36, 37, 27, 0, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 40, 42, 43, 55, 60, - 61, 4, 38, 7, 8, 11, 42, 55, 60, 61, - 28, 5, 39, 31, 56, 58, 59, 28, 46, 10, - 40, 41, 48, 49, 26, 9, 47, 50, 53, 46, - 31, 51, 54, 26, 41, 46, 49, 32, 33, 57, - 54, 53, 29, 44, 46, 50, 32, 52, 26, 44, - 45, 30 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (&yylloc, lexer, state, YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. - If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* YY_LOCATION_PRINT -- Print the location on the stream. - This macro was not mandated originally: define only if we know - we won't break user code: when these are the locations we know. */ - -#ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL -# define YY_LOCATION_PRINT(File, Loc) \ - fprintf (File, "%d.%d-%d.%d", \ - (Loc).first_line, (Loc).first_column, \ - (Loc).last_line, (Loc).last_column) -# else -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -# endif -#endif - - -/* YYLEX -- calling `yylex' with the right arguments. */ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (&yylval, &yylloc, YYLEX_PARAM) -#else -# define YYLEX yylex (&yylval, &yylloc, lexer, state) -#endif - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value, Location, lexer, state); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp, parser::PTXLexer& lexer, parser::PTXParser& state) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep, yylocationp, lexer, state) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; - YYLTYPE const * const yylocationp; - parser::PTXLexer& lexer; - parser::PTXParser& state; -#endif -{ - if (!yyvaluep) - return; - YYUSE (yylocationp); - YYUSE (lexer); - YYUSE (state); -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, YYLTYPE const * const yylocationp, parser::PTXLexer& lexer, parser::PTXParser& state) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep, yylocationp, lexer, state) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; - YYLTYPE const * const yylocationp; - parser::PTXLexer& lexer; - parser::PTXParser& state; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - YY_LOCATION_PRINT (yyoutput, *yylocationp); - YYFPRINTF (yyoutput, ": "); - yy_symbol_value_print (yyoutput, yytype, yyvaluep, yylocationp, lexer, state); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) -#else -static void -yy_stack_print (yybottom, yytop) - yytype_int16 *yybottom; - yytype_int16 *yytop; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; yybottom <= yytop; yybottom++) - { - int yybot = *yybottom; - YYFPRINTF (stderr, " %d", yybot); - } - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, YYLTYPE *yylsp, int yyrule, parser::PTXLexer& lexer, parser::PTXParser& state) -#else -static void -yy_reduce_print (yyvsp, yylsp, yyrule, lexer, state) - YYSTYPE *yyvsp; - YYLTYPE *yylsp; - int yyrule; - parser::PTXLexer& lexer; - parser::PTXParser& state; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - YYFPRINTF (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - , &(yylsp[(yyi + 1) - (yynrhs)]) , lexer, state); - YYFPRINTF (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, yylsp, Rule, lexer, state); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into YYRESULT an error message about the unexpected token - YYCHAR while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yychar) -{ - int yyn = yypact[yystate]; - - if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; - else - { - int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - int yysize_overflow = 0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - int yyx; - -# if 0 - /* This is so xgettext sees the translatable formats that are - constructed on the fly. */ - YY_("syntax error, unexpected %s"); - YY_("syntax error, unexpected %s, expecting %s"); - YY_("syntax error, unexpected %s, expecting %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); -# endif - char *yyfmt; - char const *yyf; - static char const yyunexpected[] = "syntax error, unexpected %s"; - static char const yyexpecting[] = ", expecting %s"; - static char const yyor[] = " or %s"; - char yyformat[sizeof yyunexpected - + sizeof yyexpecting - 1 - + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) - * (sizeof yyor - 1))]; - char const *yyprefix = yyexpecting; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yycount = 1; - - yyarg[0] = yytname[yytype]; - yyfmt = yystpcpy (yyformat, yyunexpected); - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - yyformat[sizeof yyunexpected - 1] = '\0'; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - yyfmt = yystpcpy (yyfmt, yyprefix); - yyprefix = yyor; - } - - yyf = YY_(yyformat); - yysize1 = yysize + yystrlen (yyf); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - - if (yysize_overflow) - return YYSIZE_MAXIMUM; - - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyf) != '\0') - { - if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyf += 2; - } - else - { - yyp++; - yyf++; - } - } - } - return yysize; - } -} -#endif /* YYERROR_VERBOSE */ - - -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, YYLTYPE *yylocationp, parser::PTXLexer& lexer, parser::PTXParser& state) -#else -static void -yydestruct (yymsg, yytype, yyvaluep, yylocationp, lexer, state) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; - YYLTYPE *yylocationp; - parser::PTXLexer& lexer; - parser::PTXParser& state; -#endif -{ - YYUSE (yyvaluep); - YYUSE (yylocationp); - YYUSE (lexer); - YYUSE (state); - - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); - - switch (yytype) - { - - default: - break; - } -} - -/* Prevent warnings from -Wmissing-prototypes. */ -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (parser::PTXLexer& lexer, parser::PTXParser& state); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - - - - -/*-------------------------. -| yyparse or yypush_parse. | -`-------------------------*/ - -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (parser::PTXLexer& lexer, parser::PTXParser& state) -#else -int -yyparse (lexer, state) - parser::PTXLexer& lexer; - parser::PTXParser& state; -#endif -#endif -{ -/* The lookahead symbol. */ -int yychar; - -/* The semantic value of the lookahead symbol. */ -YYSTYPE yylval; - -/* Location data for the lookahead symbol. */ -YYLTYPE yylloc; - - /* Number of syntax errors so far. */ - int yynerrs; - - int yystate; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - - /* The stacks and their tools: - `yyss': related to states. - `yyvs': related to semantic values. - `yyls': related to locations. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; - - /* The location stack. */ - YYLTYPE yylsa[YYINITDEPTH]; - YYLTYPE *yyls; - YYLTYPE *yylsp; - - /* The locations where the error started and ended. */ - YYLTYPE yyerror_range[2]; - - YYSIZE_T yystacksize; - - int yyn; - int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken; - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - YYLTYPE yyloc; - -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N), yylsp -= (N)) - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - yytoken = 0; - yyss = yyssa; - yyvs = yyvsa; - yyls = yylsa; - yystacksize = YYINITDEPTH; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - yyssp = yyss; - yyvsp = yyvs; - yylsp = yyls; - -#if YYLTYPE_IS_TRIVIAL - /* Initialize the default location before parsing starts. */ - yylloc.first_line = yylloc.last_line = 1; - yylloc.first_column = yylloc.last_column = 1; -#endif - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - YYLTYPE *yyls1 = yyls; - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - &yyls1, yysize * sizeof (*yylsp), - &yystacksize); - - yyls = yyls1; - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss_alloc, yyss); - YYSTACK_RELOCATE (yyvs_alloc, yyvs); - YYSTACK_RELOCATE (yyls_alloc, yyls); -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - yylsp = yyls + yysize - 1; - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - if (yystate == YYFINAL) - YYACCEPT; - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - lookahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to lookahead token. */ - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the lookahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token. */ - yychar = YYEMPTY; - - yystate = yyn; - *++yyvsp = yylval; - *++yylsp = yylloc; - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. */ - yyval = yyvsp[1-yylen]; - - /* Default location. */ - YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 3: - -/* Line 1455 of yacc.c */ -#line 75 "ptxgrammar.yy" - { - std::cerr << "Done reading PTX \n" << std::endl; - state.printHeader(); -;} - break; - - case 4: - -/* Line 1455 of yacc.c */ -#line 81 "ptxgrammar.yy" - { std::cerr << "Reading PTX version " << (yyvsp[(2) - (2)].fvalue) << std::endl; ;} - break; - - case 5: - -/* Line 1455 of yacc.c */ -#line 83 "ptxgrammar.yy" - { std::cerr << "Target " << (yyvsp[(2) - (2)].svalue) << std::endl; ;} - break; - - case 6: - -/* Line 1455 of yacc.c */ -#line 85 "ptxgrammar.yy" - { std::cerr << "Address_Size " << (yyvsp[(2) - (2)].ivalue) << std::endl; ;} - break; - - case 21: - -/* Line 1455 of yacc.c */ -#line 94 "ptxgrammar.yy" - { state.dataTypeId((yyvsp[(1) - (1)].ivalue)); ;} - break; - - case 44: - -/* Line 1455 of yacc.c */ -#line 117 "ptxgrammar.yy" - { (yyval.ivalue) = (yyvsp[(2) - (3)].ivalue); state.arrayDimensions((yyvsp[(2) - (3)].ivalue)); ;} - break; - - case 47: - -/* Line 1455 of yacc.c */ -#line 123 "ptxgrammar.yy" - { strcpy((yyval.svalue), (yyvsp[(1) - (1)].svalue)); state.identifier((yyvsp[(1) - (1)].svalue)); ;} - break; - - case 49: - -/* Line 1455 of yacc.c */ -#line 126 "ptxgrammar.yy" - {(yyval.ivalue) = (yyvsp[(2) - (2)].ivalue); state.alignment((yyvsp[(2) - (2)].ivalue));;} - break; - - case 50: - -/* Line 1455 of yacc.c */ -#line 127 "ptxgrammar.yy" - { state.alignment(0); ;} - break; - - case 52: - -/* Line 1455 of yacc.c */ -#line 131 "ptxgrammar.yy" - { - state.argumentDeclaration((yylsp[(1) - (4)])); -;} - break; - - case 53: - -/* Line 1455 of yacc.c */ -#line 136 "ptxgrammar.yy" - { state.argumentListBegin((yylsp[(1) - (1)])); ;} - break; - - case 54: - -/* Line 1455 of yacc.c */ -#line 137 "ptxgrammar.yy" - {state.argumentListEnd((yylsp[(1) - (1)])); ;} - break; - - case 59: - -/* Line 1455 of yacc.c */ -#line 144 "ptxgrammar.yy" - { - state.visibleEntryDeclaration((yyvsp[(3) - (4)].svalue), (yylsp[(1) - (4)])); -;} - break; - - case 60: - -/* Line 1455 of yacc.c */ -#line 148 "ptxgrammar.yy" - { state.returnArgumentListBegin((yylsp[(1) - (1)])); ;} - break; - - case 61: - -/* Line 1455 of yacc.c */ -#line 149 "ptxgrammar.yy" - {state.returnArgumentListEnd((yylsp[(1) - (1)])); ;} - break; - - case 65: - -/* Line 1455 of yacc.c */ -#line 153 "ptxgrammar.yy" - { - state.visibleFunctionDeclaration((yyvsp[(4) - (5)].svalue), (yylsp[(1) - (5)])); -;} - break; - - case 66: - -/* Line 1455 of yacc.c */ -#line 159 "ptxgrammar.yy" - { - state.visibleInitializableDeclaration((yyvsp[(4) - (5)].svalue),(yylsp[(1) - (5)])); -;} - break; - - - -/* Line 1455 of yacc.c */ -#line 1664 "ptxgrammar.cc" - default: break; - } - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - *++yylsp = yyloc; - - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (&yylloc, lexer, state, YY_("syntax error")); -#else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yychar); - yyerror (&yylloc, lexer, state, yymsg); - } - else - { - yyerror (&yylloc, lexer, state, YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } -#endif - } - - yyerror_range[0] = yylloc; - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse lookahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval, &yylloc, lexer, state); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse lookahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - yyerror_range[0] = yylsp[1-yylen]; - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; - - yyerror_range[0] = *yylsp; - yydestruct ("Error: popping", - yystos[yystate], yyvsp, yylsp, lexer, state); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - *++yyvsp = yylval; - - yyerror_range[1] = yylloc; - /* Using YYLLOC is tempting, but would change the location of - the lookahead. YYLOC is available though. */ - YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2); - *++yylsp = yyloc; - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#if !defined(yyoverflow) || YYERROR_VERBOSE -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (&yylloc, lexer, state, YY_("memory exhausted")); - yyresult = 2; - /* Fall through. */ -#endif - -yyreturn: - if (yychar != YYEMPTY) - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval, &yylloc, lexer, state); - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp, yylsp, lexer, state); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. */ - return YYID (yyresult); -} - - - -/* Line 1675 of yacc.c */ -#line 164 "ptxgrammar.yy" - - -int yylex( YYSTYPE* token, YYLTYPE* location, parser::PTXLexer& lexer, - parser::PTXParser& state ) -{ - lexer.yylval = token; - - int tokenValue = lexer.yylexPosition(); - location->first_line = lexer.lineno(); - location->first_column = lexer.column; - -#if 0 - report( " Lexer (" << location->first_line << "," - << location->first_column - << "): " << parser::PTXLexer::toString( tokenValue ) << " \"" - << lexer.YYText() << "\""); -#endif - - return tokenValue; -} - -static std::string toString( YYLTYPE& location, parser::PTXParser& state ) -{ - std::stringstream stream; - stream -#if 0 - << state.fileName -#else - << "ptx " -#endif - << " (" << location.first_line << ", " - << location.first_column << "): "; - return stream.str(); -} - -void yyerror( YYLTYPE* location, parser::PTXLexer& lexer, - parser::PTXParser& state, char const* message ) -{ - std::stringstream stream; - stream << toString( *location, state ) - << " " << message; - fprintf(stderr, "--ERROR-- %s %s \n", toString(*location, state).c_str(), message); - assert(0); -} - -} - diff --git a/ptxtools/ptxgrammar.yy b/ptxtools/ptxgrammar.yy index 482340af..13120739 100644 --- a/ptxtools/ptxgrammar.yy +++ b/ptxtools/ptxgrammar.yy @@ -1,3 +1,38 @@ +/* + Copyright (c) 2014, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + Based on GPU Ocelot PTX parser : https://code.google.com/p/gpuocelot/ + */ %locations %{ diff --git a/ptxtools/runtest_ptxcc.sh b/ptxtools/runtest_ptxcc.sh index 52eccad0..708d0538 100755 --- a/ptxtools/runtest_ptxcc.sh +++ b/ptxtools/runtest_ptxcc.sh @@ -1,4 +1,35 @@ #!/bin/sh +# +# Copyright (c) 2014, Evghenii Gaburov +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PTXCC=$ISPC_HOME/ptxtools/ptxcc PTXGEN=$ISPC_HOME/ptxtools/ptxgen