This commit is contained in:
Matt Pharr
2012-12-02 14:32:52 -08:00
7 changed files with 43 additions and 19 deletions

View File

@@ -129,6 +129,10 @@ objs/cbackend.o: cbackend.cpp
@echo Compiling $< @echo Compiling $<
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $< @$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
objs/opt.o: opt.cpp
@echo Compiling $<
@$(CXX) -fno-rtti $(CXXFLAGS) -o $@ -c $<
objs/%.o: objs/%.cpp objs/%.o: objs/%.cpp
@echo Compiling $< @echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $< @$(CXX) $(CXXFLAGS) -o $@ -c $<

View File

@@ -3701,15 +3701,18 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
Reductions Reductions
---------- ----------
A number of routines are available to evaluate conditions across the running A number of routines are available to evaluate conditions across the
program instances. For example, ``any()`` returns ``true`` if the given running program instances. For example, ``any()`` returns ``true`` if
value ``v`` is ``true`` for any of the SPMD program instances currently the given value ``v`` is ``true`` for any of the SPMD program
running, and ``all()`` returns ``true`` if it is true for all of them. instances currently running, ``all()`` returns ``true`` if it is true
for all of them, and ``none()`` returns ``true`` if ``v`` is ``false``
for all of them.
:: ::
uniform bool any(bool v) uniform bool any(bool v)
uniform bool all(bool v) uniform bool all(bool v)
uniform bool none(bool v)
You can also compute a variety of reductions across the program instances. You can also compute a variety of reductions across the program instances.
For example, the values of the given value in each of the active program For example, the values of the given value in each of the active program

View File

@@ -482,7 +482,7 @@ static FORCEINLINE bool __all(__vec16_i1 mask) {
} }
static FORCEINLINE bool __none(__vec16_i1 mask) { static FORCEINLINE bool __none(__vec16_i1 mask) {
return !__any(mask); return _mm512_kortestz(mask, mask);
} }
static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) { static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) {
@@ -1959,7 +1959,7 @@ static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH #ifdef ISPC_FAST_MATH
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
#else #else
return _mm512_invsqrt_pd(v); return _mm512_invsqrt_ps(v);
#endif #endif
} }

2
ispc.h
View File

@@ -41,7 +41,7 @@
#define ISPC_VERSION "1.3.1dev" #define ISPC_VERSION "1.3.1dev"
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) #if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3)
#error "Only LLVM 3.0, 3.1, 3.2, and the 3.3 development branch are supported" #error "Only LLVM 3.0, 3.1, 3.2 and the 3.3 development branch are supported"
#endif #endif
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)

View File

@@ -1757,9 +1757,9 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
clang::TextDiagnosticPrinter *diagPrinter = clang::TextDiagnosticPrinter *diagPrinter =
new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions()); new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
#else #else
clang::DiagnosticOptions diagOptions; clang::DiagnosticOptions *diagOptions = new clang::DiagnosticOptions();
clang::TextDiagnosticPrinter *diagPrinter = clang::TextDiagnosticPrinter *diagPrinter =
new clang::TextDiagnosticPrinter(stderrRaw, &diagOptions); new clang::TextDiagnosticPrinter(stderrRaw, diagOptions);
#endif #endif
llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs); llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs);
#if defined(LLVM_3_0) || defined(LLVM_3_1) #if defined(LLVM_3_0) || defined(LLVM_3_1)
@@ -1767,7 +1767,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
new clang::DiagnosticsEngine(diagIDs, diagPrinter); new clang::DiagnosticsEngine(diagIDs, diagPrinter);
#else #else
clang::DiagnosticsEngine *diagEngine = clang::DiagnosticsEngine *diagEngine =
new clang::DiagnosticsEngine(diagIDs, &diagOptions, diagPrinter); new clang::DiagnosticsEngine(diagIDs, diagOptions, diagPrinter);
#endif #endif
inst.setDiagnostics(diagEngine); inst.setDiagnostics(diagEngine);

12
opt.cpp
View File

@@ -471,8 +471,14 @@ Optimize(llvm::Module *module, int optLevel) {
} }
optPM.add(llvm::createDeadInstEliminationPass()); optPM.add(llvm::createDeadInstEliminationPass());
// Max struct size threshold for scalar replacement is
// 1) 4 fields (r,g,b,w)
// 2) field size: vectorWidth * sizeof(float)
const int field_limit = 4;
int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit;
// On to more serious optimizations // On to more serious optimizations
optPM.add(llvm::createScalarReplAggregatesPass()); optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createPromoteMemoryToRegisterPass()); optPM.add(llvm::createPromoteMemoryToRegisterPass());
@@ -494,7 +500,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createJumpThreadingPass()); optPM.add(llvm::createJumpThreadingPass());
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createScalarReplAggregatesPass()); optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createTailCallEliminationPass()); optPM.add(llvm::createTailCallEliminationPass());
@@ -540,7 +546,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createFunctionInliningPass());
optPM.add(llvm::createArgumentPromotionPass()); optPM.add(llvm::createArgumentPromotionPass());
optPM.add(llvm::createScalarReplAggregatesPass(-1, false)); optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold, false));
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createReassociatePass()); optPM.add(llvm::createReassociatePass());

View File

@@ -340,9 +340,9 @@ static inline uniform bool any(bool v) {
// We only care about whether "any" is true for the active program instances, // We only care about whether "any" is true for the active program instances,
// so we have to mask v with the current program mask. // so we have to mask v with the current program mask.
#ifdef ISPC_TARGET_GENERIC #ifdef ISPC_TARGET_GENERIC
return __movmsk(v & __mask) != 0; return __any(v | !__mask);
#else #else
return __movmsk(__sext_varying_bool(v) & __mask) != 0; return __any(__sext_varying_bool(v) | !__mask);
#endif #endif
} }
@@ -350,13 +350,24 @@ __declspec(safe)
static inline uniform bool all(bool v) { static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask // As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes // so we're only looking at the current lanes
#ifdef ISPC_TARGET_GENERIC #ifdef ISPC_TARGET_GENERIC
bool match = ((v & __mask) == __mask); return __all(v | !__mask);
#else #else
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask); return __all(__sext_varying_bool(v) | !__mask);
#endif
}
__declspec(safe)
static inline uniform bool none(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes
#ifdef ISPC_TARGET_GENERIC
return __none(v | !__mask);
#else
return __none(__sext_varying_bool(v) | !__mask);
#endif #endif
return __movmsk(match) == ((programCount == 64) ? ~0ull :
((1ull << programCount) - 1));
} }
__declspec(safe) __declspec(safe)