merge
This commit is contained in:
4
Makefile
4
Makefile
@@ -129,6 +129,10 @@ objs/cbackend.o: cbackend.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/opt.o: opt.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) -fno-rtti $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/%.o: objs/%.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
@@ -3701,15 +3701,18 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
|
||||
Reductions
|
||||
----------
|
||||
|
||||
A number routines are available to evaluate conditions across the running
|
||||
program instances. For example, ``any()`` returns ``true`` if the given
|
||||
value ``v`` is ``true`` for any of the SPMD program instances currently
|
||||
running, and ``all()`` returns ``true`` if it true for all of them.
|
||||
A number of routines are available to evaluate conditions across the
|
||||
running program instances. For example, ``any()`` returns ``true`` if
|
||||
the given value ``v`` is ``true`` for any of the SPMD program
|
||||
instances currently running, ``all()`` returns ``true`` if it is true
|
||||
for all of them, and ``none()`` returns ``true`` if ``v`` is always
|
||||
``false``.
|
||||
|
||||
::
|
||||
|
||||
uniform bool any(bool v)
|
||||
uniform bool all(bool v)
|
||||
uniform bool none(bool v)
|
||||
|
||||
You can also compute a variety of reductions across the program instances.
|
||||
For example, the values of the given value in each of the active program
|
||||
|
||||
@@ -482,7 +482,7 @@ static FORCEINLINE bool __all(__vec16_i1 mask) {
|
||||
}
|
||||
|
||||
static FORCEINLINE bool __none(__vec16_i1 mask) {
|
||||
return !__any(mask);
|
||||
return _mm512_kortestz(mask, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) {
|
||||
@@ -1959,7 +1959,7 @@ static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
|
||||
#ifdef ISPC_FAST_MATH
|
||||
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
|
||||
#else
|
||||
return _mm512_invsqrt_pd(v);
|
||||
return _mm512_invsqrt_ps(v);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -41,7 +41,7 @@
|
||||
#define ISPC_VERSION "1.3.1dev"
|
||||
|
||||
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3)
|
||||
#error "Only LLVM 3.0, 3.1, 3.2, and the 3.3 development branch are supported"
|
||||
#error "Only LLVM 3.0, 3.1, 3.2 and the 3.3 development branch are supported"
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
@@ -1757,9 +1757,9 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
||||
clang::TextDiagnosticPrinter *diagPrinter =
|
||||
new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
|
||||
#else
|
||||
clang::DiagnosticOptions diagOptions;
|
||||
clang::DiagnosticOptions *diagOptions = new clang::DiagnosticOptions();
|
||||
clang::TextDiagnosticPrinter *diagPrinter =
|
||||
new clang::TextDiagnosticPrinter(stderrRaw, &diagOptions);
|
||||
new clang::TextDiagnosticPrinter(stderrRaw, diagOptions);
|
||||
#endif
|
||||
llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_1)
|
||||
@@ -1767,7 +1767,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
||||
new clang::DiagnosticsEngine(diagIDs, diagPrinter);
|
||||
#else
|
||||
clang::DiagnosticsEngine *diagEngine =
|
||||
new clang::DiagnosticsEngine(diagIDs, &diagOptions, diagPrinter);
|
||||
new clang::DiagnosticsEngine(diagIDs, diagOptions, diagPrinter);
|
||||
#endif
|
||||
inst.setDiagnostics(diagEngine);
|
||||
|
||||
|
||||
12
opt.cpp
12
opt.cpp
@@ -471,8 +471,14 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
}
|
||||
optPM.add(llvm::createDeadInstEliminationPass());
|
||||
|
||||
// Max struct size threshold for scalar replacement is
|
||||
// 1) 4 fields (r,g,b,w)
|
||||
// 2) field size: vectorWidth * sizeof(float)
|
||||
const int field_limit = 4;
|
||||
int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit;
|
||||
|
||||
// On to more serious optimizations
|
||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
||||
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createCFGSimplificationPass());
|
||||
optPM.add(llvm::createPromoteMemoryToRegisterPass());
|
||||
@@ -494,7 +500,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createJumpThreadingPass());
|
||||
optPM.add(llvm::createCFGSimplificationPass());
|
||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
||||
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createTailCallEliminationPass());
|
||||
|
||||
@@ -540,7 +546,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
|
||||
optPM.add(llvm::createFunctionInliningPass());
|
||||
optPM.add(llvm::createArgumentPromotionPass());
|
||||
optPM.add(llvm::createScalarReplAggregatesPass(-1, false));
|
||||
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold, false));
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createCFGSimplificationPass());
|
||||
optPM.add(llvm::createReassociatePass());
|
||||
|
||||
23
stdlib.ispc
23
stdlib.ispc
@@ -340,9 +340,9 @@ static inline uniform bool any(bool v) {
|
||||
// We only care about whether "any" is true for the active program instances,
|
||||
// so we have to mask v with the current program mask.
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
return __movmsk(v & __mask) != 0;
|
||||
return __any(v | !__mask);
|
||||
#else
|
||||
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
|
||||
return __any(__sext_varying_bool(v) | !__mask);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -350,13 +350,24 @@ __declspec(safe)
|
||||
static inline uniform bool all(bool v) {
|
||||
// As with any(), we need to explicitly mask v with the current program mask
|
||||
// so we're only looking at the current lanes
|
||||
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
bool match = ((v & __mask) == __mask);
|
||||
return __all(v | !__mask);
|
||||
#else
|
||||
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
||||
return __all(__sext_varying_bool(v) | !__mask);
|
||||
#endif
|
||||
}
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform bool none(bool v) {
|
||||
// As with any(), we need to explicitly mask v with the current program mask
|
||||
// so we're only looking at the current lanes
|
||||
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
return __none(v | !__mask);
|
||||
#else
|
||||
return __none(__sext_varying_bool(v) | !__mask);
|
||||
#endif
|
||||
return __movmsk(match) == ((programCount == 64) ? ~0ull :
|
||||
((1ull << programCount) - 1));
|
||||
}
|
||||
|
||||
__declspec(safe)
|
||||
|
||||
Reference in New Issue
Block a user