merge
This commit is contained in:
4
Makefile
4
Makefile
@@ -129,6 +129,10 @@ objs/cbackend.o: cbackend.cpp
|
|||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
|
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
objs/opt.o: opt.cpp
|
||||||
|
@echo Compiling $<
|
||||||
|
@$(CXX) -fno-rtti $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
objs/%.o: objs/%.cpp
|
objs/%.o: objs/%.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|||||||
@@ -3701,15 +3701,18 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
|
|||||||
Reductions
|
Reductions
|
||||||
----------
|
----------
|
||||||
|
|
||||||
A number of routines are available to evaluate conditions across the running
|
A number of routines are available to evaluate conditions across the
|
||||||
program instances. For example, ``any()`` returns ``true`` if the given
|
running program instances. For example, ``any()`` returns ``true`` if
|
||||||
value ``v`` is ``true`` for any of the SPMD program instances currently
|
the given value ``v`` is ``true`` for any of the SPMD program
|
||||||
running, and ``all()`` returns ``true`` if it is true for all of them.
|
instances currently running, ``all()`` returns ``true`` if it is true
|
||||||
|
for all of them, and ``none()`` returns ``true`` if ``v`` is always
|
||||||
|
``false``.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
uniform bool any(bool v)
|
uniform bool any(bool v)
|
||||||
uniform bool all(bool v)
|
uniform bool all(bool v)
|
||||||
|
uniform bool none(bool v)
|
||||||
|
|
||||||
You can also compute a variety of reductions across the program instances.
|
You can also compute a variety of reductions across the program instances.
|
||||||
For example, the values of the given value in each of the active program
|
For example, the values of the given value in each of the active program
|
||||||
|
|||||||
@@ -482,7 +482,7 @@ static FORCEINLINE bool __all(__vec16_i1 mask) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static FORCEINLINE bool __none(__vec16_i1 mask) {
|
static FORCEINLINE bool __none(__vec16_i1 mask) {
|
||||||
return !__any(mask);
|
return _mm512_kortestz(mask, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) {
|
static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) {
|
||||||
@@ -1959,7 +1959,7 @@ static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
|
|||||||
#ifdef ISPC_FAST_MATH
|
#ifdef ISPC_FAST_MATH
|
||||||
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
|
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
|
||||||
#else
|
#else
|
||||||
return _mm512_invsqrt_pd(v);
|
return _mm512_invsqrt_ps(v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
2
ispc.h
2
ispc.h
@@ -41,7 +41,7 @@
|
|||||||
#define ISPC_VERSION "1.3.1dev"
|
#define ISPC_VERSION "1.3.1dev"
|
||||||
|
|
||||||
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3)
|
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3)
|
||||||
#error "Only LLVM 3.0, 3.1, 3.2, and the 3.3 development branch are supported"
|
#error "Only LLVM 3.0, 3.1, 3.2 and the 3.3 development branch are supported"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
|||||||
@@ -1757,9 +1757,9 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
clang::TextDiagnosticPrinter *diagPrinter =
|
clang::TextDiagnosticPrinter *diagPrinter =
|
||||||
new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
|
new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
|
||||||
#else
|
#else
|
||||||
clang::DiagnosticOptions diagOptions;
|
clang::DiagnosticOptions *diagOptions = new clang::DiagnosticOptions();
|
||||||
clang::TextDiagnosticPrinter *diagPrinter =
|
clang::TextDiagnosticPrinter *diagPrinter =
|
||||||
new clang::TextDiagnosticPrinter(stderrRaw, &diagOptions);
|
new clang::TextDiagnosticPrinter(stderrRaw, diagOptions);
|
||||||
#endif
|
#endif
|
||||||
llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs);
|
llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs);
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_1)
|
#if defined(LLVM_3_0) || defined(LLVM_3_1)
|
||||||
@@ -1767,7 +1767,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
new clang::DiagnosticsEngine(diagIDs, diagPrinter);
|
new clang::DiagnosticsEngine(diagIDs, diagPrinter);
|
||||||
#else
|
#else
|
||||||
clang::DiagnosticsEngine *diagEngine =
|
clang::DiagnosticsEngine *diagEngine =
|
||||||
new clang::DiagnosticsEngine(diagIDs, &diagOptions, diagPrinter);
|
new clang::DiagnosticsEngine(diagIDs, diagOptions, diagPrinter);
|
||||||
#endif
|
#endif
|
||||||
inst.setDiagnostics(diagEngine);
|
inst.setDiagnostics(diagEngine);
|
||||||
|
|
||||||
|
|||||||
12
opt.cpp
12
opt.cpp
@@ -471,8 +471,14 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
}
|
}
|
||||||
optPM.add(llvm::createDeadInstEliminationPass());
|
optPM.add(llvm::createDeadInstEliminationPass());
|
||||||
|
|
||||||
|
// Max struct size threshold for scalar replacement is
|
||||||
|
// 1) 4 fields (r,g,b,w)
|
||||||
|
// 2) field size: vectorWidth * sizeof(float)
|
||||||
|
const int field_limit = 4;
|
||||||
|
int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit;
|
||||||
|
|
||||||
// On to more serious optimizations
|
// On to more serious optimizations
|
||||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass());
|
||||||
optPM.add(llvm::createCFGSimplificationPass());
|
optPM.add(llvm::createCFGSimplificationPass());
|
||||||
optPM.add(llvm::createPromoteMemoryToRegisterPass());
|
optPM.add(llvm::createPromoteMemoryToRegisterPass());
|
||||||
@@ -494,7 +500,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass());
|
||||||
optPM.add(llvm::createJumpThreadingPass());
|
optPM.add(llvm::createJumpThreadingPass());
|
||||||
optPM.add(llvm::createCFGSimplificationPass());
|
optPM.add(llvm::createCFGSimplificationPass());
|
||||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass());
|
||||||
optPM.add(llvm::createTailCallEliminationPass());
|
optPM.add(llvm::createTailCallEliminationPass());
|
||||||
|
|
||||||
@@ -540,7 +546,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
|
|
||||||
optPM.add(llvm::createFunctionInliningPass());
|
optPM.add(llvm::createFunctionInliningPass());
|
||||||
optPM.add(llvm::createArgumentPromotionPass());
|
optPM.add(llvm::createArgumentPromotionPass());
|
||||||
optPM.add(llvm::createScalarReplAggregatesPass(-1, false));
|
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold, false));
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass());
|
||||||
optPM.add(llvm::createCFGSimplificationPass());
|
optPM.add(llvm::createCFGSimplificationPass());
|
||||||
optPM.add(llvm::createReassociatePass());
|
optPM.add(llvm::createReassociatePass());
|
||||||
|
|||||||
23
stdlib.ispc
23
stdlib.ispc
@@ -340,9 +340,9 @@ static inline uniform bool any(bool v) {
|
|||||||
// We only care about whether "any" is true for the active program instances,
|
// We only care about whether "any" is true for the active program instances,
|
||||||
// so we have to make v with the current program mask.
|
// so we have to make v with the current program mask.
|
||||||
#ifdef ISPC_TARGET_GENERIC
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
return __movmsk(v & __mask) != 0;
|
return __any(v | !__mask);
|
||||||
#else
|
#else
|
||||||
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
|
return __any(__sext_varying_bool(v) | !__mask);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -350,13 +350,24 @@ __declspec(safe)
|
|||||||
static inline uniform bool all(bool v) {
|
static inline uniform bool all(bool v) {
|
||||||
// As with any(), we need to explicitly mask v with the current program mask
|
// As with any(), we need to explicitly mask v with the current program mask
|
||||||
// so we're only looking at the current lanes
|
// so we're only looking at the current lanes
|
||||||
|
|
||||||
#ifdef ISPC_TARGET_GENERIC
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
bool match = ((v & __mask) == __mask);
|
return __all(v | !__mask);
|
||||||
#else
|
#else
|
||||||
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
return __all(__sext_varying_bool(v) | !__mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
__declspec(safe)
|
||||||
|
static inline uniform bool none(bool v) {
|
||||||
|
// As with any(), we need to explicitly mask v with the current program mask
|
||||||
|
// so we're only looking at the current lanes
|
||||||
|
|
||||||
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
|
return __none(v | !__mask);
|
||||||
|
#else
|
||||||
|
return __none(__sext_varying_bool(v) | !__mask);
|
||||||
#endif
|
#endif
|
||||||
return __movmsk(match) == ((programCount == 64) ? ~0ull :
|
|
||||||
((1ull << programCount) - 1));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
|
|||||||
Reference in New Issue
Block a user