From 5cc750ecee73807fad1870ee226428be05a082f5 Mon Sep 17 00:00:00 2001
From: Matt Pharr
Date: Wed, 6 Jul 2011 07:37:35 +0100
Subject: [PATCH] Add comment re popcnt on SSE2 target.

---
 stdlib-sse2.ll | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/stdlib-sse2.ll b/stdlib-sse2.ll
index c37fdfb5..d1573e21 100644
--- a/stdlib-sse2.ll
+++ b/stdlib-sse2.ll
@@ -244,6 +244,14 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
 
 ; FIXME: this is very inefficient, loops over all 32 bits...
 
+; we could use the LLVM intrinsic declare i32 @llvm.ctpop.i32(i32),
+; although that currently ends up generating a POPCNT instruction even
+; if we give --target=sse2 on the command line.  We probably need to
+; pipe through the 'sse2' request to LLVM via the 'features' string
+; at codegen time...  (If e.g. --cpu=penryn is also passed along, then
+; it does generate non-POPCNT code and in particular better code than
+; the below does.)
+
 define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
 entry:
   br label %loop