aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClement Courbet <courbet@google.com>2019-08-22 21:24:46 +0000
committerClement Courbet <courbet@google.com>2019-08-22 21:24:46 +0000
commite7f54f559cd76af0e1a587a98cae6ebe0fe38c44 (patch)
treeefffd1493be01c547cbc2639bae680ea403146fe
parenta8ee671a8aec60a1e58d5d75f8a791e9933454cf (diff)
[MemFunctions] Add microbenchmarks for memory functions.
Summary: Memory functions (memcmp, memcpy, ...) are typically recognized by the compiler and expanded to specific asm patterns when the size is known at compile time. This will help catch any regressions in expansions. Right now we're only testing memcmp (see context in D60318). Subscribers: mgorny, llvm-commits, gchatelet, spatel Tags: #llvm Differential Revision: https://reviews.llvm.org/D64082 git-svn-id: https://llvm.org/svn/llvm-project/test-suite/trunk@369707 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--MicroBenchmarks/CMakeLists.txt1
-rw-r--r--MicroBenchmarks/MemFunctions/CMakeLists.txt5
-rw-r--r--MicroBenchmarks/MemFunctions/main.cpp127
3 files changed, 133 insertions, 0 deletions
diff --git a/MicroBenchmarks/CMakeLists.txt b/MicroBenchmarks/CMakeLists.txt
index 77a8e897..9f19b69f 100644
--- a/MicroBenchmarks/CMakeLists.txt
+++ b/MicroBenchmarks/CMakeLists.txt
@@ -6,3 +6,4 @@ add_subdirectory(LCALS)
add_subdirectory(harris)
add_subdirectory(ImageProcessing)
add_subdirectory(LoopInterchange)
+add_subdirectory(MemFunctions)
diff --git a/MicroBenchmarks/MemFunctions/CMakeLists.txt b/MicroBenchmarks/MemFunctions/CMakeLists.txt
new file mode 100644
index 00000000..6f08d856
--- /dev/null
+++ b/MicroBenchmarks/MemFunctions/CMakeLists.txt
@@ -0,0 +1,5 @@
+llvm_test_run(WORKDIR ${CMAKE_CURRENT_BINARY_DIR})
+
+llvm_test_executable(MemFunctions main.cpp)
+
+target_link_libraries(MemFunctions benchmark)
diff --git a/MicroBenchmarks/MemFunctions/main.cpp b/MicroBenchmarks/MemFunctions/main.cpp
new file mode 100644
index 00000000..8ffdbb6d
--- /dev/null
+++ b/MicroBenchmarks/MemFunctions/main.cpp
@@ -0,0 +1,127 @@
+//===- main.cc - Memory Functions Benchmarks ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Memory functions (memcmp, memcpy, ...) are typically recognized by the
+// compiler and expanded to specific asm patterns when the size is known at
+// compile time. THese microbenchmarks help catch potential CodeGen regressions.
+//
+// Note that these microbenchmarks do not represent a typical real-life
+// situation. They are designed to test the LLVM CodeGen. In particular,
+// real-life applications will typically be memory- rather than compute-bound
+// when manipulating memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstring>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Benchmarks `memcmp(p, q, size) OP 0` where n is known at compile time and OP
+// is defined by `Pred`. The compiler typically inlines the memcmp + comparion
+// to loads and compares.
+template <int kSize, typename Pred, typename Mod>
+void BM_MemCmp(benchmark::State& state) {
+ // The buffer size should be large enough that there are several elements in a
+ // buffer, but small enough to fit in cache. Alsom the smaller the buffer
+ // size, the more latency the benchmark framework has to adjust the number of
+ // iterations to make benchmarking faster.
+ static constexpr size_t kMaxBufSizeBytes = 4096;
+ constexpr const size_t kNumElements = kMaxBufSizeBytes / kSize;
+
+ std::vector<char> p_storage(kNumElements * kSize);
+ std::vector<char> q_storage(kNumElements * kSize);
+ char* p = p_storage.data();
+ const char* q = q_storage.data();
+
+ // We're comparing an all-zeros buffer (q) vs an all-zeros-but-one-element
+ // buffer (p). The non-zero element is detemined by `Mod`.
+ for (int i = 0; i < kNumElements; ++i)
+ Mod().template Change<kSize>(p + i * kSize);
+
+ benchmark::DoNotOptimize(p);
+ benchmark::DoNotOptimize(q);
+
+ for (auto _ : state) {
+ benchmark::ClobberMemory();
+ benchmark::ClobberMemory();
+
+ for (int i = 0; i < kNumElements; ++i) {
+ int res = Pred()(memcmp(p + i * kSize, q + i * kSize, kSize));
+ benchmark::DoNotOptimize(res);
+ }
+ }
+ state.SetBytesProcessed(p_storage.size() * state.iterations());
+}
+
+// Predicates.
+struct EqZero {
+ bool operator()(int v) const { return v == 0; }
+};
+struct LessThanZero {
+ bool operator()(int v) const { return v < 0; }
+};
+struct GreaterThanZero {
+ bool operator()(int v) const { return v > 0; }
+};
+
+// Functors to change the first/mid/last or no value.
+struct None {
+ template <int kSize>
+ void Change(char* const p) const {}
+};
+struct First {
+ template <int kSize>
+ void Change(char* const p) const {
+ p[0] = 128;
+ }
+};
+struct Mid {
+ template <int kSize>
+ void Change(char* const p) const {
+ p[kSize / 2] = 128;
+ }
+};
+struct Last {
+ template <int kSize>
+ void Change(char* const p) const {
+ p[kSize - 1] = 128;
+ }
+};
+
+#define MEMCMP_BENCHMARK_PRED_CHANGE(size, pred, change) \
+ BENCHMARK_TEMPLATE(BM_MemCmp, size, pred, change) \
+ ->Unit(benchmark::kNanosecond);
+
+#define MEMCMP_BENCHMARK_PRED(size, pred) \
+ MEMCMP_BENCHMARK_PRED_CHANGE(size, pred, None); \
+ MEMCMP_BENCHMARK_PRED_CHANGE(size, pred, First); \
+ MEMCMP_BENCHMARK_PRED_CHANGE(size, pred, Mid); \
+ MEMCMP_BENCHMARK_PRED_CHANGE(size, pred, Last);
+
+#define MEMCMP_BENCHMARK(size) \
+ MEMCMP_BENCHMARK_PRED(size, EqZero) \
+ MEMCMP_BENCHMARK_PRED(size, LessThanZero) \
+ MEMCMP_BENCHMARK_PRED(size, GreaterThanZero)
+
+MEMCMP_BENCHMARK(1)
+MEMCMP_BENCHMARK(2)
+MEMCMP_BENCHMARK(3)
+MEMCMP_BENCHMARK(4)
+MEMCMP_BENCHMARK(5)
+MEMCMP_BENCHMARK(6)
+MEMCMP_BENCHMARK(7)
+MEMCMP_BENCHMARK(8)
+MEMCMP_BENCHMARK(15)
+MEMCMP_BENCHMARK(16)
+MEMCMP_BENCHMARK(31)
+MEMCMP_BENCHMARK(32)
+MEMCMP_BENCHMARK(63)
+MEMCMP_BENCHMARK(64)
+
+BENCHMARK_MAIN();