aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2018-07-13 04:48:41 +0000
committerDean Michael Berris <dberris@google.com>2018-07-13 04:48:41 +0000
commit8e00869a6a62ead9035073ce89b56c08e5967b54 (patch)
treee003729c8c8b47ed15a387004ae70516442e2bd7
parent06374543b176bc1e1348c77edfeeef07b45e8f23 (diff)
[XRay][test-suite] Benchmarks for profiling mode implementation
Summary: This patch adds microbenchmarks for the XRay Profiling Mode implementation to the test-suite. The benchmarks included cover: - Cost of the Profiling Mode runtime handler(s) and underlying implementation details when enabled. - Different benchmarks for different call stack traces. Initially showing deep, shallow, and wide function call stacks. These microbenchmarks can be used to measure progress on the optimisation work associated with the profiling mode runtime implementation going forward. It also allows us to better qualify the cost of the XRay runtime framework (in particular the trampolines) as we make improvements to those in the future. Depends on D48653. Reviewers: kpw, eizan Reviewed By: eizan Subscribers: mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D48879 git-svn-id: https://llvm.org/svn/llvm-project/test-suite/trunk@336970 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--MicroBenchmarks/XRay/CMakeLists.txt1
-rw-r--r--MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt20
-rw-r--r--MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc90
-rw-r--r--MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc84
-rw-r--r--MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc142
5 files changed, 337 insertions, 0 deletions
diff --git a/MicroBenchmarks/XRay/CMakeLists.txt b/MicroBenchmarks/XRay/CMakeLists.txt
index 4cc9754e..4edeec83 100644
--- a/MicroBenchmarks/XRay/CMakeLists.txt
+++ b/MicroBenchmarks/XRay/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(ReturnReference)
add_subdirectory(FDRMode)
+add_subdirectory(ProfilingMode)
diff --git a/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt b/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt
new file mode 100644
index 00000000..4d9a7db1
--- /dev/null
+++ b/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt
@@ -0,0 +1,20 @@
+check_cxx_compiler_flag(-fxray-instrument COMPILER_HAS_FXRAY_INSTRUMENT)
+check_cxx_compiler_flag(-fxray-modes=xray-profiling
+ COMPILER_HAS_FXRAY_PROFILING)
+if(ARCH STREQUAL "x86"
+ AND COMPILER_HAS_FXRAY_INSTRUMENT
+ AND COMPILER_HAS_FXRAY_PROFILING)
+ list(APPEND CPPFLAGS
+ -std=c++11 -Wl,--gc-sections
+ -fxray-instrument -fxray-modes=xray-profiling)
+ list(APPEND LDFLAGS
+ -fxray-instrument -fxray-modes=xray-profiling)
+ llvm_test_run()
+ llvm_test_executable(deep-call-bench deep-call-bench.cc)
+ target_link_libraries(deep-call-bench benchmark)
+ llvm_test_executable(shallow-call-bench shallow-call-bench.cc)
+ target_link_libraries(shallow-call-bench benchmark)
+ llvm_test_executable(wide-call-bench wide-call-bench.cc)
+ target_link_libraries(wide-call-bench benchmark)
+endif()
+
diff --git a/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc b/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc
new file mode 100644
index 00000000..986725c3
--- /dev/null
+++ b/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc
@@ -0,0 +1,90 @@
+//===- deep-call-bench.cc - XRay Profiling Mode Benchmarks ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+
+std::atomic<int> some_global{1};
+
+std::atomic<int> some_temporary{0};
+
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  // Select and initialize "xray-profiling" mode, then patch; abort on failure.
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK) {
+    std::cerr << "Failed selecting 'xray-profiling' mode. Aborting.\n";
+    std::abort();
+  }
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED) {
+    std::cerr << "Failed initializing xray-profiling mode. Aborting.\n";
+    std::abort();
+  }
+
+  __xray_patch();
+}
+
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+ if (__xray_log_finalize() != XRAY_LOG_FINALIZED) {
+ std::cerr << "Failed to finalize xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+ // Flush only after finalizing succeeded; a failure at either step aborts.
+ if (__xray_log_flushLog() != XRAY_LOG_FLUSHED) {
+ std::cerr << "Failed to flush xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+}
+
+} // namespace
+
+// Recurses `depth` levels; each of the depth + 1 frames loads some_global.
+[[clang::xray_always_instrument]] __attribute__((weak, noinline)) int deep(
+    int depth) {
+  if (depth == 0) return some_global.load(std::memory_order_acquire);
+  return some_global.load(std::memory_order_acquire) + deep(depth - 1);
+}
+
+// This benchmark measures the cost of XRay instrumentation in deep function
+// call stacks, where each function has been instrumented. The benchmark's
+// input argument controls the recursion depth. The recursive function is
+// marked no-inline, given weak symbol binding, and always instrumented by
+// XRay. Each benchmark run (not each timed iteration) has thread 0 initialize
+// the XRay profiling runtime before the loop and tear it down afterwards.
+//
+// We also run the benchmark on multiple threads, to track and identify
+// whether/where the contention and scalability issues are in the implementation
+// of the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingDeepCallStack(
+ benchmark::State &state) {
+ if (state.thread_index == 0) profiling_setup();
+ // One call per thread outside the timed loop (presumably a warm-up).
+ benchmark::DoNotOptimize(some_temporary = deep(state.range(0)));
+
+ for (auto _ : state)
+ benchmark::DoNotOptimize(some_temporary = deep(state.range(0)));
+
+ if (state.thread_index == 0) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingDeepCallStack)
+ ->ThreadRange(1, 32)
+ ->RangeMultiplier(2)
+ ->Range(1, 64)
+ ->UseRealTime();
+
+BENCHMARK_MAIN();
diff --git a/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc b/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc
new file mode 100644
index 00000000..078b7eb6
--- /dev/null
+++ b/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc
@@ -0,0 +1,84 @@
+//===- shallow-call-bench.cc - XRay Profiling Mode Benchmarks -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+
+std::atomic<int> some_global{0};
+
+std::atomic<int> some_temporary{0};
+
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  // Select and initialize "xray-profiling" mode, then patch; abort on failure.
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK) {
+    std::cerr << "Failed selecting 'xray-profiling' mode. Aborting.\n";
+    std::abort();
+  }
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED) {
+    std::cerr << "Failed initializing xray-profiling mode. Aborting.\n";
+    std::abort();
+  }
+
+  __xray_patch();
+}
+
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+ if (__xray_log_finalize() != XRAY_LOG_FINALIZED) {
+ std::cerr << "Failed to finalize xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+ // Flush only after finalizing succeeded; a failure at either step aborts.
+ if (__xray_log_flushLog() != XRAY_LOG_FLUSHED) {
+ std::cerr << "Failed to flush xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+}
+
+} // namespace
+
+#define XRAY_WEAK_NOINLINE \
+ [[clang::xray_always_instrument]] __attribute__((weak)) \
+ __attribute__((noinline))
+// Atomically increments some_global and returns the value it held before.
+XRAY_WEAK_NOINLINE int shallow() {
+ return some_global.fetch_add(1, std::memory_order_acq_rel);
+}
+
+// This benchmark measures the cost of XRay instrumentation in a shallow call
+// stack, where we instrument a single function call. The function is marked
+// no-inline, given weak symbol binding, and always instrumented by XRay. Each
+// benchmark run (not each timed iteration) has thread 0 initialize the XRay
+// profiling runtime before the loop and tear it down afterwards.
+//
+// We also run the benchmark on multiple threads, to track and identify
+// whether/where the contention and scalability issues are in the implementation
+// of the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingShallowStack(
+ benchmark::State &state) {
+ if (state.thread_index == 0) profiling_setup();
+ // One call per thread outside the timed loop (presumably a warm-up).
+ benchmark::DoNotOptimize(some_temporary = shallow());
+ for (auto _ : state) benchmark::DoNotOptimize(some_temporary = shallow());
+
+ if (state.thread_index == 0) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingShallowStack)->ThreadRange(1, 64)->UseRealTime();
+
+BENCHMARK_MAIN();
diff --git a/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc b/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc
new file mode 100644
index 00000000..1c547ae0
--- /dev/null
+++ b/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc
@@ -0,0 +1,142 @@
+//===- wide-call-bench.cc - XRay Profiling Mode Benchmarks ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+
+std::atomic<int> some_global{1};
+
+std::atomic<int> some_temporary{0};
+
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  // Select and initialize "xray-profiling" mode, then patch; abort on failure.
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK) {
+    std::cerr << "Failed selecting 'xray-profiling' mode. Aborting.\n";
+    std::abort();
+  }
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED) {
+    std::cerr << "Failed initializing xray-profiling mode. Aborting.\n";
+    std::abort();
+  }
+
+  __xray_patch();
+}
+
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+ if (__xray_log_finalize() != XRAY_LOG_FINALIZED) {
+ std::cerr << "Failed to finalize xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+ // Flush only after finalizing succeeded; a failure at either step aborts.
+ if (__xray_log_flushLog() != XRAY_LOG_FLUSHED) {
+ std::cerr << "Failed to flush xray-profiling mode. Aborting.\n";
+ std::abort();
+ }
+}
+
+} // namespace
+
+#define XRAY_WEAK_NOINLINE \
+ [[clang::xray_always_instrument]] __attribute__((weak)) \
+ __attribute__((noinline))
+// Eight separate instrumented leaf functions; each just loads some_global.
+XRAY_WEAK_NOINLINE int wide8() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide7() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide6() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide5() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide4() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide3() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide2() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide1() {
+ return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int call(int depth, int width) {
+ if (depth == 0) return some_global.load(std::memory_order_acquire);
+ // Deliberate fall-through: width W calls wideW()..wide1(); others act as 8.
+ auto val = 0;
+ switch (width) {
+ default:
+ case 8:
+ val += wide8();
+ case 7:
+ val += wide7();
+ case 6:
+ val += wide6();
+ case 5:
+ val += wide5();
+ case 4:
+ val += wide4();
+ case 3:
+ val += wide3();
+ case 2:
+ val += wide2();
+ case 1:
+ val += wide1();
+ }
+ // This level's load plus leaf results plus the result of the next level.
+ return some_global.load(std::memory_order_acquire) + val +
+ call(depth - 1, width);
+}
+
+// This benchmark measures the cost of XRay instrumentation in wide function
+// call stacks, where each function has been instrumented. The first input
+// controls the recursion depth; the second selects, through non-looping
+// branching, how many sibling functions each level calls (the width). The
+// recursive function is marked no-inline, given weak symbol binding, and
+// always instrumented by XRay. Each benchmark run (not each timed iteration)
+// has thread 0 initialize the XRay profiling runtime before the loop and tear
+// it down afterwards.
+//
+// We also run the benchmark on multiple threads, to track and identify
+// whether/where the contention and scalability issues are in the implementation
+// of the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingWideCallStack(
+ benchmark::State &state) {
+ if (state.thread_index == 0) profiling_setup();
+ // One call per thread outside the timed loop (presumably a warm-up).
+ benchmark::DoNotOptimize(some_temporary =
+ call(state.range(0), state.range(1)));
+ for (auto _ : state)
+ benchmark::DoNotOptimize(some_temporary =
+ call(state.range(0), state.range(1)));
+
+ if (state.thread_index == 0) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingWideCallStack)
+ ->ThreadRange(1, 32)
+ ->RangeMultiplier(2)
+ ->Ranges({{1, 64}, {1, 8}})
+ ->UseRealTime();
+
+BENCHMARK_MAIN();