aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2018-09-17 03:09:01 +0000
committerDean Michael Berris <dberris@google.com>2018-09-17 03:09:01 +0000
commit641b245943334ad18b672e0ed85497f47fa19b45 (patch)
tree44d3e284c3e329f296b38ddb2dba912e6c552fe9
parent31c8a31facbcf213ff5e2c9f066c4577f8208d99 (diff)
[XRay] Simplify FDR buffer management
Summary: This change makes XRay FDR mode use a single backing store for the buffer queue, and have indexes into that backing store instead. We also remove the reliance on the internal allocator implementation in the FDR mode logging implementation. In the process of making this change we found an inconsistency with the way we're returning buffers to the queue, and how we're setting the extents. We take the chance to simplify the way we're managing the extents of each buffer. It turns out we do not need the indirection for the extents, so we co-host the atomic 64-bit int with the buffer object. It also seems that we've not been returning the buffers for the thread running the flush functionality when writing out the files, so we can run into a situation where we could be missing data. We consolidate all the allocation routines now into xray_allocator.h, where we used to have routines defined in xray_buffer_queue.cc. Reviewers: mboerger, eizan Subscribers: jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D52077 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@342356 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/xray/xray_allocator.h20
-rw-r--r--lib/xray/xray_buffer_queue.cc102
-rw-r--r--lib/xray/xray_buffer_queue.h25
-rw-r--r--lib/xray/xray_fdr_logging.cc50
4 files changed, 95 insertions, 102 deletions
diff --git a/lib/xray/xray_allocator.h b/lib/xray/xray_allocator.h
index 5a782739d..aea12809b 100644
--- a/lib/xray/xray_allocator.h
+++ b/lib/xray/xray_allocator.h
@@ -50,23 +50,33 @@ template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
internal_munmap(B, sizeof(T));
}
-inline void *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
- auto B = reinterpret_cast<void *>(internal_mmap(
- NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+template <class T = uint8_t> T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+ auto B = reinterpret_cast<void *>(
+ internal_mmap(NULL, S * sizeof(T), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
if (B == MAP_FAILED) {
if (Verbosity())
Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
return nullptr;
}
- return B;
+ return reinterpret_cast<T *>(B);
}
-inline void deallocateBuffer(void *B, size_t S) XRAY_NEVER_INSTRUMENT {
+template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
if (B == nullptr)
return;
internal_munmap(B, S);
}
+template <class T, class... U>
+T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT {
+ auto A = allocateBuffer<T>(N);
+ if (A != nullptr)
+ while (N > 0)
+ new (A + (--N)) T(std::forward<U>(Us)...);
+ return A;
+}
+
/// The Allocator type hands out fixed-sized chunks of memory that are
/// cache-line aligned and sized. This is useful for placement of
/// performance-sensitive data in memory that's frequently accessed. The
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 5bdd9148d..5a88ecd33 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -16,68 +16,42 @@
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_posix.h"
+#include "xray_allocator.h"
+#include "xray_defs.h"
#include <memory>
#include <sys/mman.h>
using namespace __xray;
using namespace __sanitizer;
-template <class T> static T *allocRaw(size_t N) {
- // TODO: Report errors?
- void *A = reinterpret_cast<void *>(
- internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
- return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
-}
-
-template <class T> static void deallocRaw(T *ptr, size_t N) {
- // TODO: Report errors?
- if (ptr != nullptr)
- internal_munmap(ptr, N);
-}
-
-template <class T> static T *initArray(size_t N) {
- auto A = allocRaw<T>(N);
- if (A != nullptr)
- while (N > 0)
- new (A + (--N)) T();
- return A;
-}
-
-BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
- : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
- BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
- Next(Buffers), First(Buffers), LiveBuffers(0) {
- if (Buffers == nullptr) {
+BufferQueue::BufferQueue(size_t B, size_t N,
+ bool &Success) XRAY_NEVER_INSTRUMENT
+ : BufferSize(B),
+ BufferCount(N),
+ Mutex(),
+ Finalizing{0},
+ BackingStore(allocateBuffer(B *N)),
+ Buffers(initArray<BufferQueue::BufferRep>(N)),
+ Next(Buffers),
+ First(Buffers),
+ LiveBuffers(0) {
+ if (BackingStore == nullptr) {
Success = false;
return;
}
- if (OwnedBuffers == nullptr) {
- // Clean up the buffers we've already allocated.
- for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
- B->~BufferRep();
- deallocRaw(Buffers, N);
+ if (Buffers == nullptr) {
+ deallocateBuffer(BackingStore, BufferSize * BufferCount);
Success = false;
return;
- };
+ }
for (size_t i = 0; i < N; ++i) {
auto &T = Buffers[i];
- void *Tmp = allocRaw<char>(BufferSize);
- if (Tmp == nullptr) {
- Success = false;
- return;
- }
- auto *Extents = allocRaw<BufferExtents>(1);
- if (Extents == nullptr) {
- Success = false;
- return;
- }
auto &Buf = T.Buff;
- Buf.Data = Tmp;
+ Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i);
Buf.Size = B;
- Buf.Extents = Extents;
- OwnedBuffers[i] = Tmp;
+ atomic_store(&Buf.Extents, 0, memory_order_release);
+ T.Used = false;
}
Success = true;
}
@@ -85,13 +59,17 @@ BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
if (atomic_load(&Finalizing, memory_order_acquire))
return ErrorCode::QueueFinalizing;
+
SpinMutexLock Guard(&Mutex);
if (LiveBuffers == BufferCount)
return ErrorCode::NotEnoughMemory;
auto &T = *Next;
auto &B = T.Buff;
- Buf = B;
+ auto Extents = atomic_load(&B.Extents, memory_order_acquire);
+ atomic_store(&Buf.Extents, Extents, memory_order_release);
+ Buf.Data = B.Data;
+ Buf.Size = B.Size;
T.Used = true;
++LiveBuffers;
@@ -102,15 +80,11 @@ BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
}
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
- // Blitz through the buffers array to find the buffer.
- bool Found = false;
- for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
- if (*I == Buf.Data) {
- Found = true;
- break;
- }
- }
- if (!Found)
+ // Check whether the buffer being referred to is within the bounds of the
+ // backing store's range.
+ if (Buf.Data < BackingStore ||
+ Buf.Data >
+ reinterpret_cast<char *>(BackingStore) + (BufferCount * BufferSize))
return ErrorCode::UnrecognizedBuffer;
SpinMutexLock Guard(&Mutex);
@@ -121,10 +95,14 @@ BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
return ErrorCode::NotEnoughMemory;
// Now that the buffer has been released, we mark it as "used".
- First->Buff = Buf;
+ auto Extents = atomic_load(&Buf.Extents, memory_order_acquire);
+ atomic_store(&First->Buff.Extents, Extents, memory_order_release);
+ First->Buff.Data = Buf.Data;
+ First->Buff.Size = Buf.Size;
First->Used = true;
Buf.Data = nullptr;
Buf.Size = 0;
+ atomic_store(&Buf.Extents, 0, memory_order_release);
--LiveBuffers;
if (++First == (Buffers + BufferCount))
First = Buffers;
@@ -139,14 +117,8 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
}
BufferQueue::~BufferQueue() {
- for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
- auto &T = *I;
- auto &Buf = T.Buff;
- deallocRaw(Buf.Data, Buf.Size);
- deallocRaw(Buf.Extents, 1);
- }
for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
B->~BufferRep();
- deallocRaw(Buffers, BufferCount);
- deallocRaw(OwnedBuffers, BufferCount);
+ deallocateBuffer(Buffers, BufferCount);
+ deallocateBuffer(BackingStore, BufferSize * BufferCount);
}
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
index e76fa7983..bfb091e3f 100644
--- a/lib/xray/xray_buffer_queue.h
+++ b/lib/xray/xray_buffer_queue.h
@@ -18,6 +18,7 @@
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_mutex.h"
+#include "xray_defs.h"
#include <cstddef>
namespace __xray {
@@ -29,14 +30,10 @@ namespace __xray {
/// trace collection.
class BufferQueue {
public:
- struct alignas(64) BufferExtents {
- atomic_uint64_t Size;
- };
-
struct Buffer {
+ atomic_uint64_t Extents{0};
void *Data = nullptr;
size_t Size = 0;
- BufferExtents *Extents;
};
struct BufferRep {
@@ -76,8 +73,10 @@ private:
T *operator->() const { return &(Buffers[Offset].Buff); }
- Iterator(BufferRep *Root, size_t O, size_t M)
- : Buffers(Root), Offset(O), Max(M) {
+ Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT
+ : Buffers(Root),
+ Offset(O),
+ Max(M) {
// We want to advance to the first Offset where the 'Used' property is
// true, or to the end of the list/queue.
while (!Buffers[Offset].Used && Offset != Max) {
@@ -107,16 +106,18 @@ private:
// Size of each individual Buffer.
size_t BufferSize;
- BufferRep *Buffers;
-
// Amount of pre-allocated buffers.
size_t BufferCount;
SpinMutex Mutex;
atomic_uint8_t Finalizing;
- // Pointers to buffers managed/owned by the BufferQueue.
- void **OwnedBuffers;
+ // A pointer to a contiguous block of memory to serve as the backing store for
+ // all the individual buffers handed out.
+ void *BackingStore;
+
+ // A dynamically allocated array of BufferRep instances.
+ BufferRep *Buffers;
// Pointer to the next buffer to be handed out.
BufferRep *Next;
@@ -198,7 +199,7 @@ public:
/// Applies the provided function F to each Buffer in the queue, only if the
/// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
/// releaseBuffer(...) operation).
- template <class F> void apply(F Fn) {
+ template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT {
SpinMutexLock G(&Mutex);
for (auto I = begin(), E = end(); I != E; ++I)
Fn(*I);
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index 4b308b27f..131c137b6 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -30,6 +30,7 @@
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
+#include "xray_allocator.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_flags.h"
@@ -47,7 +48,7 @@ atomic_sint32_t LoggingStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
// force the alignment to 64-bytes for x86 cache line alignment, as this
// structure is used in the hot path of implementation.
struct alignas(64) ThreadLocalData {
- BufferQueue::Buffer Buffer;
+ BufferQueue::Buffer Buffer{};
char *RecordPtr = nullptr;
// The number of FunctionEntry records immediately preceding RecordPtr.
uint8_t NumConsecutiveFnEnters = 0;
@@ -81,6 +82,7 @@ static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
static pthread_key_t Key;
// Global BufferQueue.
+static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
static BufferQueue *BQ = nullptr;
static atomic_sint32_t LogFlushStatus = {
@@ -184,11 +186,10 @@ static void writeNewBufferPreamble(tid_t Tid, timespec TS,
TLD.NumTailCalls = 0;
if (TLD.BQ == nullptr || TLD.BQ->finalizing())
return;
- internal_memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata));
- TLD.RecordPtr += sizeof(Metadata);
- // Since we write out the extents as the first metadata record of the
- // buffer, we need to write out the extents including the extents record.
- atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+ internal_memcpy(TLD.RecordPtr, Metadata,
+ sizeof(MetadataRecord) * InitRecordsCount);
+ TLD.RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
+ atomic_store(&TLD.Buffer.Extents, sizeof(MetadataRecord) * InitRecordsCount,
memory_order_release);
}
@@ -209,12 +210,12 @@ static void setupNewBuffer(int (*wall_clock_reader)(
static void incrementExtents(size_t Add) {
auto &TLD = getThreadLocalData();
- atomic_fetch_add(&TLD.Buffer.Extents->Size, Add, memory_order_acq_rel);
+ atomic_fetch_add(&TLD.Buffer.Extents, Add, memory_order_acq_rel);
}
static void decrementExtents(size_t Subtract) {
auto &TLD = getThreadLocalData();
- atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract, memory_order_acq_rel);
+ atomic_fetch_sub(&TLD.Buffer.Extents, Subtract, memory_order_acq_rel);
}
static void writeNewCPUIdMetadata(uint16_t CPU,
@@ -583,7 +584,7 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
auto &TLD = getThreadLocalData();
- if (TLD.BQ == nullptr)
+ if (TLD.BQ == nullptr && BQ != nullptr)
TLD.BQ = BQ;
if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
@@ -758,6 +759,7 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
static BufferQueue::const_iterator It{};
static BufferQueue::const_iterator End{};
static void *CurrentBuffer{nullptr};
+ static size_t SerializedBufferSize = 0;
if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
// From this point on, we provide raw access to the raw buffer we're getting
// from the BufferQueue. We're relying on the iterators from the current
@@ -767,7 +769,7 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
}
if (CurrentBuffer != nullptr) {
- InternalFree(CurrentBuffer);
+ deallocateBuffer(CurrentBuffer, SerializedBufferSize);
CurrentBuffer = nullptr;
}
@@ -778,9 +780,9 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
// out to disk. The difference here would be that we still write "empty"
// buffers, or at least go through the iterators faithfully to let the
// handlers see the empty buffers in the queue.
- auto BufferSize = atomic_load(&It->Extents->Size, memory_order_acquire);
- auto SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
- CurrentBuffer = InternalAlloc(SerializedBufferSize);
+ auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
+ SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
+ CurrentBuffer = allocateBuffer(SerializedBufferSize);
if (CurrentBuffer == nullptr)
return {nullptr, 0};
@@ -848,7 +850,6 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
releaseThreadLocalBuffer(*TLD.BQ);
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
});
@@ -883,6 +884,13 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
+ // Release the current thread's buffer before we attempt to write out all the
+ // buffers. This ensures that in case we had only a single thread going, that
+ // we are able to capture the data nonetheless.
+ auto &TLD = getThreadLocalData();
+ if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
+ releaseThreadLocalBuffer(*TLD.BQ);
+
BQ->apply([&](const BufferQueue::Buffer &B) {
// Starting at version 2 of the FDR logging implementation, we only write
// the records identified by the extents of the buffer. We use the Extents
@@ -890,7 +898,7 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
// still use a Metadata record, but fill in the extents instead for the
// data.
MetadataRecord ExtentsRecord;
- auto BufferExtents = atomic_load(&B.Extents->Size, memory_order_acquire);
+ auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire);
DCHECK(BufferExtents <= B.Size);
ExtentsRecord.Type = uint8_t(RecordType::Metadata);
ExtentsRecord.RecordKind =
@@ -922,7 +930,12 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
// Do special things to make the log finalize itself, and not allow any more
// operations to be performed until re-initialized.
- BQ->finalize();
+ if (BQ == nullptr) {
+ if (Verbosity())
+ Report("Attempting to finalize an uninitialized global buffer!\n");
+ } else {
+ BQ->finalize();
+ }
atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
memory_order_release);
@@ -1132,13 +1145,11 @@ XRayLogInitStatus fdrLoggingInit(UNUSED size_t BufferSize,
if (BQ != nullptr) {
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
if (BQ == nullptr) {
- BQ = reinterpret_cast<BufferQueue *>(
- InternalAlloc(sizeof(BufferQueue), nullptr, 64));
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
new (BQ) BufferQueue(BufferSize, BufferMax, Success);
}
@@ -1146,7 +1157,6 @@ XRayLogInitStatus fdrLoggingInit(UNUSED size_t BufferSize,
Report("BufferQueue init failed.\n");
if (BQ != nullptr) {
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;