From 1c760b166ff3584bef868c696afb6492a1481309 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 18:14:36 +0100 Subject: [PATCH 01/15] Revert "Missed adding getMetrics in one spot (#237)" This reverts commit e62349b3e124f71ebf786b9fbdddf606b9b51fb0. --- ts/src/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/ts/src/index.ts b/ts/src/index.ts index 42454629..51cceddb 100644 --- a/ts/src/index.ts +++ b/ts/src/index.ts @@ -40,7 +40,6 @@ export const time = { v8ProfilerStuckEventLoopDetected: timeProfiler.v8ProfilerStuckEventLoopDetected, getState: timeProfiler.getState, - getMetrics: timeProfiler.getMetrics, constants: timeProfiler.constants, }; From e733a16e6784fff73dbdc99b31bdad1d57160d15 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 18:15:02 +0100 Subject: [PATCH 02/15] Revert "Add profiler metrics (#235)" This reverts commit 878689db6eac044dc0839ea22d4df56c67e74680. --- bindings/profilers/wall.cc | 43 +++++++++++++---------------------- bindings/profilers/wall.hh | 6 ++--- ts/src/time-profiler.ts | 12 +++------- ts/src/v8-types.ts | 5 ---- ts/test/test-time-profiler.ts | 4 ---- 5 files changed, 21 insertions(+), 49 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 80be444e..ea13a226 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -820,6 +820,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, #endif // DD_WALL_USE_CPED } +std::atomic* WallProfiler::GetContextCountPtr() { + return reinterpret_cast*>( + &fields_[WallProfiler::Fields::kCPEDContextCount]); +} + void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { if (cpuProfiler_ != nullptr) { cpuProfiler_->Dispose(); @@ -849,6 +854,9 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { delete ptr; } deadContextPtrs_.clear(); + + std::atomic_store_explicit( + GetContextCountPtr(), 0, std::memory_order_relaxed); } } @@ -1028,6 +1036,7 @@ v8::ProfilerId WallProfiler::StartInternal() { if (withContexts_ || workaroundV8Bug_) { SignalHandler::IncreaseUseCount(); fields_[kSampleCount] = 0; + fields_[kCPEDContextCount] = 0; } if (collectCpuTime_) { @@ -1232,10 +1241,6 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::New("state").ToLocalChecked(), SharedArrayGetter); - Nan::SetAccessor(tpl->InstanceTemplate(), - Nan::New("metrics").ToLocalChecked(), - GetMetrics); - PerIsolateData::For(Isolate::GetCurrent()) ->WallProfilerConstructor() .Reset(Nan::GetFunction(tpl).ToLocalChecked()); @@ -1251,6 +1256,11 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::New(kSampleCount), ReadOnlyDontDelete) .FromJust(); + Nan::DefineOwnProperty(constants, + Nan::New("kCPEDContextCount").ToLocalChecked(), + Nan::New(kCPEDContextCount), + ReadOnlyDontDelete) + .FromJust(); Nan::DefineOwnProperty(target, Nan::New("constants").ToLocalChecked(), constants, @@ -1320,6 +1330,8 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { deadContextPtrs_.pop_back(); } else { contextPtr = new PersistentContextPtr(this); + std::atomic_fetch_add_explicit( + GetContextCountPtr(), 1, std::memory_order_relaxed); } liveContextPtrs_.insert(contextPtr); contextPtr->RegisterForGC(isolate, cpedObj); @@ -1388,24 +1400,6 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { #endif } -Local WallProfiler::GetMetrics(Isolate* isolate) { - auto usedAsyncContextCount = liveContextPtrs_.size(); - auto totalAsyncContextCount = usedAsyncContextCount + deadContextPtrs_.size(); - auto context = isolate->GetCurrentContext(); - auto metrics = Object::New(isolate); - metrics - ->Set(context, - String::NewFromUtf8Literal(isolate, "usedAsyncContextCount"), - Number::New(isolate, usedAsyncContextCount)) - .ToChecked(); - metrics - ->Set(context, - String::NewFromUtf8Literal(isolate, "totalAsyncContextCount"), - Number::New(isolate, totalAsyncContextCount)) - .ToChecked(); - return metrics; -} - NAN_GETTER(WallProfiler::GetContext) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->GetContext(info.GetIsolate())); @@ -1421,11 +1415,6 @@ NAN_GETTER(WallProfiler::SharedArrayGetter) { info.GetReturnValue().Set(profiler->jsArray_.Get(v8::Isolate::GetCurrent())); } -NAN_GETTER(WallProfiler::GetMetrics) { - auto profiler = Nan::ObjectWrap::Unwrap(info.This()); - info.GetReturnValue().Set(profiler->GetMetrics(info.GetIsolate())); -} - NAN_METHOD(WallProfiler::V8ProfilerStuckEventLoopDetected) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->v8ProfilerStuckEventLoopDetected()); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index e7c224b2..699672f2 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -43,7 +43,7 @@ class PersistentContextPtr; class WallProfiler : public Nan::ObjectWrap { public: enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts }; - enum Fields { kSampleCount, kFieldCount }; + enum Fields { kSampleCount, kCPEDContextCount, kFieldCount }; private: std::chrono::microseconds samplingPeriod_{0}; @@ -122,6 +122,7 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); + std::atomic* GetContextCountPtr(); public: /** @@ -151,8 +152,6 @@ class WallProfiler : public Nan::ObjectWrap { int64_t time_to, int64_t cpu_time, v8::Isolate* isolate); - v8::Local GetMetrics(v8::Isolate*); - Result StartImpl(); v8::ProfilerId StartInternal(); Result StopImpl(bool restart, v8::Local& profile); @@ -193,7 +192,6 @@ class WallProfiler : public Nan::ObjectWrap { static NAN_GETTER(GetContext); static NAN_SETTER(SetContext); static NAN_GETTER(SharedArrayGetter); - static NAN_GETTER(GetMetrics); }; } // namespace dd diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index a265364b..f252f9e8 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -27,10 +27,10 @@ import { getNativeThreadId, constants as profilerConstants, } from './time-profiler-bindings'; -import {GenerateTimeLabelsFunction, TimeProfilerMetrics} from './v8-types'; +import {GenerateTimeLabelsFunction} from './v8-types'; import {isMainThread} from 'worker_threads'; -const {kSampleCount} = profilerConstants; +const {kSampleCount, kCPEDContextCount} = profilerConstants; const DEFAULT_INTERVAL_MICROS: Microseconds = 1000; const DEFAULT_DURATION_MILLIS: Milliseconds = 60000; @@ -169,13 +169,6 @@ export function getContext() { return gProfiler.context; } -export function getMetrics(): TimeProfilerMetrics { - if (!gProfiler) { - throw new Error('Wall profiler is not started'); - } - return gProfiler.metrics as TimeProfilerMetrics; -} - export function isStarted() { return !!gProfiler; } @@ -187,6 +180,7 @@ export function v8ProfilerStuckEventLoopDetected() { export const constants = { kSampleCount, + kCPEDContextCount, GARBAGE_COLLECTION_FUNCTION_NAME, NON_JS_THREADS_FUNCTION_NAME, }; diff --git a/ts/src/v8-types.ts b/ts/src/v8-types.ts index 39178b19..cc043a52 100644 --- a/ts/src/v8-types.ts +++ b/ts/src/v8-types.ts @@ -73,8 +73,3 @@ export interface GenerateTimeLabelsArgs { export interface GenerateTimeLabelsFunction { (args: GenerateTimeLabelsArgs): LabelSet; } - -export interface TimeProfilerMetrics { - usedAsyncContextCount: number; - totalAsyncContextCount: number; -} diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index d7359578..608a8469 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -139,10 +139,6 @@ describe('Time Profiler', () => { for (let i = 0; i < repeats; ++i) { loop(); enableEndPoint = i % 2 === 0; - const metrics = time.getMetrics(); - const expectedAsyncContextCount = useCPED ? 1 : 0; - assert(metrics.totalAsyncContextCount === expectedAsyncContextCount); - assert(metrics.usedAsyncContextCount === expectedAsyncContextCount); validateProfile( time.stop( i < repeats - 1, From 1656f518472fbec32874c6aac5257d78bf665902 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 18:15:10 +0100 Subject: [PATCH 03/15] Revert "Reuse `PersistentContextPtr` instances (#231)" This reverts commit bd2fc9ddeaa7fb26ef1c039f527b555766fc1208. --- bindings/profilers/wall.cc | 57 ++++++++++++++++---------------------- bindings/profilers/wall.hh | 9 +++--- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index ea13a226..45093255 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -107,11 +107,11 @@ void SetContextPtr(ContextPtr& contextPtr, class PersistentContextPtr { ContextPtr context; - WallProfiler* owner; + std::vector* dead; Persistent per; public: - PersistentContextPtr(WallProfiler* owner) : owner(owner) {} + PersistentContextPtr(std::vector* dead) : dead(dead) {} void UnregisterFromGC() { if (!per.IsEmpty()) { @@ -120,10 +120,7 @@ class PersistentContextPtr { } } - void MarkDead() { - context.reset(); - owner->MarkDeadPersistentContextPtr(this); - } + void MarkDead() { dead->push_back(this); } void RegisterForGC(Isolate* isolate, const Local& obj) { // Register a callback to delete this object when the object is GCed @@ -132,8 +129,8 @@ class PersistentContextPtr { this, [](const WeakCallbackInfo& data) { auto ptr = data.GetParameter(); - ptr->UnregisterFromGC(); ptr->MarkDead(); + ptr->UnregisterFromGC(); }, WeakCallbackType::kParameter); } @@ -145,11 +142,6 @@ class PersistentContextPtr { ContextPtr Get() const { return context; } }; -void WallProfiler::MarkDeadPersistentContextPtr(PersistentContextPtr* ptr) { - deadContextPtrs_.push_back(ptr); - liveContextPtrs_.erase(ptr); -} - // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -820,9 +812,12 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, #endif // DD_WALL_USE_CPED } -std::atomic* WallProfiler::GetContextCountPtr() { - return reinterpret_cast*>( - &fields_[WallProfiler::Fields::kCPEDContextCount]); +void WallProfiler::UpdateContextCount() { + std::atomic_store_explicit( + reinterpret_cast*>( + &fields_[WallProfiler::Fields::kCPEDContextCount]), + liveContextPtrs_.size(), + std::memory_order_relaxed); } void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { @@ -842,21 +837,13 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); - // Delete all live contexts for (auto ptr : liveContextPtrs_) { ptr->UnregisterFromGC(); delete ptr; } liveContextPtrs_.clear(); - - // Delete all unused contexts, too - for (auto ptr : deadContextPtrs_) { - delete ptr; - } deadContextPtrs_.clear(); - - std::atomic_store_explicit( - GetContextCountPtr(), 0, std::memory_order_relaxed); + UpdateContextCount(); } } @@ -1305,12 +1292,23 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { return; } + defer { + UpdateContextCount(); + }; + + // Clean up dead context pointers + for (auto ptr : deadContextPtrs_) { + liveContextPtrs_.erase(ptr); + delete ptr; + } + deadContextPtrs_.clear(); + auto cped = isolate->GetContinuationPreservedEmbedderData(); // No Node AsyncContextFrame in this continuation yet if (!cped->IsObject()) return; auto cpedObj = cped.As(); - PersistentContextPtr* contextPtr; + PersistentContextPtr* contextPtr = nullptr; SignalGuard m(setInProgress_); auto proxyProto = cpedProxyProto_.Get(isolate); if (!proxyProto->StrictEquals(cpedObj->GetPrototype())) { @@ -1325,14 +1323,7 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { proxyObj->SetPrototype(v8Ctx, proxyProto).Check(); proxyObj->Set(v8Ctx, cpedProxySymbol_.Get(isolate), cpedObj).Check(); // Set up the context pointer in the internal field - if (!deadContextPtrs_.empty()) { - contextPtr = deadContextPtrs_.back(); - deadContextPtrs_.pop_back(); - } else { - contextPtr = new PersistentContextPtr(this); - std::atomic_fetch_add_explicit( - GetContextCountPtr(), 1, std::memory_order_relaxed); - } + contextPtr = new PersistentContextPtr(&deadContextPtrs_); liveContextPtrs_.insert(contextPtr); contextPtr->RegisterForGC(isolate, cpedObj); proxyObj->SetAlignedPointerInInternalField(0, contextPtr); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 699672f2..94f77439 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -63,8 +63,9 @@ class WallProfiler : public Nan::ObjectWrap { // be deleted when the profiler is disposed. std::unordered_set liveContextPtrs_; // Context pointers belonging to GC'd CPED objects register themselves here. - // They will be reused. - std::deque deadContextPtrs_; + // They will be deleted and removed from liveContextPtrs_ next time SetContext + // is invoked. + std::vector deadContextPtrs_; std::atomic gcCount = 0; std::atomic setInProgress_ = false; @@ -122,7 +123,7 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); - std::atomic* GetContextCountPtr(); + void UpdateContextCount(); public: /** @@ -181,8 +182,6 @@ class WallProfiler : public Nan::ObjectWrap { void OnGCStart(v8::Isolate* isolate); void OnGCEnd(); - void MarkDeadPersistentContextPtr(PersistentContextPtr* ptr); - static NAN_METHOD(New); static NAN_METHOD(Start); static NAN_METHOD(Stop); From c4daa76bfe0d5ccc8143c77ad59c3801cab86fb1 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 18:15:17 +0100 Subject: [PATCH 04/15] Revert "Use a proxy for ACF with an internal field for storing sampling context (#229)" This reverts commit 8ab3b76bd56c3dac326bf78a211121d6a99cdfeb. --- bindings/profilers/wall.cc | 226 ++++++++-------------------------- bindings/profilers/wall.hh | 9 +- doc/sample_context_in_cped.md | 186 ++++++++++++++-------------- 3 files changed, 147 insertions(+), 274 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 45093255..6665b848 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -619,123 +618,6 @@ void GCEpilogueCallback(Isolate* isolate, static_cast(data)->OnGCEnd(); } -#if DD_WALL_USE_CPED -// Implementation of method calls on the CPED proxy that invoke the method on -// the proxied object. "data" is a two-element array where element 0 is the -// Symbol used to find the proxied object in the proxy and element 1 is either -// the method name or a cached method. -void CpedProxyMethodCallback(const FunctionCallbackInfo& info) { - auto isolate = info.GetIsolate(); - auto context = isolate->GetCurrentContext(); - auto cpedProxy = info.This(); - auto data = info.Data().As(); - auto symbol = data->Get(context, 0).ToLocalChecked().As(); - auto propertyName = data->Get(context, 1).ToLocalChecked(); - auto proxied = cpedProxy->Get(context, symbol).ToLocalChecked(); - Local method; - if (propertyName->IsFunction()) { - // It was already cached as a method, so we can use it directly - method = propertyName.As(); - } else { - method = proxied.As() - ->Get(context, propertyName) - .ToLocalChecked() - .As(); - // replace the property name with the method once resolved so later - // invocations are faster - data->Set(context, 1, method).Check(); - } - MaybeLocal retval; - auto arglen = info.Length(); - switch (arglen) { - case 0: - retval = method->Call(context, proxied, 0, nullptr); - break; - case 1: { - auto arg = info[0]; - retval = method->Call(context, proxied, 1, &arg); - break; - } - case 2: { - Local args[] = {info[0], info[1]}; - retval = method->Call(context, proxied, 2, args); - break; - } - default: { - // No Map methods take more than 2 arguments, so this path should never - // get invoked. We still implement it for completeness sake. - auto args = new Local[arglen]; - for (int i = 0; i < arglen; ++i) { - args[i] = info[i]; - } - retval = method->Call(context, proxied, arglen, args); - delete[] args; - } - } - info.GetReturnValue().Set(retval.ToLocalChecked()); -} - -// Implementation of property getters on the CPED proxy that get the property on -// the proxied object. "data" the Symbol used to find the proxied object in the -// proxy. -void CpedProxyPropertyGetterCallback(Local property, - const PropertyCallbackInfo& info) { - auto isolate = info.GetIsolate(); - auto context = isolate->GetCurrentContext(); - auto cpedProxy = info.This(); - auto symbol = info.Data().As(); - auto proxied = cpedProxy->Get(context, symbol).ToLocalChecked(); - auto value = proxied.As()->Get(context, property).ToLocalChecked(); - info.GetReturnValue().Set(value); -} - -// Sets up all the proxy methods and properties for the CPED proxy prototype -void SetupCpedProxyProtoMethods(Isolate* isolate, - Local cpedProxyProto, - Local cpedProxySymbol) { - auto context = isolate->GetCurrentContext(); - - auto addProxyProtoMethod = [&](Local methodName) { - auto data = Array::New(isolate, 2); - data->Set(context, Number::New(isolate, 0), cpedProxySymbol).Check(); - data->Set(context, Number::New(isolate, 1), methodName).Check(); - cpedProxyProto - ->Set(context, - methodName, - Function::New(context, &CpedProxyMethodCallback, data) - .ToLocalChecked()) - .Check(); - }; - - // Map methods + AsyncContextFrame.disable method - static constexpr const char* methodNames[] = {"clear", - "delete", - "entries", - "forEach", - "get", - "has", - "keys", - "set", - "values", - "disable"}; - - for (const char* methodName : methodNames) { - addProxyProtoMethod( - String::NewFromUtf8(isolate, methodName).ToLocalChecked()); - } - addProxyProtoMethod(Symbol::GetIterator(isolate)); - - // Map.size property - cpedProxyProto - ->SetNativeDataProperty(context, - String::NewFromUtf8Literal(isolate, "size"), - &CpedProxyPropertyGetterCallback, - nullptr, - cpedProxySymbol) - .Check(); -} -#endif // DD_WALL_USE_CPED - WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, std::chrono::microseconds duration, bool includeLines, @@ -757,7 +639,7 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_; collectCpuTime_ = collectCpuTime && withContexts; collectAsyncId_ = collectAsyncId && withContexts; -#if DD_WALL_USE_CPED +#if NODE_MAJOR_VERSION >= 23 useCPED_ = useCPED && withContexts; #else useCPED_ = false; @@ -786,30 +668,13 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); } -#if DD_WALL_USE_CPED if (useCPED_) { - // Used to create CPED proxy objects that will have one internal field to - // store the sample context pointer. - auto cpedObjTpl = ObjectTemplate::New(isolate); - cpedObjTpl->SetInternalFieldCount(1); - cpedProxyTemplate_.Reset(isolate, cpedObjTpl); - // Symbol used for the property name that stores the proxied object in the - // CPED proxy object. - Local cpedProxySymbol = - Symbol::New(isolate, - String::NewFromUtf8Literal( - isolate, "WallProfiler::CPEDProxy::ProxiedObject")); - cpedProxySymbol_.Reset(isolate, cpedProxySymbol); - // Prototype for the CPED proxy object that will have methods and property - // getters that invoke the corresponding methods and properties on the - // proxied object. The set of methods & properties is chosen with the - // assumption that the proxied object is a Node.js AsyncContextFrame. - Local cpedProxyProto = Object::New(isolate); - cpedProxyProto_.Reset(isolate, cpedProxyProto); - - SetupCpedProxyProtoMethods(isolate, cpedProxyProto, cpedProxySymbol); - } -#endif // DD_WALL_USE_CPED + cpedSymbol_.Reset( + isolate, + Private::ForApi(isolate, + String::NewFromUtf8Literal( + isolate, "dd::WallProfiler::cpedSymbol_"))); + } } void WallProfiler::UpdateContextCount() { @@ -901,7 +766,7 @@ NAN_METHOD(WallProfiler::New) { DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED); -#if !DD_WALL_USE_CPED +#if NODE_MAJOR_VERSION < 23 if (useCPED) { return Nan::ThrowTypeError( #ifndef _WIN32 @@ -1286,7 +1151,7 @@ void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { } void WallProfiler::SetContext(Isolate* isolate, Local value) { -#if DD_WALL_USE_CPED +#if NODE_MAJOR_VERSION >= 23 if (!useCPED_) { SetCurrentContextPtr(isolate, value); return; @@ -1307,32 +1172,40 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { // No Node AsyncContextFrame in this continuation yet if (!cped->IsObject()) return; + auto v8Ctx = isolate->GetCurrentContext(); + // This should always be called from a V8 context, but check just in case. + if (v8Ctx.IsEmpty()) return; + auto cpedObj = cped.As(); + auto localSymbol = cpedSymbol_.Get(isolate); + auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); + if (maybeProfData.IsEmpty()) return; + PersistentContextPtr* contextPtr = nullptr; + auto profData = maybeProfData.ToLocalChecked(); SignalGuard m(setInProgress_); - auto proxyProto = cpedProxyProto_.Get(isolate); - if (!proxyProto->StrictEquals(cpedObj->GetPrototype())) { - auto v8Ctx = isolate->GetCurrentContext(); - // This should always be called from a V8 context, but check just in case. - if (v8Ctx.IsEmpty()) return; - // Create a new CPED object with an internal field for the context pointer - auto proxyObj = - cpedProxyTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); - // Set up the proxy object to hold the proxied object and have the prototype - // that proxies AsyncContextFrame methods and properties - proxyObj->SetPrototype(v8Ctx, proxyProto).Check(); - proxyObj->Set(v8Ctx, cpedProxySymbol_.Get(isolate), cpedObj).Check(); - // Set up the context pointer in the internal field + if (profData->IsUndefined()) { + if (value->IsNullOrUndefined()) { + // Don't go to the trouble of mutating the CPED for null or undefined as + // the absence of a sample context will be interpreted as undefined in + // GetContextPtr anyway. + return; + } contextPtr = new PersistentContextPtr(&deadContextPtrs_); + + auto external = External::New(isolate, contextPtr); + auto maybeSetResult = cpedObj->SetPrivate(v8Ctx, localSymbol, external); + if (maybeSetResult.IsNothing()) { + delete contextPtr; + return; + } liveContextPtrs_.insert(contextPtr); contextPtr->RegisterForGC(isolate, cpedObj); - proxyObj->SetAlignedPointerInInternalField(0, contextPtr); - // Set the proxy object as the continuation preserved embedder data - isolate->SetContinuationPreservedEmbedderData(proxyObj); } else { - contextPtr = static_cast( - cpedObj->GetAlignedPointerFromInternalField(0)); + contextPtr = + static_cast(profData.As()->Value()); } + contextPtr->Set(isolate, value); #else SetCurrentContextPtr(isolate, value); @@ -1360,7 +1233,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { } ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { -#if DD_WALL_USE_CPED +#if NODE_MAJOR_VERSION >= 23 if (!useCPED_) { return curContext_; } @@ -1369,19 +1242,22 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { // Must not try to create a handle scope if isolate is not in use. return ContextPtr(); } + HandleScope scope(isolate); - auto addr = reinterpret_cast( - reinterpret_cast(isolate) + - internal::Internals::kContinuationPreservedEmbedderDataOffset); - - if (internal::Internals::HasHeapObjectTag(*addr)) { - auto cped = reinterpret_cast(addr); - if (cped->IsObject()) { - auto cpedObj = static_cast(cped); - if (cpedObj->InternalFieldCount() > 0) { - return static_cast( - cpedObj->GetAlignedPointerFromInternalField(0)) - ->Get(); + auto cped = isolate->GetContinuationPreservedEmbedderData(); + if (cped->IsObject()) { + auto v8Ctx = isolate->GetEnteredOrMicrotaskContext(); + if (!v8Ctx.IsEmpty()) { + auto cpedObj = cped.As(); + auto localSymbol = cpedSymbol_.Get(isolate); + auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); + if (!maybeProfData.IsEmpty()) { + auto profData = maybeProfData.ToLocalChecked(); + if (!profData->IsUndefined()) { + return static_cast( + profData.As()->Value()) + ->Get(); + } } } } diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 94f77439..15d5c17e 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -52,13 +52,8 @@ class WallProfiler : public Nan::ObjectWrap { bool useCPED_ = false; // If we aren't using the CPED, we use a single context ptr stored here. ContextPtr curContext_; - // Otherwise we'll use an internal field in objects stored in CPED. We must - // construct objects with an internal field count of 1 and a specially - // constructed prototype. - v8::Global cpedProxyTemplate_; - v8::Global cpedProxyProto_; - v8::Global cpedProxySymbol_; - + // Otherwise we'll use a private symbol to store the context in CPED objects. + v8::Global cpedSymbol_; // We track live context pointers in a set to avoid memory leaks. They will // be deleted when the profiler is disposed. std::unordered_set liveContextPtrs_; diff --git a/doc/sample_context_in_cped.md b/doc/sample_context_in_cped.md index f4dd8a4b..259c7749 100644 --- a/doc/sample_context_in_cped.md +++ b/doc/sample_context_in_cped.md @@ -56,113 +56,115 @@ to its store in the current async context. (This implementation is very similar to how e.g. Java implements `ThreadLocal`, which is a close analogue to ALS in Node.js.) ACF instances are then stored in CPED. -## Storing the Sample Context in CPED, take one +## Storing the Sample Context in CPED Node.js – as the embedder of V8 – commandeers the CPED to store instances of ACF in it. This means that our profiler can't directly store our sample context in the CPED, because then we'd overwrite the ACF reference already in there and -break Node.js. Our first attempt at solving this was to –- since ACF is "just" -an ordinary JavaScript object -- to define a new property on it, and store our -sample context in it! JavaScript properties can have strings, numbers, or -symbols as their keys, with symbols being the recommended practice to define -properties that are hidden from unrelated code as symbols are private to their -creator and only compare equal to themselves. Thus we created a private symbol in -the profiler instance for our property key, and our logic for storing the sample -context thus becomes: +break Node.js. Fortunately, since ACF is "just" an ordinary JavaScript object, +we can define a new property on it, and store our sample context in it! +JavaScript properties can have strings, numbers, or symbols as their keys, with +symbols being the recommended practice to define properties that are hidden from +unrelated code as symbols are private to their creator and only compare equal to +themselves. Thus we create a private symbol in the profiler instance for our +property key, and our logic for storing the sample context thus becomes: * get the CPED from the V8 isolate * if it is not an object, do nothing (we can't set the sample context) * otherwise set the sample context as a value in the object with our property key. -Unfortunately, this approach is not signal safe. When we want to read the value -in the signal handler, it now needs to retrieve the CPED, which creates a V8 -`Local`, and then it needs to read a property on it, which creates -another `Local`. It also needs to retrieve the current context, and a `Local` -for the symbol used as a key – four `Local`s in total. V8 tracks the object -addresses pointed to by locals so that GC doesn't touch them. It tracks them in -a series of arrays, and if the current array fills up, it needs to allocate a -new one. As we know, allocation is unsafe in a signal handler, hence our -problem. We were thinking of a solution where we check if there is at least 4 -slots free in the current array, but then our profiler's operation would be at -mercy of V8 internal state. - -## Storing the Sample Context in CPED, take two - -Next we thought of replacing the `AsyncContextFrame` object in CPED with one we -created with an internal field – we can store and retrieve an arbitrary `void *` -in it with `{Get|Set}AlignedPointerInInternalField` methods. The initial idea -was to leverage JavaScript's property of being a prototype-based language and -set the original CPED object as the prototype of our replacement, so that all -its methods would keep being invoked. This unfortunately didn't work because -the `AsyncContextFrame` is a `Map` and our replacement object doesn't have the -internal structure of V8's implementation of a map. The final solution turned -out to be the one where we store the original ACF as a property in our -replacement object (now effectively, a proxy to the ACF), and define all the -`Map` methods and properties on the proxy so that they are invoked on the ACF. -Even though the proxy does not pass an `instanceof Map` check, it is duck-typed -as a map. We even encapsulated this behavior in a special prototype object, so -the operations to set the context are: -* retrieve the ACF from CPED -* create a new object (the proxy) with one internal field -* set the ACF as a special property in the proxy -* set the prototype of the proxy to our prototype that defines all the proxied -methods and properties to forward through the proxy-referenced ACF. -* store our sample context in the internal field of the proxy -* set the proxy object as the CPED. - -Now, a keen eyed reader will notice that in the signal handler we still need to -call `Isolate::GetContinuationPreservedEmbedderData` which still creates a -`Local`. That would be true, except that we can import the `v8-internals.h` -header and directly read the address of the object by reading into the isolate -at the offset `kContinuationPreservedEmbedderDataOffset` declared in it. +The reality is a bit thornier, though. Imagine what happens if while we're +setting the property, we get interrupted by a PROF signal and the signal handler +tries to read the property value? It could easily observe an inconsistent state +and crash. But even if it reads a property value, which one did it read? Still +the old one, already the new one, or maybe a torn value between the two? + +Fortunately, we had the exact same problem with our previous approach where we +only stored one sample context in the profiler instances, and the solution is +the same. We encapsulate the pair of shared pointers to a V8 `Global` and an +atomic pointer-to-pointer in a class named `AtomicContextPtr`, which looks like +this: +``` +using ContextPtr = std::shared_ptr>; +class AtomicContextPtr { + ContextPtr ptr1; + ContextPtr ptr2; + std::atomic currentPtr = &ptr1; + ... +``` +A `Set` method on this class will first store the newly passed sample context in +either `ptr1` or `ptr2` – whichever `currentPtr` is _not_ pointing to at the +moment. Subsequently it atomically updates `currentPtr` to now point to it. + +Instead of storing the current sample context in the ACF property directly, +we want to store an `AtomicContextPtr` (ACP.) The only problem? This is a C++ +class, and properties of JavaScript objects can only be JavaScript values. +Fortunately, V8 gives us a solution for this as well: the `v8::External` type is +a V8 value type that wraps a `void *`. +So now the algorithm for setting a sample context is: +* get the CPED from the V8 isolate +* if it is not an object, do nothing (we can't set the sample context) +* Retrieve the property value. If there is one, it's the `External` wrapping the + pointer to the ACP we use. +* If there is none, allocate a new ACP on C++ heap, create a `v8::External` to + hold its pointer, and store it as a property in the ACF. +* Set the sample context as a value on the either retrieved or created ACP. The chain of data now looks something like this: ``` v8::Isolate (from Isolate::GetCurrent()) +-> current continuation (internally managed by V8) - +-- our proxy object - +-- node::AsyncContextFrame (in proxy's private property, for forwarding method calls) - +-- prototype: declares functions and properties that forward to the AsyncContextFrame - +-- dd:PersistentContextPtr* (in proxy's internal field) - +-> std::shared_ptr> (in PersistentContextPtr's context field) - +-> v8::Global (in shared_ptr) - +-> v8::Value (the actual sample context object) - + +-> node::AsyncContextFrame (in continuation's CPED field) + +-> v8::External (in AsyncContextFrame's private property) + +-> dd::AsyncContextPtr (in External's data field) + +-> std::shared_ptr> (in either AsyncContextPtr::ptr1 or ptr2) + +-> v8::Global (in shared_ptr) + +-> v8::Value (the actual sample context object) ``` -The last 3 steps are the same as when CPED is not being used, except `context` -is directly represented in the `WallProfiler`, so then it looks like this: +The last 3-4 steps were the same in the previous code version as well, except +`ptr1` and `ptr2` were directly represented in the `WallProfiler`, so then it +looked like this: ``` dd::WallProfiler +-> std::shared_ptr> (in either WallProfiler::ptr1 or ptr2) +-> v8::Global (in shared_ptr) +-> v8::Value (the actual sample context object) ``` - -### Memory allocations and garbage collection -We need to allocate a `PersistentContextPtr` (PCP) instance for every proxy we -create. The PCP has two concerns: it both has a shared pointer to the V8 global -that carries the sample context, and it also has a V8 weak reference to the -proxy object it is encapsulated within. This allows us to detect (since weak -references allow for GC callbacks) when the proxy object gets garbage collected, -and at that time the PCP itself can be either deleted or reused. We have an -optimization where we don't delete PCPs -- the assumption is that the number of -live ACFs (and thus proxies, and thus PCPs) will be constant for a server -application under load, so instead of doing a high amount of small new/delete -operations that can fragment the native heap, we keep the ones we'd delete in a -dequeue instead and reuse them. +The difference between the two diagrams shows how we encapsulated the +`(ptr1, ptr2, currentPtr)` tuple into a separate class and moved it out from +being an instance state of `WallProfiler` to being a property of every ACF we +encounter. ## Odds and ends And that's mostly it! There are few more small odds and ends to make it work -safely. We still need to guard reading the value in the signal handler while -it's being written. We guard by introducing an atomic boolean and proper signal -fencing. +safely. We still need to guard writing the property value to the ACF against +concurrent access by the signal handler, but now it happens only once for every +ACF, when we create its ACP. We guard by introducing an atomic boolean and +proper signal fencing. The signal handler code also needs to be prevented from trying to access the -data while GC is in progress. For this reason, we register GC prologue and -epilogue callbacks with the V8 isolate so we can know when GCs are ongoing and -the signal handler will refrain from reading the CPED field during them. We'll -however grab the current sample context from the CPED and store it in a profiler -instance field in the GC prologue and use it for any samples taken during GC. +data while a GC is in progress. With this new model, the signal handler +unfortunately needs to do a small number of V8 API invocations. It needs to +retrieve the current V8 `Context`, it needs to obtain a `Local` for the property +key, and finally it needs to use both in an `Object::Get` call on the CPED. +Calling a property getter on an object is reentrancy into V8, which is advised +against, but this being an ordinary property it ends up being a single dependent +load, which turns out to work safely… unless there's GC happening. For this +reason, we register GC prologue and epilogue callbacks with the V8 isolate so we +can know when GCs are ongoing and the signal handler will refrain from touching +CPED during them. We'll however grab the current sample context from the CPED +and store it in a profiler instance field in the GC prologue and use it for any +samples taken during GC. + +Speaking of GC, we can now have an unbounded number of ACPs – one for each live +ACF. Each ACP is allocated on the C++ heap, and needs to be deleted eventually. +The profiler tracks every ACP it creates in an internal set of live ACPs and +deletes them all when it itself gets disposed. This would still allow for +unbounded growth so we additionally register a V8 GC finalization callback for +every ACF. When V8 collects an ACF instance its finalization callback will put +that ACF's ACP into the profiler's internal vector of ready-to-delete ACPs and +the profiler processes that vector (both deletes the ACP and removes it from the +live set) on each call to `SetContext`. ## Changes in dd-trace-js For completeness, we'll describe the changes in dd-trace-js here as well. The @@ -173,16 +175,16 @@ now. There are some small performance optimizations that no longer apply with the new approach, though. For one, with the old approach we did some data conversions -(span IDs to bigint, a tag array to endpoint string) in a sample when a sample +(span IDs to string, a tag array to endpoint string) in a sample when a sample was captured. With the new approach, we do these conversions for all sample contexts during profile serialization. Doing them after each sample capture -amortized their cost, possibly reducing the latency induced at serialization -time. With the old approach we also called `SetContext` only once per sampling – -we'd install a sample context to be used for the next sample, and then kept -updating a `ref` field in it with a reference to the actual data from pure -JavaScript code. Since we no longer have a single sample context (but one per -continuation) we can not do this anymore, and we need to call `SetContext` on -every ACF change. The cost of this (basically, going into a native call from -JavaScript) are still well offset by not having to use async hooks and do work -on every async context change. We could arguably simplify the code by removing -those small optimizations. +amortized their cost possibly minimally reducing the latency induced at +serialization time. With the old approach we also called `SetContext` only once +per sampling – we'd install a sample context to be used for the next sample, and +then kept updating a `ref` field in it with a reference to the actual data. +Since we no longer have a single sample context (but one per continuation) we +can not do this anymore, and we need to call `SetContext` on every ACF change. +The cost of this (basically, going into a native call from JavaScript) are still +well offset by not having to use async hooks and do work on every async context +change. We could arguably simplify the code by removing those small +optimizations. From 091754439d5f23d5a77cf16a40a70c17af1398a2 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 4 Dec 2025 14:11:15 +0100 Subject: [PATCH 05/15] Add rebuild-debug script --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index a6cc6fd6..81812abb 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "pretest:js-valgrind": "npm run pretest", "pretest": "npm run compile", "rebuild": "node-gyp rebuild --jobs=max", + "rebuild-debug": "node-gyp rebuild --jobs=max --debug", "test:cpp": "node scripts/cctest.js", "test:js-asan": "LSAN_OPTIONS='suppressions=./suppressions/lsan_suppr.txt' LD_PRELOAD=`gcc -print-file-name=libasan.so` mocha out/test/test-*.js", "test:js-tsan": "LD_PRELOAD=`gcc -print-file-name=libtsan.so` mocha out/test/test-*.js", From baae78c2289eae44e9fbccd19e32c966931d499a Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 20:27:00 +0100 Subject: [PATCH 06/15] Use ObjectWrap as we can't register a GC callback for an External --- bindings/profilers/wall.cc | 67 +++++++++++++++----------------------- bindings/profilers/wall.hh | 5 +-- 2 files changed, 27 insertions(+), 45 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 6665b848..a618ff22 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -104,34 +104,23 @@ void SetContextPtr(ContextPtr& contextPtr, } } -class PersistentContextPtr { +class PersistentContextPtr : public node::ObjectWrap { ContextPtr context; - std::vector* dead; - Persistent per; + std::unordered_set* live; public: - PersistentContextPtr(std::vector* dead) : dead(dead) {} - - void UnregisterFromGC() { - if (!per.IsEmpty()) { - per.ClearWeak(); - per.Reset(); - } + PersistentContextPtr(std::unordered_set* live, Local wrap) : live(live) { + Wrap(wrap); } - void MarkDead() { dead->push_back(this); } + void Detach() { + live = nullptr; + } - void RegisterForGC(Isolate* isolate, const Local& obj) { - // Register a callback to delete this object when the object is GCed - per.Reset(isolate, obj); - per.SetWeak( - this, - [](const WeakCallbackInfo& data) { - auto ptr = data.GetParameter(); - ptr->MarkDead(); - ptr->UnregisterFromGC(); - }, - WeakCallbackType::kParameter); + ~PersistentContextPtr() { + if (live) { + live->erase(this); + } } void Set(Isolate* isolate, const Local& value) { @@ -139,6 +128,10 @@ class PersistentContextPtr { } ContextPtr Get() const { return context; } + + static PersistentContextPtr* Unwrap(Local wrap) { + return node::ObjectWrap::Unwrap(wrap); + } }; // Maximum number of rounds in the GetV8ToEpochOffset @@ -674,6 +667,9 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, Private::ForApi(isolate, String::NewFromUtf8Literal( isolate, "dd::WallProfiler::cpedSymbol_"))); + auto wrapObjectTemplate = ObjectTemplate::New(isolate); + wrapObjectTemplate->SetInternalFieldCount(1); + wrapObjectTemplate_.Reset(isolate, wrapObjectTemplate); } } @@ -703,11 +699,10 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { isolate, &WallProfiler::CleanupHook, isolate); for (auto ptr : liveContextPtrs_) { - ptr->UnregisterFromGC(); + ptr->Detach(); // so it doesn't invalidate our iterator delete ptr; } liveContextPtrs_.clear(); - deadContextPtrs_.clear(); UpdateContextCount(); } } @@ -1161,13 +1156,6 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { UpdateContextCount(); }; - // Clean up dead context pointers - for (auto ptr : deadContextPtrs_) { - liveContextPtrs_.erase(ptr); - delete ptr; - } - deadContextPtrs_.clear(); - auto cped = isolate->GetContinuationPreservedEmbedderData(); // No Node AsyncContextFrame in this continuation yet if (!cped->IsObject()) return; @@ -1191,16 +1179,14 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { // GetContextPtr anyway. return; } - contextPtr = new PersistentContextPtr(&deadContextPtrs_); - auto external = External::New(isolate, contextPtr); - auto maybeSetResult = cpedObj->SetPrivate(v8Ctx, localSymbol, external); - if (maybeSetResult.IsNothing()) { - delete contextPtr; + auto wrap = wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); + auto maybeSetResult = cpedMap->Set(v8Ctx, localSymbol, wrap); + if (maybeSetResult.IsEmpty()) { return; } + contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); liveContextPtrs_.insert(contextPtr); - contextPtr->RegisterForGC(isolate, cpedObj); } else { contextPtr = static_cast(profData.As()->Value()); @@ -1253,10 +1239,9 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); if (!maybeProfData.IsEmpty()) { auto profData = maybeProfData.ToLocalChecked(); - if (!profData->IsUndefined()) { - return static_cast( - profData.As()->Value()) - ->Get(); + if (profData->IsObject()) { + auto profObj = profData.As(); + return PersistentContextPtr::Unwrap(profObj)->Get(); } } } diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 15d5c17e..3bc78bf4 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -54,13 +54,10 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr curContext_; // Otherwise we'll use a private symbol to store the context in CPED objects. v8::Global cpedSymbol_; + v8::Global wrapObjectTemplate_; // We track live context pointers in a set to avoid memory leaks. They will // be deleted when the profiler is disposed. std::unordered_set liveContextPtrs_; - // Context pointers belonging to GC'd CPED objects register themselves here. - // They will be deleted and removed from liveContextPtrs_ next time SetContext - // is invoked. - std::vector deadContextPtrs_; std::atomic gcCount = 0; std::atomic setInProgress_ = false; From 540ff014cd8f2f060ae212b80afbb9319c7b9109 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 20:28:17 +0100 Subject: [PATCH 07/15] Don't use explicit Node.js version numbers --- bindings/profilers/wall.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index a618ff22..9e287d3f 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -632,7 +632,7 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_; collectCpuTime_ = collectCpuTime && withContexts; collectAsyncId_ = collectAsyncId && withContexts; -#if NODE_MAJOR_VERSION >= 23 +#if DD_WALL_USE_CPED useCPED_ = useCPED && withContexts; #else useCPED_ = false; @@ -661,6 +661,7 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); } +#if DD_WALL_USE_CPED if (useCPED_) { cpedSymbol_.Reset( isolate, @@ -671,6 +672,7 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, wrapObjectTemplate->SetInternalFieldCount(1); wrapObjectTemplate_.Reset(isolate, wrapObjectTemplate); } +#endif } void WallProfiler::UpdateContextCount() { @@ -761,7 +763,7 @@ NAN_METHOD(WallProfiler::New) { DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED); -#if NODE_MAJOR_VERSION < 23 +#if !DD_WALL_USE_CPED if (useCPED) { return Nan::ThrowTypeError( #ifndef _WIN32 @@ -1146,7 +1148,7 @@ void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { } void WallProfiler::SetContext(Isolate* isolate, Local value) { -#if NODE_MAJOR_VERSION >= 23 +#if DD_WALL_USE_CPED if (!useCPED_) { SetCurrentContextPtr(isolate, value); return; @@ -1219,7 +1221,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { } ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { -#if NODE_MAJOR_VERSION >= 23 +#if DD_WALL_USE_CPED if (!useCPED_) { return curContext_; } From 0e71434946c2eeb17d6ff25506357a6d8ff461ee Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 20:29:27 +0100 Subject: [PATCH 08/15] Read V8 map data directly, for signal safety --- binding.gyp | 3 +- bindings/map-get.cc | 375 +++++++++++++++++++++++++++++++++++++ bindings/map-get.hh | 25 +++ bindings/profilers/wall.cc | 104 +++++----- bindings/profilers/wall.hh | 7 +- 5 files changed, 459 insertions(+), 55 deletions(-) create mode 100644 bindings/map-get.cc create mode 100644 bindings/map-get.hh diff --git a/binding.gyp b/binding.gyp index 71b0f215..381141f4 100644 --- a/binding.gyp +++ b/binding.gyp @@ -18,7 +18,8 @@ "bindings/thread-cpu-clock.cc", "bindings/translate-heap-profile.cc", "bindings/translate-time-profile.cc", - "bindings/binding.cc" + "bindings/binding.cc", + "bindings/map-get.cc" ], "include_dirs": [ "bindings", diff --git a/bindings/map-get.cc b/bindings/map-get.cc new file mode 100644 index 00000000..7de0ff93 --- /dev/null +++ b/bindings/map-get.cc @@ -0,0 +1,375 @@ +/** + * Copyright 2025 Datadog. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map-get.hh" + +// Find a value in JavaScript map by directly reading the underlying V8 hash +// map. +// +// V8 uses TWO internal hash map representations: +// 1. SmallOrderedHashMap: For small maps (capacity 4-254) +// - Metadata stored as uint8_t bytes +// - Entry size: 2 (key, value) +// - Chain table separate from entries +// +// 2. OrderedHashMap: For larger maps (capacity >254) +// - Metadata stored as Smis in FixedArray +// - Entry size: 3 (key, value, chain) +// - Chain stored inline with entries +// +// This code handles both types by detecting the table format at runtime. +// Practical testing shows that at least the AsyncContextFrame maps use the +// large map format even for small cardinality maps, but just in case we handle +// both. + +#include + +namespace dd { + +using Address = uintptr_t; + +#ifndef _WIN32 +// ============================================================================ +// Constants from V8 internals +// ============================================================================ + +// Heap object tagging +constexpr int kHeapObjectTag = 1; + +// OrderedHashMap/SmallOrderedHashMap shared constants +constexpr int kNotFound = -1; +constexpr int kSmallLoadFactor = 2; + +// ============================================================================ +// Helper Functions (needed by struct methods) +// ============================================================================ + +inline Address UntagPointer(Address tagged) { + return tagged - kHeapObjectTag; +} + +inline bool IsSmi(Address value) { + return (value & 1) == 0; +} + +// SmiToInt Conversion below valid only on 64-bit platforms, the only ones we +// support +static_assert(sizeof(void*) == 8, "Only 64-bit platforms supported"); + +inline int SmiToInt(Address smi) { + return static_cast(static_cast(smi) >> 32); +} + +// ============================================================================ +// V8 Hashtable Structure Definitions +// ============================================================================ + +// HeapObject layout - base for all V8 heap objects +// From v8/src/objects/heap-object.h +struct HeapObjectLayout { + Address classMap_; // Tagged pointer to the class map +}; + +// JavaScript Map object +struct JSMapLayout { + HeapObjectLayout header_; // Map is a HeapObject + Address properties_or_hash_; // not used by us + Address elements_; // not used by us + // Tagged pointer to a [Small]OrderedHashMapLayout + Address table_; +}; + +// V8 FixedArray: length_ is a Smi, followed by that many element slots +struct FixedArrayLayout { + HeapObjectLayout header_; // FixedArray is a HeapObject + Address length_; + Address elements_[0]; +}; + +// NOTE: both OrderedHashMap and SmallOrderedHashMap have compatible method +// definitions so FindEntryByHash and FindValueByHash can be defined as +// templated function working on both. + +// OrderedHashMap layout (for large maps, capacity >254) +// From v8/src/objects/ordered-hash-table.h +struct OrderedHashMapLayout { + FixedArrayLayout fixedArray_; // OrderedHashMap is a FixedArray + // The first 3 address slots in the FixedArray that is a Hashtable are the + // number of elements, deleted elements, and buckets. Each one is a Smi. + Address number_of_elements_; + Address number_of_deleted_elements_; + Address number_of_buckets_; + // First number_of_buckets_ entries in head_and_data_table_ is the head table: + // each entry is an index of the first entry (head of the linked list of + // entries) in the data table for that bucket. This is followed by the data + // table. Each data table entry uses three (kEntrySize == 3) tagged pointer + // slots: + // [0]: key (Tagged Object) + // [1]: value (Tagged Object) + // [2]: chain (Smi - next entry index or -1) + // All indices (both to the head of the list and to the next entry are + // expressed in number of entries from the start of the data table, so to + // convert it into a head_and_data_table_ you need to add number_of_buckets_ + // (length of the head table) and then 3 * index. + Address head_and_data_table_[0]; // Variable: [head_table][data_table] + + // Constants for entry structure + static constexpr int kEntrySize = 3; + static constexpr int kKeyOffset = 0; + static constexpr int kValueOffset = 1; + static constexpr int kChainOffset = 2; + static constexpr int kNotFoundValue = kNotFound; + + // Get number of buckets (converts Smi to int) + int NumberOfBuckets() const { + return IsSmi(number_of_buckets_) ? SmiToInt(number_of_buckets_) : 0; + } + + // Get an upper bound for number of element chain in a bucket. Used to prevent + // infinite lookup chain. + int GetMaxChainLength() const { + return IsSmi(number_of_elements_) && IsSmi(number_of_deleted_elements_) + ? SmiToInt(number_of_elements_) + + SmiToInt(number_of_deleted_elements_) + : 0; + } + + // Convert hash to bucket index + int HashToBucket(int hash) const { + int num_buckets = NumberOfBuckets(); + return num_buckets > 0 ? (hash & (num_buckets - 1)) : 0; + } + + // Get first entry index for a bucket + int GetFirstEntry(int bucket) const { + Address entry_smi = head_and_data_table_[bucket]; + return IsSmi(entry_smi) ? SmiToInt(entry_smi) : kNotFound; + } + + // Convert entry index to head_and_data_table_ index for the entry's key + int EntryToIndex(int entry) const { + return NumberOfBuckets() + (entry * kEntrySize); + } + + // Get key at entry index + Address GetKey(int entry) const { + int index = EntryToIndex(entry); + return head_and_data_table_[index + kKeyOffset]; + } + + // Get value at entry index + Address GetValue(int entry) const { + int index = EntryToIndex(entry); + return head_and_data_table_[index + kValueOffset]; + } + + // Get next entry in chain + int GetNextChainEntry(int entry) const { + int index = EntryToIndex(entry); + Address chain_smi = head_and_data_table_[index + kChainOffset]; + return IsSmi(chain_smi) ? SmiToInt(chain_smi) : kNotFound; + } +}; + +// SmallOrderedHashMap layout (for small maps, capacity 4-254) +// Memory layout (stores metadata as uint8_t, not Smis): +// [0]: map pointer (HeapObject) +// [kHeaderSize + 0]: number_of_elements (uint8) +// [kHeaderSize + 1]: number_of_deleted_elements (uint8) +// [kHeaderSize + 2]: number_of_buckets (uint8) +// [kHeaderSize + 3...]: padding (5 bytes on 64-bit, 1 byte on 32-bit) +// [DataTableStartOffset...]: data table (key-value pairs as Tagged) +// [...]: hash table (uint8 bucket indices) +// [...]: chain table (uint8 next entry indices) +// +// Each entry is 2 Tagged elements (kEntrySize = 2): +// [0]: key (Tagged Object) +// [1]: value (Tagged Object) +// +// From v8/src/objects/ordered-hash-table.h +struct SmallOrderedHashMapLayout { + HeapObjectLayout header_; + uint8_t number_of_elements_; + uint8_t number_of_deleted_elements_; + uint8_t number_of_buckets_; + uint8_t padding_[5]; // 5 bytes on 64-bit + // Variable length: + // - Address data_table_[capacity * kEntrySize] // Keys and values + // - uint8_t hash_table_[number_of_buckets_] // Bucket -> first entry + // - uint8_t chain_table_[capacity] // Entry -> next entry + Address data_table_[0]; + + // Constants for entry structure + static constexpr int kEntrySize = 2; + static constexpr int kKeyOffset = 0; + static constexpr int kValueOffset = 1; + static constexpr int kNotFoundValue = 255; + + // Get capacity from number of buckets + int Capacity() const { return number_of_buckets_ * kSmallLoadFactor; } + + int NumberOfBuckets() const { return number_of_buckets_; } + + int GetMaxChainLength() const { + return number_of_elements_ + number_of_deleted_elements_; + } + + int HashToBucket(int hash) const { return hash & (NumberOfBuckets() - 1); } + + const uint8_t* GetHashTable() const { + return reinterpret_cast(data_table_ + + Capacity() * kEntrySize); + } + + const uint8_t* GetChainTable() const { + return GetHashTable() + number_of_buckets_; + } + + // Get key at entry index + Address GetKey(int entry) const { + return data_table_[entry * kEntrySize + kKeyOffset]; + } + + // Get value at entry index + Address GetValue(int entry) const { + return data_table_[entry * kEntrySize + kValueOffset]; + } + + // Get first entry in bucket + uint8_t GetFirstEntry(int bucket) const { + const uint8_t* hash_table = GetHashTable(); + return hash_table[bucket]; + } + + // Get next entry in chain + uint8_t GetNextChainEntry(int entry) const { + const uint8_t* chain_table = GetChainTable(); + return chain_table[entry]; + } +}; + +// ============================================================================ +// Templated Hash Table Lookup +// ============================================================================ + +// Find an entry by a key and its hash in any hash table layout +// Template parameter LayoutT should be either OrderedHashMapLayout or +// SmallOrderedHashMapLayout +template +int FindEntryByHash(const LayoutT* layout, int hash, Address key_to_find) { + int max_chain_length = layout->GetMaxChainLength(); + int bucket = layout->HashToBucket(hash); + int entry = layout->GetFirstEntry(bucket); + + // Paranoid: by never traversing more than the sum of elements and deleted + // elements we guarantee this terminates in bound time even if for some + // unforeseen reason the chain is cyclical. + for (int max_chain_left = max_chain_length; + entry != LayoutT::kNotFoundValue && max_chain_left > 0; + max_chain_left--) { + Address key_at_entry = layout->GetKey(entry); + if (key_at_entry == key_to_find) { + return entry; + } + entry = layout->GetNextChainEntry(entry); + } + + return kNotFound; +} + +// Find an entry by a key and its hash in any hash table layout, and return its +// value or the zero address if it is not found. +// Template parameter LayoutT should be either OrderedHashMapLayout or +// SmallOrderedHashMapLayout +template +Address FindValueByHash(const LayoutT* layout, int hash, Address key_to_find) { + auto entry = FindEntryByHash(layout, hash, key_to_find); + return entry == kNotFound ? 0 : layout->GetValue(entry); +} + +// Detect if the table is an OrderedHashMap or a SmallOrderedHashMap (or it can +// not safely be determined) by checking padding bytes. SmallOrderedHashMap has +// always-zero padding bytes after the metadata. +static uint8_t GetOrderedHashMapType(Address table_untagged) { + const SmallOrderedHashMapLayout* potential_small = + reinterpret_cast(table_untagged); + + // Read the header as one 64-bit value for validation + uint64_t smallHeader = + *reinterpret_cast(&potential_small->number_of_elements_); + + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, + "Little-endian required"); + // Small map will have some bits in bytes 0-2 be nonzero, and all bits in + // bytes 3-7 zero. That effectively limits the value range of smallHeader to + // [0x1-0xFFFFFF]. + if (smallHeader > 0 && smallHeader < 0x1000000) { + auto num_elements = potential_small->number_of_elements_; + auto num_deleted = potential_small->number_of_deleted_elements_; + auto num_buckets = potential_small->number_of_buckets_; + + // SmallOrderedHashMap has constraints: + // - num_buckets must be a power of 2 between 2 and 127 + // - num_elements + num_deleted <= capacity (buckets * 2) + if (num_buckets >= 2 && num_buckets <= 127) { + // Check if num_buckets is a power of 2 + if ((num_buckets & (num_buckets - 1)) == 0) { + auto capacity = num_buckets * kSmallLoadFactor; + if (num_elements + num_deleted <= capacity) { + return 1; // small map + } + } + } + return 2; // undecided + } + return 0; // large map +} + +// ============================================================================ +// Main entry point +// ============================================================================ + +// Lookup value in a Map given the hash and key pointer. If the key is not found +// in the map (or the lookup can not be performed) returns a zero Address (which +// is essentially a zero Smi value.) +Address GetValueFromMap(Address map_addr, int hash, Address key) { + const JSMapLayout* map_untagged = + reinterpret_cast(UntagPointer(map_addr)); + Address table_untagged = UntagPointer(map_untagged->table_); + + switch (GetOrderedHashMapType(table_untagged)) { + case 0: { + const OrderedHashMapLayout* layout = + reinterpret_cast(table_untagged); + return FindValueByHash(layout, hash, key); + } + case 1: { + const SmallOrderedHashMapLayout* layout = + reinterpret_cast(table_untagged); + return FindValueByHash(layout, hash, key); + } + } + return 0; // We couldn't determine the kind of the map, just return zero. +} + +#else // _WIN32 + +Address GetValueFromMap(Address map_addr, int hash, Address key) { + return 0; +} + +#endif // _WIN32 +} // namespace dd diff --git a/bindings/map-get.hh b/bindings/map-get.hh new file mode 100644 index 00000000..bb20e0d6 --- /dev/null +++ b/bindings/map-get.hh @@ -0,0 +1,25 @@ +/** + * Copyright 2025 Datadog. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +using Address = uintptr_t; + +namespace dd { +Address GetValueFromMap(Address map_addr, int hash, Address key); +} // namespace dd diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 9e287d3f..9eacfed8 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include "defer.hh" +#include "map-get.hh" #include "per-isolate-data.hh" #include "translate-time-profile.hh" #include "wall.hh" @@ -109,13 +111,13 @@ class PersistentContextPtr : public node::ObjectWrap { std::unordered_set* live; public: - PersistentContextPtr(std::unordered_set* live, Local wrap) : live(live) { + PersistentContextPtr(std::unordered_set* live, + Local wrap) + : live(live) { Wrap(wrap); } - void Detach() { - live = nullptr; - } + void Detach() { live = nullptr; } ~PersistentContextPtr() { if (live) { @@ -663,16 +665,17 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, #if DD_WALL_USE_CPED if (useCPED_) { - cpedSymbol_.Reset( - isolate, - Private::ForApi(isolate, - String::NewFromUtf8Literal( - isolate, "dd::WallProfiler::cpedSymbol_"))); + // Used to create Map value objects that will have one internal field to + // store the sample context pointer. auto wrapObjectTemplate = ObjectTemplate::New(isolate); wrapObjectTemplate->SetInternalFieldCount(1); wrapObjectTemplate_.Reset(isolate, wrapObjectTemplate); + // Object used as the Map key object. + auto cpedKeyObj = Object::New(isolate); + cpedKey_.Reset(isolate, cpedKeyObj); + cpedKeyHash_ = cpedKeyObj->GetIdentityHash(); } -#endif +#endif // DD_WALL_USE_CPED } void WallProfiler::UpdateContextCount() { @@ -700,8 +703,9 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); + // Delete all live contexts for (auto ptr : liveContextPtrs_) { - ptr->Detach(); // so it doesn't invalidate our iterator + ptr->Detach(); // so it doesn't invalidate our iterator delete ptr; } liveContextPtrs_.clear(); @@ -1160,41 +1164,32 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { auto cped = isolate->GetContinuationPreservedEmbedderData(); // No Node AsyncContextFrame in this continuation yet - if (!cped->IsObject()) return; + if (!cped->IsMap()) return; auto v8Ctx = isolate->GetCurrentContext(); // This should always be called from a V8 context, but check just in case. if (v8Ctx.IsEmpty()) return; - auto cpedObj = cped.As(); - auto localSymbol = cpedSymbol_.Get(isolate); - auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); - if (maybeProfData.IsEmpty()) return; + auto cpedMap = cped.As(); + auto localKey = cpedKey_.Get(isolate); - PersistentContextPtr* contextPtr = nullptr; - auto profData = maybeProfData.ToLocalChecked(); - SignalGuard m(setInProgress_); - if (profData->IsUndefined()) { - if (value->IsNullOrUndefined()) { - // Don't go to the trouble of mutating the CPED for null or undefined as - // the absence of a sample context will be interpreted as undefined in - // GetContextPtr anyway. - return; - } - - auto wrap = wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); - auto maybeSetResult = cpedMap->Set(v8Ctx, localSymbol, wrap); - if (maybeSetResult.IsEmpty()) { - return; - } - contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); - liveContextPtrs_.insert(contextPtr); + // Always replace the PersistentContextPtr in the map even if it is present, + // we want the PersistentContextPtr in a parent map to not be mutated. + if (value->IsNullOrUndefined()) { + // The absence of a sample context will be interpreted as undefined in + // GetContextPtr so if value is null or undefined, just delete the key. + SignalGuard m(setInProgress_); + cpedMap->Delete(v8Ctx, localKey).Check(); } else { - contextPtr = - static_cast(profData.As()->Value()); - } + auto wrap = + wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); + auto contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); + liveContextPtrs_.insert(contextPtr); + contextPtr->Set(isolate, value); - contextPtr->Set(isolate, value); + SignalGuard m(setInProgress_); + cpedMap->Set(v8Ctx, localKey, wrap).ToLocalChecked(); + } #else SetCurrentContextPtr(isolate, value); #endif @@ -1227,23 +1222,28 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { } if (!isolate->IsInUse()) { - // Must not try to create a handle scope if isolate is not in use. return ContextPtr(); } - HandleScope scope(isolate); - auto cped = isolate->GetContinuationPreservedEmbedderData(); - if (cped->IsObject()) { - auto v8Ctx = isolate->GetEnteredOrMicrotaskContext(); - if (!v8Ctx.IsEmpty()) { - auto cpedObj = cped.As(); - auto localSymbol = cpedSymbol_.Get(isolate); - auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); - if (!maybeProfData.IsEmpty()) { - auto profData = maybeProfData.ToLocalChecked(); - if (profData->IsObject()) { - auto profObj = profData.As(); - return PersistentContextPtr::Unwrap(profObj)->Get(); + auto cpedAddrPtr = reinterpret_cast( + reinterpret_cast(isolate) + + internal::Internals::kContinuationPreservedEmbedderDataOffset); + auto cpedAddr = *cpedAddrPtr; + if (internal::Internals::HasHeapObjectTag(cpedAddr)) { + auto cpedValuePtr = reinterpret_cast(cpedAddrPtr); + if (cpedValuePtr->IsMap()) { + Address keyAddr = **(reinterpret_cast(&cpedKey_)); + + Address wrapAddr = GetValueFromMap(cpedAddr, cpedKeyHash_, keyAddr); + if (internal::Internals::HasHeapObjectTag(wrapAddr)) { + auto wrapValue = reinterpret_cast(&wrapAddr); + if (wrapValue->IsObject()) { + auto wrapObj = reinterpret_cast(wrapValue); + if (wrapObj->InternalFieldCount() > 0) { + return static_cast( + wrapObj->GetAlignedPointerFromInternalField(0)) + ->Get(); + } } } } diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 3bc78bf4..d9ec26e7 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -52,9 +52,12 @@ class WallProfiler : public Nan::ObjectWrap { bool useCPED_ = false; // If we aren't using the CPED, we use a single context ptr stored here. ContextPtr curContext_; - // Otherwise we'll use a private symbol to store the context in CPED objects. - v8::Global cpedSymbol_; + // Otherwise we'll use an object as a key to store the context in + // AsyncContextFrame maps. + v8::Global cpedKey_; + int cpedKeyHash_; v8::Global wrapObjectTemplate_; + // We track live context pointers in a set to avoid memory leaks. They will // be deleted when the profiler is disposed. std::unordered_set liveContextPtrs_; From 8c7f7eabf985448fece7ec0c9f8d501828b2ec09 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 1 Dec 2025 21:04:17 +0100 Subject: [PATCH 09/15] Bring metrics back --- bindings/profilers/wall.cc | 47 ++++++++++++++++++++--------------- bindings/profilers/wall.hh | 6 +++-- ts/src/index.ts | 1 + ts/src/time-profiler.ts | 12 ++++++--- ts/src/v8-types.ts | 5 ++++ ts/test/test-time-profiler.ts | 4 +++ 6 files changed, 50 insertions(+), 25 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 9eacfed8..3e0498a5 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -26,7 +26,6 @@ #include #include -#include "defer.hh" #include "map-get.hh" #include "per-isolate-data.hh" #include "translate-time-profile.hh" @@ -678,14 +677,6 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, #endif // DD_WALL_USE_CPED } -void WallProfiler::UpdateContextCount() { - std::atomic_store_explicit( - reinterpret_cast*>( - &fields_[WallProfiler::Fields::kCPEDContextCount]), - liveContextPtrs_.size(), - std::memory_order_relaxed); -} - void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { if (cpuProfiler_ != nullptr) { cpuProfiler_->Dispose(); @@ -709,7 +700,6 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { delete ptr; } liveContextPtrs_.clear(); - UpdateContextCount(); } } @@ -889,7 +879,6 @@ v8::ProfilerId WallProfiler::StartInternal() { if (withContexts_ || workaroundV8Bug_) { SignalHandler::IncreaseUseCount(); fields_[kSampleCount] = 0; - fields_[kCPEDContextCount] = 0; } if (collectCpuTime_) { @@ -1094,6 +1083,10 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::New("state").ToLocalChecked(), SharedArrayGetter); + Nan::SetAccessor(tpl->InstanceTemplate(), + Nan::New("metrics").ToLocalChecked(), + GetMetrics); + PerIsolateData::For(Isolate::GetCurrent()) ->WallProfilerConstructor() .Reset(Nan::GetFunction(tpl).ToLocalChecked()); @@ -1109,11 +1102,6 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::New(kSampleCount), ReadOnlyDontDelete) .FromJust(); - Nan::DefineOwnProperty(constants, - Nan::New("kCPEDContextCount").ToLocalChecked(), - Nan::New(kCPEDContextCount), - ReadOnlyDontDelete) - .FromJust(); Nan::DefineOwnProperty(target, Nan::New("constants").ToLocalChecked(), constants, @@ -1158,10 +1146,6 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { return; } - defer { - UpdateContextCount(); - }; - auto cped = isolate->GetContinuationPreservedEmbedderData(); // No Node AsyncContextFrame in this continuation yet if (!cped->IsMap()) return; @@ -1254,6 +1238,24 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { #endif } +Local WallProfiler::GetMetrics(Isolate* isolate) { + auto usedAsyncContextCount = liveContextPtrs_.size(); + auto context = isolate->GetCurrentContext(); + auto metrics = Object::New(isolate); + metrics + ->Set(context, + String::NewFromUtf8Literal(isolate, "usedAsyncContextCount"), + Number::New(isolate, usedAsyncContextCount)) + .ToChecked(); + // totalAsyncContextCount == usedAsyncContextCount + metrics + ->Set(context, + String::NewFromUtf8Literal(isolate, "totalAsyncContextCount"), + Number::New(isolate, usedAsyncContextCount)) + .ToChecked(); + return metrics; +} + NAN_GETTER(WallProfiler::GetContext) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->GetContext(info.GetIsolate())); @@ -1269,6 +1271,11 @@ NAN_GETTER(WallProfiler::SharedArrayGetter) { info.GetReturnValue().Set(profiler->jsArray_.Get(v8::Isolate::GetCurrent())); } +NAN_GETTER(WallProfiler::GetMetrics) { + auto profiler = Nan::ObjectWrap::Unwrap(info.This()); + info.GetReturnValue().Set(profiler->GetMetrics(info.GetIsolate())); +} + NAN_METHOD(WallProfiler::V8ProfilerStuckEventLoopDetected) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->v8ProfilerStuckEventLoopDetected()); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index d9ec26e7..51ec4c3f 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -43,7 +43,7 @@ class PersistentContextPtr; class WallProfiler : public Nan::ObjectWrap { public: enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts }; - enum Fields { kSampleCount, kCPEDContextCount, kFieldCount }; + enum Fields { kSampleCount, kFieldCount }; private: std::chrono::microseconds samplingPeriod_{0}; @@ -118,7 +118,6 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); - void UpdateContextCount(); public: /** @@ -148,6 +147,8 @@ class WallProfiler : public Nan::ObjectWrap { int64_t time_to, int64_t cpu_time, v8::Isolate* isolate); + v8::Local GetMetrics(v8::Isolate*); + Result StartImpl(); v8::ProfilerId StartInternal(); Result StopImpl(bool restart, v8::Local& profile); @@ -186,6 +187,7 @@ class WallProfiler : public Nan::ObjectWrap { static NAN_GETTER(GetContext); static NAN_SETTER(SetContext); static NAN_GETTER(SharedArrayGetter); + static NAN_GETTER(GetMetrics); }; } // namespace dd diff --git a/ts/src/index.ts b/ts/src/index.ts index 51cceddb..42454629 100644 --- a/ts/src/index.ts +++ b/ts/src/index.ts @@ -40,6 +40,7 @@ export const time = { v8ProfilerStuckEventLoopDetected: timeProfiler.v8ProfilerStuckEventLoopDetected, getState: timeProfiler.getState, + getMetrics: timeProfiler.getMetrics, constants: timeProfiler.constants, }; diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index f252f9e8..a265364b 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -27,10 +27,10 @@ import { getNativeThreadId, constants as profilerConstants, } from './time-profiler-bindings'; -import {GenerateTimeLabelsFunction} from './v8-types'; +import {GenerateTimeLabelsFunction, TimeProfilerMetrics} from './v8-types'; import {isMainThread} from 'worker_threads'; -const {kSampleCount, kCPEDContextCount} = profilerConstants; +const {kSampleCount} = profilerConstants; const DEFAULT_INTERVAL_MICROS: Microseconds = 1000; const DEFAULT_DURATION_MILLIS: Milliseconds = 60000; @@ -169,6 +169,13 @@ export function getContext() { return gProfiler.context; } +export function getMetrics(): TimeProfilerMetrics { + if (!gProfiler) { + throw new Error('Wall profiler is not started'); + } + return gProfiler.metrics as TimeProfilerMetrics; +} + export function isStarted() { return !!gProfiler; } @@ -180,7 +187,6 @@ export function v8ProfilerStuckEventLoopDetected() { export const constants = { kSampleCount, - kCPEDContextCount, GARBAGE_COLLECTION_FUNCTION_NAME, NON_JS_THREADS_FUNCTION_NAME, }; diff --git a/ts/src/v8-types.ts b/ts/src/v8-types.ts index cc043a52..39178b19 100644 --- a/ts/src/v8-types.ts +++ b/ts/src/v8-types.ts @@ -73,3 +73,8 @@ export interface GenerateTimeLabelsArgs { export interface GenerateTimeLabelsFunction { (args: GenerateTimeLabelsArgs): LabelSet; } + +export interface TimeProfilerMetrics { + usedAsyncContextCount: number; + totalAsyncContextCount: number; +} diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index 608a8469..90f1e580 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -139,6 +139,10 @@ describe('Time Profiler', () => { for (let i = 0; i < repeats; ++i) { loop(); enableEndPoint = i % 2 === 0; + const metrics = time.getMetrics(); + const expectedAsyncContextCount = useCPED ? 1 : 0; + assert(metrics.totalAsyncContextCount >= expectedAsyncContextCount); + assert(metrics.usedAsyncContextCount >= expectedAsyncContextCount); validateProfile( time.stop( i < repeats - 1, From 784299ee91c8879c9969ada1f3c9110e84fe3c99 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 4 Dec 2025 17:00:04 +0100 Subject: [PATCH 10/15] Documentation update --- doc/sample_context_in_cped.md | 194 ++++++++++++++++------------------ 1 file changed, 93 insertions(+), 101 deletions(-) diff --git a/doc/sample_context_in_cped.md b/doc/sample_context_in_cped.md index 259c7749..69f80e50 100644 --- a/doc/sample_context_in_cped.md +++ b/doc/sample_context_in_cped.md @@ -9,12 +9,9 @@ typical piece of data sample context stores is the tracing span ID, so whenever it changes, the sample context needs to be updated. ## How is the Sample Context stored and updated? -Before Node 23, the sample context would be stored in a +Before Node 22.7, the sample context would be stored in a `std::shared_ptr>` field on the C++ `WallProfiler` -instance. (In fact, due to the need for ensuring atomic updates and shared -pointers not being effectively updateable atomically it's actually a pair of -fields with an atomic pointer-to-shared-pointer switching between them, but I -digress.) Due to it being a single piece of instance state, it had to be updated +instance. Due to it being a single piece of instance state, it had to be updated every time the active span changed, possibly on every invocation of `AsyncLocalStorage.enterWith` and `.run`, but even more importantly on every async context change, and for that we needed to register a "before" callback @@ -39,8 +36,8 @@ per-continuation basis and the engine takes care to return the right one when `Isolate::GetContinuationPreservedEmbedderData()` method is invoked. We will refer to continuation-preserved embedder data as "CPED" from now on. -Starting with Node.js 23, CPED is used to implement data storage behind Node.js -`AsyncLocalStorage` API. This dovetails nicely with our needs as all the +Starting with Node.js 22.7, CPED is used to implement data storage behind +Node.js `AsyncLocalStorage` API. This dovetails nicely with our needs as all the span-related data we set on the sample context is normally managed in an async local storage (ALS) by the tracer. An application can create any number of ALSes, and each ALS manages a single value per async context. This value is @@ -50,141 +47,136 @@ one per async context) and "store", which is a value of a storage within a particular async context. The new implementation for storing ALS stores introduces an internal Node.js -class named `AsyncContextFrame` (ACF) which is a map that uses ALSes as keys, -and their stores as the map values, essentially providing a mapping from an ALS -to its store in the current async context. (This implementation is very similar -to how e.g. Java implements `ThreadLocal`, which is a close analogue to ALS in -Node.js.) ACF instances are then stored in CPED. +class named `AsyncContextFrame` (ACF) which is a subclass of JavaScript Map +class that uses ALSes as keys and their stores as the map values, essentially +providing a mapping from an ALS to its store in the current async context. (This +implementation is very similar to how e.g. Java implements `ThreadLocal`, which +is a close analogue to ALS in Node.js.) ACF instances are then stored in CPED. ## Storing the Sample Context in CPED Node.js – as the embedder of V8 – commandeers the CPED to store instances of ACF in it. This means that our profiler can't directly store our sample context in the CPED, because then we'd overwrite the ACF reference already in there and -break Node.js. Fortunately, since ACF is "just" an ordinary JavaScript object, -we can define a new property on it, and store our sample context in it! -JavaScript properties can have strings, numbers, or symbols as their keys, with -symbols being the recommended practice to define properties that are hidden from -unrelated code as symbols are private to their creator and only compare equal to -themselves. Thus we create a private symbol in the profiler instance for our -property key, and our logic for storing the sample context thus becomes: +break Node.js. Fortunately, since ACF is "just" an ordinary JavaScript Map, +we can store our sample context in it as a key-value pair! When a new ACF is +created (normally, through `AsyncLocalStorage.enterWith`), all key-value pairs +are copied into the new map, so our sample context is nicely propagated. +Our logic for storing the sample context thus becomes: * get the CPED from the V8 isolate -* if it is not an object, do nothing (we can't set the sample context) -* otherwise set the sample context as a value in the object with our property - key. - -The reality is a bit thornier, though. Imagine what happens if while we're -setting the property, we get interrupted by a PROF signal and the signal handler -tries to read the property value? It could easily observe an inconsistent state -and crash. But even if it reads a property value, which one did it read? Still -the old one, already the new one, or maybe a torn value between the two? - -Fortunately, we had the exact same problem with our previous approach where we -only stored one sample context in the profiler instances, and the solution is -the same. We encapsulate the pair of shared pointers to a V8 `Global` and an -atomic pointer-to-pointer in a class named `AtomicContextPtr`, which looks like -this: +* if it is not a Map, do nothing (we can't set the sample context) +* otherwise set the sample context as a value in the map with our key. + +It's worth noting that our key is just an ordinary empty JavaScript object +created internally by the profiler. We could've also passed it an externally +created `AsyncLocalStorage` instance, thus preserving the invariant that all +keys in an ACF are ALS instances, but this doesn't seem necessary. + +We use a mutex implemented as an atomic boolean to guard our writes to the map. +The JavaScript code for AsyncContextFrame/AsyncLocalStorage treats the maps as + +Internally, we hold on to the sample context value with a shared pointer to a +V8 `Global`: ``` using ContextPtr = std::shared_ptr>; - -class AtomicContextPtr { - ContextPtr ptr1; - ContextPtr ptr2; - std::atomic currentPtr = &ptr1; - ... ``` -A `Set` method on this class will first store the newly passed sample context in -either `ptr1` or `ptr2` – whichever `currentPtr` is _not_ pointing to at the -moment. Subsequently it atomically updates `currentPtr` to now point to it. - -Instead of storing the current sample context in the ACF property directly, -we want to store an `AtomicContextPtr` (ACP.) The only problem? This is a C++ -class, and properties of JavaScript objects can only be JavaScript values. -Fortunately, V8 gives us a solution for this as well: the `v8::External` type is -a V8 value type that wraps a `void *`. -So now the algorithm for setting a sample context is: + +The values we store in ACF need to be JavaScript values. We use Node.js +`WrapObject` class for this purpose – it allows defining C++ classes that have +a JavaScript "mirror" object, carry a pointer to their C++ object in an internal +field, and when the JS object is garbage collected, the C++ object is destroyed. +Our `WrapObject` subclass in named `PersistentContextPtr` (PCP) because it has +only one field – the above introduced `ContextPtr`, and it is "persistent" +because its lifecycle is bound to that of its representative JavaScript object. + +So the more detailed algorithm for setting a sample context is: * get the CPED from the V8 isolate -* if it is not an object, do nothing (we can't set the sample context) -* Retrieve the property value. If there is one, it's the `External` wrapping the - pointer to the ACP we use. -* If there is none, allocate a new ACP on C++ heap, create a `v8::External` to - hold its pointer, and store it as a property in the ACF. -* Set the sample context as a value on the either retrieved or created ACP. +* if it is not a Map, do nothing (we can't set the sample context) +* if sample context is undefined, delete the key (if it exists) from the map +* if sample context is a different value, create a new `PersistentContextPtr` + wrapped in a JS object, and set the JS object as the value with the key in the + map. The chain of data now looks something like this: ``` v8::Isolate (from Isolate::GetCurrent()) +-> current continuation (internally managed by V8) +-> node::AsyncContextFrame (in continuation's CPED field) - +-> v8::External (in AsyncContextFrame's private property) - +-> dd::AsyncContextPtr (in External's data field) - +-> std::shared_ptr> (in either AsyncContextPtr::ptr1 or ptr2) + +-> Object (the PersistentContextPtr wrapper, associated with our key) + +-> dd::PersistentContextPtr (pointed in Object's internal field) + +-> ContextPtr (in `context` field) +-> v8::Global (in shared_ptr) +-> v8::Value (the actual sample context object) ``` The last 3-4 steps were the same in the previous code version as well, except -`ptr1` and `ptr2` were directly represented in the `WallProfiler`, so then it -looked like this: +there we used a field directly in the `WallProfiler`: ``` dd::WallProfiler - +-> std::shared_ptr> (in either WallProfiler::ptr1 or ptr2) + +-> ContextPtr (in `curContext_` field) +-> v8::Global (in shared_ptr) +-> v8::Value (the actual sample context object) ``` -The difference between the two diagrams shows how we encapsulated the -`(ptr1, ptr2, currentPtr)` tuple into a separate class and moved it out from -being an instance state of `WallProfiler` to being a property of every ACF we -encounter. +The difference between the two diagrams shows how we moved the ContextPtr from +being a single instance state of `WallProfiler` to being an element in ACF maps. + +## Looking up values in a signal handler +The signal handler unfortunately can't directly call any V8 APIs, so in order to +traverse the chain of data above, it needs to rely on pointer arithmetic. Every +`Global` and `Local` have one field, and `Address*`. Thus, to dereference the +actual memory location of a JS object represented by a global reference `ref`, +we use `**(Address**)(&ref)`. These addresses are _tagged_, +meaning their LSB is set to 1, and need to be masked to obtain the actual memory +address. We can safely get the current Isolate pointer, but then we need to +interpret as an address the memory location at an internal offset where it keeps +the current CPED. If it's a JS Map, then we need to do more dead-reckoning into +it and retrieve a pointer to its OrderedHashMap, and then know its memory layout +to find the right hash bucket and traverse the linked list until we find a +key-value pair where the key address is our key object's current address (this +can be moved around by the GC, so that's why our Global is an `Address*`, for +a sufficient number of indirections to keep up with the moves.) The algorithm +for executing an equivalent of a `Map.get()` purely using pointer arithmetic +with knowledge of the V8 object memory layouts is encapsualted in `map-get.cc`. ## Odds and ends And that's mostly it! There are few more small odds and ends to make it work -safely. We still need to guard writing the property value to the ACF against -concurrent access by the signal handler, but now it happens only once for every -ACF, when we create its ACP. We guard by introducing an atomic boolean and -proper signal fencing. - -The signal handler code also needs to be prevented from trying to access the -data while a GC is in progress. With this new model, the signal handler -unfortunately needs to do a small number of V8 API invocations. It needs to -retrieve the current V8 `Context`, it needs to obtain a `Local` for the property -key, and finally it needs to use both in an `Object::Get` call on the CPED. -Calling a property getter on an object is reentrancy into V8, which is advised -against, but this being an ordinary property it ends up being a single dependent -load, which turns out to work safely… unless there's GC happening. For this -reason, we register GC prologue and epilogue callbacks with the V8 isolate so we -can know when GCs are ongoing and the signal handler will refrain from touching -CPED during them. We'll however grab the current sample context from the CPED +safely. As we mentioned above, we're preventing the signal handler from reading +if we're just writing the value using an atomic boolean. We also register GC +prologue and epilogue callbacks with the V8 isolate so we can know when GCs are +ongoing and the signal handler will also refrain from touching memory while a GC +runs. We'll however grab the current sample context from the CPED and store it in a profiler instance field in the GC prologue and use it for any samples taken during GC. -Speaking of GC, we can now have an unbounded number of ACPs – one for each live -ACF. Each ACP is allocated on the C++ heap, and needs to be deleted eventually. -The profiler tracks every ACP it creates in an internal set of live ACPs and -deletes them all when it itself gets disposed. This would still allow for -unbounded growth so we additionally register a V8 GC finalization callback for -every ACF. When V8 collects an ACF instance its finalization callback will put -that ACF's ACP into the profiler's internal vector of ready-to-delete ACPs and -the profiler processes that vector (both deletes the ACP and removes it from the -live set) on each call to `SetContext`. +Speaking of GC, we can now have an unbounded number of PersistentContextPtr +objects – one for each live ACF. Each PCP is allocated on the C++ heap, and +needs to be deleted eventually. The profiler tracks every PCP it creates in an +internal set of live PCPs and deletes them all when it itself gets disposed. +This is combined with `WrapObject` having GC finalization callback for every +PCP. When V8 collects a PCP wrapper its finalization callback will delete the +PCP. ## Changes in dd-trace-js For completeness, we'll describe the changes in dd-trace-js here as well. The main change is that with Node 24, we no longer require async hooks. The -instrumentation points for `AsyncLocalStorage.enterWith` and -`AsyncLocalStorage.run` remain in place – they are the only ones that are needed -now. +instrumentation point for `AsyncLocalStorage.enterWith` is the only one +remaining (`AsyncLocalStorage.run` is implemented in terms of `enterWith`.) +We can further optimize and _not_ set the sample context object if we see it's +the same as the current one (because `enterWith` was run without setting a new +span as the current span.) There are some small performance optimizations that no longer apply with the new approach, though. For one, with the old approach we did some data conversions -(span IDs to string, a tag array to endpoint string) in a sample when a sample -was captured. With the new approach, we do these conversions for all sample -contexts during profile serialization. Doing them after each sample capture -amortized their cost possibly minimally reducing the latency induced at +(span IDs to string, a tag array to endpoint string) in a sample context when a +sample was captured. With the new approach, we do these conversions for all +sample contexts during profile serialization. Doing them after each sample +capture amortized their cost possibly minimally reducing the latency induced at serialization time. With the old approach we also called `SetContext` only once per sampling – we'd install a sample context to be used for the next sample, and then kept updating a `ref` field in it with a reference to the actual data. -Since we no longer have a single sample context (but one per continuation) we -can not do this anymore, and we need to call `SetContext` on every ACF change. +Since we no longer have a single sample context (but rather one per +continuation) we can not do this anymore, and we need to call `SetContext` +either every time `enterWith` runs, or only when we notice that the relevant +span data changed. The cost of this (basically, going into a native call from JavaScript) are still well offset by not having to use async hooks and do work on every async context -change. We could arguably simplify the code by removing those small +change. We could arguably even simplify the code by removing those small optimizations. From de1730d1979670992a7c7bd5f85aa915b472bf0e Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 17 Dec 2025 17:56:51 +0100 Subject: [PATCH 11/15] Allow passing in an ACF key object externally to the profiler. time-profiler.ts uses it to pass an AsyncLocalStorage (ALS) as the key. This allows retrieval of context through the ALS, and can open a way for additional context features, like custom labels. --- bindings/profilers/wall.cc | 58 +++++++++++++++++++++-------------- bindings/profilers/wall.hh | 12 ++++---- ts/src/time-profiler.ts | 21 +++++++++++-- ts/test/test-time-profiler.ts | 14 --------- ts/test/worker.ts | 3 -- 5 files changed, 60 insertions(+), 48 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 3e0498a5..a933d1b0 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -620,9 +620,8 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, bool collectCpuTime, bool collectAsyncId, bool isMainThread, - bool useCPED) + Local cpedKey) : samplingPeriod_(samplingPeriod), - useCPED_(useCPED), includeLines_(includeLines), withContexts_(withContexts), isMainThread_(isMainThread) { @@ -633,10 +632,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_; collectCpuTime_ = collectCpuTime && withContexts; collectAsyncId_ = collectAsyncId && withContexts; + #if DD_WALL_USE_CPED - useCPED_ = useCPED && withContexts; + bool useCPED = withContexts && cpedKey->IsObject(); #else - useCPED_ = false; + constexpr bool useCPED = false; #endif if (withContexts_) { @@ -657,20 +657,19 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, jsArray_ = v8::Global(isolate, jsArray); std::fill(fields_, fields_ + kFieldCount, 0); - if (collectAsyncId_ || useCPED_) { + if (collectAsyncId_ || useCPED) { isolate->AddGCPrologueCallback(&GCPrologueCallback, this); isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); } #if DD_WALL_USE_CPED - if (useCPED_) { + if (useCPED) { // Used to create Map value objects that will have one internal field to // store the sample context pointer. auto wrapObjectTemplate = ObjectTemplate::New(isolate); wrapObjectTemplate->SetInternalFieldCount(1); wrapObjectTemplate_.Reset(isolate, wrapObjectTemplate); - // Object used as the Map key object. - auto cpedKeyObj = Object::New(isolate); + auto cpedKeyObj = cpedKey.As(); cpedKey_.Reset(isolate, cpedKeyObj); cpedKeyHash_ = cpedKeyObj->GetIdentityHash(); } @@ -686,7 +685,7 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { g_profilers.RemoveProfiler(isolate, this); } - if (collectAsyncId_ || useCPED_) { + if (collectAsyncId_ || useCPED()) { isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this); isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this); } @@ -704,8 +703,7 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { } #define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \ - auto name##Value = \ - Nan::Get(arg, Nan::New(#name).ToLocalChecked()); \ + auto name##Value = getArg(#name); \ if (name##Value.IsEmpty() || !name##Value.ToLocalChecked()->IsBoolean()) { \ return Nan::ThrowTypeError(#name " must be a boolean."); \ } \ @@ -718,8 +716,14 @@ NAN_METHOD(WallProfiler::New) { if (info.IsConstructCall()) { auto arg = info[0].As(); - auto intervalMicrosValue = - Nan::Get(arg, Nan::New("intervalMicros").ToLocalChecked()); + auto isolate = info.GetIsolate(); + auto context = isolate->GetCurrentContext(); + auto getArg = [&](const char* name) { + return arg->Get(context, + String::NewFromUtf8(isolate, name).ToLocalChecked()); + }; + + auto intervalMicrosValue = getArg("intervalMicros"); if (intervalMicrosValue.IsEmpty() || !intervalMicrosValue.ToLocalChecked()->IsNumber()) { return Nan::ThrowTypeError("intervalMicros must be a number."); @@ -732,8 +736,7 @@ NAN_METHOD(WallProfiler::New) { return Nan::ThrowTypeError("Sample rate must be positive."); } - auto durationMillisValue = - Nan::Get(arg, Nan::New("durationMillis").ToLocalChecked()); + auto durationMillisValue = getArg("durationMillis"); if (durationMillisValue.IsEmpty() || !durationMillisValue.ToLocalChecked()->IsNumber()) { return Nan::ThrowTypeError("durationMillis must be a number."); @@ -757,6 +760,13 @@ NAN_METHOD(WallProfiler::New) { DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED); + auto cpedKey = getArg("CPEDKey").ToLocalChecked(); + if (cpedKey->IsObject() && !useCPED) { + return Nan::ThrowTypeError("useCPED is false but CPEDKey is specified"); + } + if (useCPED && cpedKey->IsUndefined()) { + cpedKey = Object::New(isolate); + } #if !DD_WALL_USE_CPED if (useCPED) { return Nan::ThrowTypeError( @@ -808,7 +818,7 @@ NAN_METHOD(WallProfiler::New) { collectCpuTime, collectAsyncId, isMainThread, - useCPED); + cpedKey); obj->Wrap(info.This()); info.GetReturnValue().Set(info.This()); } else { @@ -1141,7 +1151,7 @@ void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { void WallProfiler::SetContext(Isolate* isolate, Local value) { #if DD_WALL_USE_CPED - if (!useCPED_) { + if (!useCPED()) { SetCurrentContextPtr(isolate, value); return; } @@ -1159,14 +1169,16 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { // Always replace the PersistentContextPtr in the map even if it is present, // we want the PersistentContextPtr in a parent map to not be mutated. - if (value->IsNullOrUndefined()) { + if (value->IsUndefined()) { // The absence of a sample context will be interpreted as undefined in - // GetContextPtr so if value is null or undefined, just delete the key. + // GetContextPtr so if value is undefined, just delete the key. SignalGuard m(setInProgress_); cpedMap->Delete(v8Ctx, localKey).Check(); } else { auto wrap = wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); + // for easy access from JS when cpedKey is an ALS, it can do als.getStore()?.[0]; + wrap->Set(v8Ctx, 0, value).Check(); auto contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); liveContextPtrs_.insert(contextPtr); contextPtr->Set(isolate, value); @@ -1188,7 +1200,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { return ContextPtr(); } - if (useCPED_) { + if (useCPED()) { auto curGcCount = gcCount.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); if (curGcCount > 0) { @@ -1201,7 +1213,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { #if DD_WALL_USE_CPED - if (!useCPED_) { + if (!useCPED()) { return curContext_; } @@ -1344,7 +1356,7 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { if (collectAsyncId_) { gcAsyncId = GetAsyncIdNoGC(isolate); } - if (useCPED_) { + if (useCPED()) { gcContext_ = GetContextPtrSignalSafe(isolate); } } @@ -1354,7 +1366,7 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { void WallProfiler::OnGCEnd() { auto oldCount = gcCount.fetch_sub(1, std::memory_order_relaxed); - if (oldCount == 1 && useCPED_) { + if (oldCount == 1 && useCPED()) { // Not strictly necessary, as we'll reset it to something else on next GC, // but why retain it longer than needed? gcContext_.reset(); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 51ec4c3f..f066d37a 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -49,7 +49,6 @@ class WallProfiler : public Nan::ObjectWrap { std::chrono::microseconds samplingPeriod_{0}; v8::CpuProfiler* cpuProfiler_ = nullptr; - bool useCPED_ = false; // If we aren't using the CPED, we use a single context ptr stored here. ContextPtr curContext_; // Otherwise we'll use an object as a key to store the context in @@ -119,6 +118,8 @@ class WallProfiler : public Nan::ObjectWrap { void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); + inline bool useCPED() { return !cpedKey_.IsEmpty(); } + public: /** * @param samplingPeriodMicros sampling interval, in microseconds @@ -126,10 +127,9 @@ class WallProfiler : public Nan::ObjectWrap { * parameter is informative; it is up to the caller to call the Stop method * every period. The parameter is used to preallocate data structures that * should not be reallocated in async signal safe code. - * @param useCPED whether to use the V8 ContinuationPreservedEmbedderData to - * store the current sampling context. It can be used if AsyncLocalStorage - * uses the AsyncContextFrame implementation (experimental in Node 23, default - * in Node 24.) + * @param cpedKey if an object, then the profiler should use the + * AsyncLocalFrame stored in the V8 ContinuationPreservedEmbedderData to store + * the current sampling context. */ explicit WallProfiler(std::chrono::microseconds samplingPeriod, std::chrono::microseconds duration, @@ -139,7 +139,7 @@ class WallProfiler : public Nan::ObjectWrap { bool collectCpuTime, bool collectAsyncId, bool isMainThread, - bool useCPED); + v8::Local cpedKey); v8::Local GetContext(v8::Isolate*); void SetContext(v8::Isolate*, v8::Local); diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index a265364b..39866191 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -29,7 +29,7 @@ import { } from './time-profiler-bindings'; import {GenerateTimeLabelsFunction, TimeProfilerMetrics} from './v8-types'; import {isMainThread} from 'worker_threads'; - +import {AsyncLocalStorage} from 'async_hooks'; const {kSampleCount} = profilerConstants; const DEFAULT_INTERVAL_MICROS: Microseconds = 1000; @@ -39,6 +39,7 @@ type Microseconds = number; type Milliseconds = number; let gProfiler: InstanceType | undefined; +let gStore: AsyncLocalStorage | undefined; let gSourceMapper: SourceMapper | undefined; let gIntervalMicros: Microseconds; let gV8ProfilerStuckEventLoopDetected = 0; @@ -95,7 +96,10 @@ export function start(options: TimeProfilerOptions = {}) { throw new Error('Wall profiler is already started'); } - gProfiler = new TimeProfiler({...options, isMainThread}); + if (options.useCPED === true) { + gStore = new AsyncLocalStorage(); + } + gProfiler = new TimeProfiler({...options, CPEDKey: gStore, isMainThread}); gSourceMapper = options.sourceMapper; gIntervalMicros = options.intervalMicros!; gV8ProfilerStuckEventLoopDetected = 0; @@ -106,6 +110,11 @@ export function start(options: TimeProfilerOptions = {}) { if (options.withContexts && !options.useCPED) { setContext({}); } + + // If using CPED, ensure an async context frame exists + if (options.withContexts && options.useCPED && gStore) { + gStore.enterWith([]); + } } export function stop( @@ -128,6 +137,10 @@ export function stop( gProfiler.stop(false); gProfiler.start(); } + // Re-enter async context frame if using CPED, as it may have been cleared during restart + if (gStore) { + gStore.enterWith([]); + } } else { gV8ProfilerStuckEventLoopDetected = 0; } @@ -144,6 +157,10 @@ export function stop( gProfiler.dispose(); gProfiler = undefined; gSourceMapper = undefined; + if (gStore !== undefined) { + gStore.disable(); + gStore = undefined; + } } return serializedProfile; } diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index 90f1e580..df3f4a5d 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -23,7 +23,6 @@ import {hrtime} from 'process'; import {Label, Profile} from 'pprof-format'; import {AssertionError} from 'assert'; import {GenerateTimeLabelsArgs, LabelSet} from '../src/v8-types'; -import {AsyncLocalStorage} from 'async_hooks'; import {satisfies} from 'semver'; import assert from 'assert'; @@ -54,10 +53,6 @@ describe('Time Profiler', () => { this.skip(); } const startTime = BigInt(Date.now()) * 1000n; - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } time.start({ intervalMicros: 20 * 1_000, durationMillis: PROFILE_OPTIONS.durationMillis, @@ -112,10 +107,6 @@ describe('Time Profiler', () => { this.timeout(3000); const intervalNanos = PROFILE_OPTIONS.intervalMicros * 1_000; - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } time.start({ intervalMicros: PROFILE_OPTIONS.intervalMicros, durationMillis: PROFILE_OPTIONS.durationMillis, @@ -554,11 +545,6 @@ describe('Time Profiler', () => { } this.timeout(3000); - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } - // Set up some contexts with labels that we'll mark as low cardinality const lowCardLabel = 'service_name'; const highCardLabel = 'trace_id'; diff --git a/ts/test/worker.ts b/ts/test/worker.ts index bcc7d147..5bc7dca9 100644 --- a/ts/test/worker.ts +++ b/ts/test/worker.ts @@ -6,7 +6,6 @@ import {getAndVerifyPresence, getAndVerifyString} from './profiles-for-tests'; import {satisfies} from 'semver'; import assert from 'assert'; -import {AsyncLocalStorage} from 'async_hooks'; const DURATION_MILLIS = 1000; const intervalMicros = 10000; @@ -62,7 +61,6 @@ function getCpuUsage() { } async function main(durationMs: number) { - if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs * 3, intervalMicros, @@ -107,7 +105,6 @@ async function main(durationMs: number) { } async function worker(durationMs: number) { - if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs, intervalMicros, From 459ed66d6110ba23817d10561906464d77dee7d3 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 17 Dec 2025 18:05:13 +0100 Subject: [PATCH 12/15] Add timeProfiler.runWithContext --- bindings/profilers/wall.cc | 34 +++-- bindings/profilers/wall.hh | 5 + ts/src/index.ts | 1 + ts/src/time-profiler.ts | 13 ++ ts/test/test-time-profiler.ts | 246 +++++++++++++++++++++++++++++++++- 5 files changed, 287 insertions(+), 12 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index a933d1b0..f2ed23ff 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -1088,6 +1088,7 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::SetPrototypeMethod(tpl, "v8ProfilerStuckEventLoopDetected", V8ProfilerStuckEventLoopDetected); + Nan::SetPrototypeMethod(tpl, "createContextHolder", CreateContextHolder); Nan::SetAccessor(tpl->InstanceTemplate(), Nan::New("state").ToLocalChecked(), @@ -1175,22 +1176,29 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { SignalGuard m(setInProgress_); cpedMap->Delete(v8Ctx, localKey).Check(); } else { - auto wrap = - wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); - // for easy access from JS when cpedKey is an ALS, it can do als.getStore()?.[0]; - wrap->Set(v8Ctx, 0, value).Check(); - auto contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); - liveContextPtrs_.insert(contextPtr); - contextPtr->Set(isolate, value); - + auto contextHolder = CreateContextHolder(isolate, v8Ctx, value); SignalGuard m(setInProgress_); - cpedMap->Set(v8Ctx, localKey, wrap).ToLocalChecked(); + cpedMap->Set(v8Ctx, localKey, contextHolder).ToLocalChecked(); } #else SetCurrentContextPtr(isolate, value); #endif } +Local WallProfiler::CreateContextHolder(Isolate* isolate, + Local v8Ctx, + Local value) { + auto wrap = + wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); + // for easy access from JS when cpedKey is an ALS, it can do + // als.getStore()?.[0]; + wrap->Set(v8Ctx, 0, value).Check(); + auto contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); + liveContextPtrs_.insert(contextPtr); + contextPtr->Set(isolate, value); + return wrap; +} + ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { auto isSetInProgress = setInProgress_.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); @@ -1278,6 +1286,14 @@ NAN_SETTER(WallProfiler::SetContext) { profiler->SetContext(info.GetIsolate(), value); } +NAN_METHOD(WallProfiler::CreateContextHolder) { + auto profiler = Nan::ObjectWrap::Unwrap(info.This()); + auto isolate = info.GetIsolate(); + auto contextHolder = profiler->CreateContextHolder( + isolate, isolate->GetCurrentContext(), info[0]); + info.GetReturnValue().Set(contextHolder); +} + NAN_GETTER(WallProfiler::SharedArrayGetter) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->jsArray_.Get(v8::Isolate::GetCurrent())); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index f066d37a..43c9ba84 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -143,6 +143,10 @@ class WallProfiler : public Nan::ObjectWrap { v8::Local GetContext(v8::Isolate*); void SetContext(v8::Isolate*, v8::Local); + v8::Local CreateContextHolder(v8::Isolate*, + v8::Local, + v8::Local); + void PushContext(int64_t time_from, int64_t time_to, int64_t cpu_time, @@ -186,6 +190,7 @@ class WallProfiler : public Nan::ObjectWrap { static NAN_MODULE_INIT(Init); static NAN_GETTER(GetContext); static NAN_SETTER(SetContext); + static NAN_METHOD(CreateContextHolder); static NAN_GETTER(SharedArrayGetter); static NAN_GETTER(GetMetrics); }; diff --git a/ts/src/index.ts b/ts/src/index.ts index 42454629..92a7ec5c 100644 --- a/ts/src/index.ts +++ b/ts/src/index.ts @@ -36,6 +36,7 @@ export const time = { stop: timeProfiler.stop, getContext: timeProfiler.getContext, setContext: timeProfiler.setContext, + runWithContext: timeProfiler.runWithContext, isStarted: timeProfiler.isStarted, v8ProfilerStuckEventLoopDetected: timeProfiler.v8ProfilerStuckEventLoopDetected, diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index 39866191..475cadfc 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -179,6 +179,19 @@ export function setContext(context?: object) { gProfiler.context = context; } +export function runWithContext( + context: object, + f: (...args: TArgs) => R, + ...args: TArgs +): R { + if (!gProfiler) { + throw new Error('Wall profiler is not started'); + } else if (!gStore) { + throw new Error('Can only use runWithContext with AsyncContextFrame'); + } + return gStore.run(gProfiler.createContextHolder(context), f, ...args); +} + export function getContext() { if (!gProfiler) { throw new Error('Wall profiler is not started'); diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index df3f4a5d..235b8370 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -35,6 +35,10 @@ const useCPED = const collectAsyncId = satisfies(process.versions.node, '>=24.0.0'); +const unsupportedPlatform = + process.platform !== 'darwin' && process.platform !== 'linux'; +const shouldSkipCPEDTests = !useCPED || unsupportedPlatform; + const PROFILE_OPTIONS = { durationMillis: 500, intervalMicros: 1000, @@ -49,7 +53,7 @@ describe('Time Profiler', () => { }); it('should update state', function shouldUpdateState() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } const startTime = BigInt(Date.now()) * 1000n; @@ -101,7 +105,7 @@ describe('Time Profiler', () => { }); it('should have labels', function shouldHaveLabels() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } this.timeout(3000); @@ -540,7 +544,7 @@ describe('Time Profiler', () => { describe('lowCardinalityLabels', () => { it('should handle lowCardinalityLabels parameter in stop function', async function testLowCardinalityLabels() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } this.timeout(3000); @@ -730,4 +734,240 @@ describe('Time Profiler', () => { assert.ok(threadId > 0); }); }); + + describe('runWithContext', () => { + it('should throw when profiler is not started', () => { + assert.throws(() => { + time.runWithContext({label: 'test'}, () => {}); + }, /Wall profiler is not started/); + }); + + it('should throw when useCPED is not enabled', function testNoCPED() { + if (unsupportedPlatform) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: false, + }); + + try { + assert.throws(() => { + time.runWithContext({label: 'test'}, () => {}); + }, /Can only use runWithContext with AsyncContextFrame/); + } finally { + time.stop(); + } + }); + + it('should run function with context when useCPED is enabled', function testRunWithContext() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test-value', id: '123'}; + let contextInsideFunction; + + time.runWithContext(testContext, () => { + contextInsideFunction = time.getContext(); + }); + + assert.deepEqual( + contextInsideFunction, + testContext, + 'Context should be accessible within function' + ); + } finally { + time.stop(); + } + }); + + it('should pass arguments to function correctly', function testArguments() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test'}; + const result = time.runWithContext( + testContext, + (a: number, b: string, c: boolean) => { + return {a, b, c}; + }, + 42, + 'hello', + true + ); + + assert.deepEqual( + result, + {a: 42, b: 'hello', c: true}, + 'Arguments should be passed correctly' + ); + } finally { + time.stop(); + } + }); + + it('should return function result', function testReturnValue() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test'}; + const result = time.runWithContext(testContext, () => { + return 'test-result'; + }); + + assert.strictEqual( + result, + 'test-result', + 'Function result should be returned' + ); + } finally { + time.stop(); + } + }); + + it('should handle nested runWithContext calls', function testNestedCalls() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const outerContext = {label: 'outer'}; + const innerContext = {label: 'inner'}; + const results: string[] = []; + + time.runWithContext(outerContext, () => { + const ctx1 = time.getContext(); + results.push((ctx1 as any).label); + + time.runWithContext(innerContext, () => { + const ctx2 = time.getContext(); + results.push((ctx2 as any).label); + }); + + const ctx3 = time.getContext(); + results.push((ctx3 as any).label); + }); + + assert.deepEqual( + results, + ['outer', 'inner', 'outer'], + 'Nested contexts should be properly isolated and restored' + ); + } finally { + time.stop(); + } + }); + + it('should isolate context from outside runWithContext', function testContextIsolation() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const runWithContextContext = {label: 'inside'}; + let contextInside; + + time.runWithContext(runWithContextContext, () => { + contextInside = time.getContext(); + }); + + // Context outside runWithContext should be undefined since we're using CPED + const contextOutside = time.getContext(); + + assert.deepEqual( + contextInside, + runWithContextContext, + 'Context inside should match' + ); + assert.strictEqual( + contextOutside, + undefined, + 'Context outside should be undefined with CPED' + ); + } finally { + time.stop(); + } + }); + + it('should work with async functions', async function testAsyncFunction() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'async-test'}; + + const result = await time.runWithContext(testContext, async () => { + const ctx1 = time.getContext(); + await delay(10); + const ctx2 = time.getContext(); + return {ctx1, ctx2}; + }); + + assert.deepEqual( + result.ctx1, + testContext, + 'Context should be available before await' + ); + assert.deepEqual( + result.ctx2, + testContext, + 'Context should be preserved after await' + ); + } finally { + time.stop(); + } + }); + }); }); From d4ddbefe787f2eddf19f2fa62560f3b3bd2ce2b5 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 26 Jan 2026 19:05:58 +0100 Subject: [PATCH 13/15] Add explicit tests for map handling --- ts/test/test-get-value-from-map-profiler.ts | 216 ++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 ts/test/test-get-value-from-map-profiler.ts diff --git a/ts/test/test-get-value-from-map-profiler.ts b/ts/test/test-get-value-from-map-profiler.ts new file mode 100644 index 00000000..5e03bf00 --- /dev/null +++ b/ts/test/test-get-value-from-map-profiler.ts @@ -0,0 +1,216 @@ +/** + * Copyright 2025 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for GetValueFromMap through the TimeProfiler API. + * + * These tests verify that GetValueFromMap works correctly in its actual usage context: + * - The profiler creates a key object (AsyncLocalStorage or internal key) + * - Contexts are stored in the CPED map using that key + * - GetValueFromMap retrieves contexts using the same key during signal handling + * + * This is the real-world usage pattern, and these tests confirm the structure + * layout and key address extraction work correctly on Node 24.x / V8 13.6. + */ + +import assert from 'assert'; +import {join} from 'path'; +import {AsyncLocalStorage} from 'async_hooks'; +import {satisfies} from 'semver'; + +const findBinding = require('node-gyp-build'); +const profiler = findBinding(join(__dirname, '..', '..')); + +const useCPED = + (satisfies(process.versions.node, '>=24.0.0') && + !process.execArgv.includes('--no-async-context-frame')) || + (satisfies(process.versions.node, '>=22.7.0') && + process.execArgv.includes('--experimental-async-context-frame')); + +const supportedPlatform = + process.platform === 'darwin' || process.platform === 'linux'; + +if (useCPED && supportedPlatform) { + describe('GetValueFromMap (through TimeProfiler)', () => { + describe('basic context storage and retrieval', () => { + it('should store and retrieve a simple object context', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context = {label: 'test-context'}; + profiler.context = context; + + const retrieved = profiler.context; + assert.strictEqual( + retrieved, + context, + 'Should retrieve the same object' + ); + + profiler.dispose(); + }); + + it('should store and retrieve context with multiple properties', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context = { + spanId: '1234567890', + traceId: 'abcdef123456', + operation: 'test-operation', + resource: '/api/endpoint', + tags: {environment: 'test', version: '1.0'}, + }; + + profiler.context = context; + const retrieved = profiler.context; + + assert.deepStrictEqual(retrieved, context); + assert.strictEqual(retrieved.spanId, context.spanId); + assert.strictEqual(retrieved.traceId, context.traceId); + assert.deepStrictEqual(retrieved.tags, context.tags); + + profiler.dispose(); + }); + + it('should handle context updates', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context1 = {label: 'first'}; + profiler.context = context1; + assert.strictEqual(profiler.context, context1); + + const context2 = {label: 'second'}; + profiler.context = context2; + assert.strictEqual(profiler.context, context2); + + const context3 = {label: 'third', extra: 'data'}; + profiler.context = context3; + assert.strictEqual(profiler.context, context3); + + profiler.dispose(); + }); + + it('should return undefined for undefined context', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + profiler.context = undefined; + const retrieved = profiler.context; + + assert.strictEqual(retrieved, undefined); + + profiler.dispose(); + }); + + it('should work with createContextHolder pattern', () => { + // This tests the pattern used by runWithContext where + // createContextHolder creates a wrap object that's stored in CPED map + + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const context = {label: 'wrapped-context', id: 999}; + + // Using als.run mimics what runWithContext does internally + als.run(profiler.createContextHolder(context), () => { + const retrieved = profiler.context; + + // The wrap object stores context at index 0 + assert.ok(retrieved !== null && typeof retrieved === 'object'); + assert.deepStrictEqual(retrieved, context); + }); + + profiler.dispose(); + }); + }); + + describe('multiple context frames', () => { + it('should isolate contexts in different async frames', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const context1 = {frame: 'frame1'}; + const context2 = {frame: 'frame2'}; + + // Frame 1 + als.run([], () => { + profiler.context = context1; + assert.deepStrictEqual(profiler.context, context1); + }); + + // Frame 2 + als.run([], () => { + assert.strictEqual(profiler.context, undefined); + profiler.context = context2; + assert.deepStrictEqual(profiler.context, context2); + }); + + // Outside frames + assert.strictEqual(profiler.context, undefined); + + profiler.dispose(); + }); + + it('should handle nested async frames', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const outerContext = {level: 'outer'}; + const innerContext = {level: 'inner'}; + + als.run([], () => { + profiler.context = outerContext; + assert.deepStrictEqual(profiler.context, outerContext); + + als.run([], () => { + profiler.context = innerContext; + assert.deepStrictEqual(profiler.context, innerContext); + }); + + // Back to outer context frame + assert.deepStrictEqual(profiler.context, outerContext); + }); + + profiler.dispose(); + }); + }); + }); +} + +function createProfiler(als: AsyncLocalStorage) { + return new profiler.TimeProfiler({ + intervalMicros: 10000, + durationMillis: 500, + withContexts: true, + useCPED: true, + CPEDKey: als, + lineNumbers: false, + workaroundV8Bug: false, + collectCpuTime: false, + collectAsyncId: true, + isMainThread: true, + }); +} From 33b3960266dc8f4f39d928bee30636bd5764c751 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 27 Jan 2026 16:55:46 +0100 Subject: [PATCH 14/15] Moved GC callback registration in StartImpl, to be symmetrical with their deregistration through StopImpl->Dispose. This was usually not a problem but test-get-value-from-map-profiler.ts creates a profiler without starting it. --- bindings/profilers/wall.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index f2ed23ff..eed8a831 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -657,11 +657,6 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, jsArray_ = v8::Global(isolate, jsArray); std::fill(fields_, fields_ + kFieldCount, 0); - if (collectAsyncId_ || useCPED) { - isolate->AddGCPrologueCallback(&GCPrologueCallback, this); - isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); - } - #if DD_WALL_USE_CPED if (useCPED) { // Used to create Map value objects that will have one internal field to @@ -856,6 +851,14 @@ Result WallProfiler::StartImpl() { return Result{"Cannot start profiler: another profiler is already active."}; } + // Register GC callbacks for async ID and CPED context tracking before + // starting profiling + auto isolate = Isolate::GetCurrent(); + if (collectAsyncId_ || useCPED()) { + isolate->AddGCPrologueCallback(&GCPrologueCallback, this); + isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); + } + profileId_ = StartInternal(); auto collectionMode = withContexts_ @@ -864,7 +867,6 @@ Result WallProfiler::StartImpl() { : CollectionMode::kNoCollect); collectionMode_.store(collectionMode, std::memory_order_relaxed); started_ = true; - auto isolate = Isolate::GetCurrent(); node::AddEnvironmentCleanupHook(isolate, &WallProfiler::CleanupHook, isolate); return {}; } From 9f0aa90ae6c82930a58d742931d0557ae08bac7a Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 27 Jan 2026 17:11:00 +0100 Subject: [PATCH 15/15] Ensuring live context pointers are released from a profiler destructor. This was usually not a problem but test-get-value-from-map-profiler.ts creates a profiler without starting it. --- bindings/profilers/wall.cc | 14 ++++++++------ bindings/profilers/wall.hh | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index eed8a831..0cc45754 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -687,14 +687,16 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); + } +} - // Delete all live contexts - for (auto ptr : liveContextPtrs_) { - ptr->Detach(); // so it doesn't invalidate our iterator - delete ptr; - } - liveContextPtrs_.clear(); +WallProfiler::~WallProfiler() { + // Delete all live contexts + for (auto ptr : liveContextPtrs_) { + ptr->Detach(); // so it doesn't invalidate our iterator + delete ptr; } + liveContextPtrs_.clear(); } #define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \ diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 43c9ba84..12fe4b4c 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -98,7 +98,7 @@ class WallProfiler : public Nan::ObjectWrap { using ContextBuffer = std::vector; ContextBuffer contexts_; - ~WallProfiler() = default; + ~WallProfiler(); void Dispose(v8::Isolate* isolate, bool removeFromMap); // A new CPU profiler object will be created each time profiling is started