diff --git a/binding.gyp b/binding.gyp index 71b0f215..381141f4 100644 --- a/binding.gyp +++ b/binding.gyp @@ -18,7 +18,8 @@ "bindings/thread-cpu-clock.cc", "bindings/translate-heap-profile.cc", "bindings/translate-time-profile.cc", - "bindings/binding.cc" + "bindings/binding.cc", + "bindings/map-get.cc" ], "include_dirs": [ "bindings", diff --git a/bindings/map-get.cc b/bindings/map-get.cc new file mode 100644 index 00000000..7de0ff93 --- /dev/null +++ b/bindings/map-get.cc @@ -0,0 +1,375 @@ +/** + * Copyright 2025 Datadog. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map-get.hh" + +// Find a value in JavaScript map by directly reading the underlying V8 hash +// map. +// +// V8 uses TWO internal hash map representations: +// 1. SmallOrderedHashMap: For small maps (capacity 4-254) +// - Metadata stored as uint8_t bytes +// - Entry size: 2 (key, value) +// - Chain table separate from entries +// +// 2. OrderedHashMap: For larger maps (capacity >254) +// - Metadata stored as Smis in FixedArray +// - Entry size: 3 (key, value, chain) +// - Chain stored inline with entries +// +// This code handles both types by detecting the table format at runtime. +// Practical testing shows that at least the AsyncContextFrame maps use the +// large map format even for small cardinality maps, but just in case we handle +// both. + +#include + +namespace dd { + +using Address = uintptr_t; + +#ifndef _WIN32 +// ============================================================================ +// Constants from V8 internals +// ============================================================================ + +// Heap object tagging +constexpr int kHeapObjectTag = 1; + +// OrderedHashMap/SmallOrderedHashMap shared constants +constexpr int kNotFound = -1; +constexpr int kSmallLoadFactor = 2; + +// ============================================================================ +// Helper Functions (needed by struct methods) +// ============================================================================ + +inline Address UntagPointer(Address tagged) { + return tagged - kHeapObjectTag; +} + +inline bool IsSmi(Address value) { + return (value & 1) == 0; +} + +// SmiToInt Conversion below valid only on 64-bit platforms, the only ones we +// support +static_assert(sizeof(void*) == 8, "Only 64-bit platforms supported"); + +inline int SmiToInt(Address smi) { + return static_cast(static_cast(smi) >> 32); +} + +// ============================================================================ +// V8 Hashtable Structure Definitions +// ============================================================================ + +// HeapObject layout - base for all V8 heap objects +// From v8/src/objects/heap-object.h +struct HeapObjectLayout { + Address classMap_; // Tagged pointer to the class map +}; + +// JavaScript Map object +struct JSMapLayout { + HeapObjectLayout header_; // Map is a HeapObject + Address properties_or_hash_; // not used by us + Address elements_; // not used by us + // Tagged pointer to a [Small]OrderedHashMapLayout + Address table_; +}; + +// V8 FixedArray: length_ is a Smi, followed by that many element slots +struct FixedArrayLayout { + HeapObjectLayout header_; // FixedArray is a HeapObject + Address length_; + Address elements_[0]; +}; + +// NOTE: both OrderedHashMap and SmallOrderedHashMap have compatible method +// definitions so FindEntryByHash and FindValueByHash can be defined as +// templated function working on both. + +// OrderedHashMap layout (for large maps, capacity >254) +// From v8/src/objects/ordered-hash-table.h +struct OrderedHashMapLayout { + FixedArrayLayout fixedArray_; // OrderedHashMap is a FixedArray + // The first 3 address slots in the FixedArray that is a Hashtable are the + // number of elements, deleted elements, and buckets. Each one is a Smi. + Address number_of_elements_; + Address number_of_deleted_elements_; + Address number_of_buckets_; + // First number_of_buckets_ entries in head_and_data_table_ is the head table: + // each entry is an index of the first entry (head of the linked list of + // entries) in the data table for that bucket. This is followed by the data + // table. Each data table entry uses three (kEntrySize == 3) tagged pointer + // slots: + // [0]: key (Tagged Object) + // [1]: value (Tagged Object) + // [2]: chain (Smi - next entry index or -1) + // All indices (both to the head of the list and to the next entry are + // expressed in number of entries from the start of the data table, so to + // convert it into a head_and_data_table_ you need to add number_of_buckets_ + // (length of the head table) and then 3 * index. + Address head_and_data_table_[0]; // Variable: [head_table][data_table] + + // Constants for entry structure + static constexpr int kEntrySize = 3; + static constexpr int kKeyOffset = 0; + static constexpr int kValueOffset = 1; + static constexpr int kChainOffset = 2; + static constexpr int kNotFoundValue = kNotFound; + + // Get number of buckets (converts Smi to int) + int NumberOfBuckets() const { + return IsSmi(number_of_buckets_) ? SmiToInt(number_of_buckets_) : 0; + } + + // Get an upper bound for number of element chain in a bucket. Used to prevent + // infinite lookup chain. + int GetMaxChainLength() const { + return IsSmi(number_of_elements_) && IsSmi(number_of_deleted_elements_) + ? SmiToInt(number_of_elements_) + + SmiToInt(number_of_deleted_elements_) + : 0; + } + + // Convert hash to bucket index + int HashToBucket(int hash) const { + int num_buckets = NumberOfBuckets(); + return num_buckets > 0 ? (hash & (num_buckets - 1)) : 0; + } + + // Get first entry index for a bucket + int GetFirstEntry(int bucket) const { + Address entry_smi = head_and_data_table_[bucket]; + return IsSmi(entry_smi) ? SmiToInt(entry_smi) : kNotFound; + } + + // Convert entry index to head_and_data_table_ index for the entry's key + int EntryToIndex(int entry) const { + return NumberOfBuckets() + (entry * kEntrySize); + } + + // Get key at entry index + Address GetKey(int entry) const { + int index = EntryToIndex(entry); + return head_and_data_table_[index + kKeyOffset]; + } + + // Get value at entry index + Address GetValue(int entry) const { + int index = EntryToIndex(entry); + return head_and_data_table_[index + kValueOffset]; + } + + // Get next entry in chain + int GetNextChainEntry(int entry) const { + int index = EntryToIndex(entry); + Address chain_smi = head_and_data_table_[index + kChainOffset]; + return IsSmi(chain_smi) ? SmiToInt(chain_smi) : kNotFound; + } +}; + +// SmallOrderedHashMap layout (for small maps, capacity 4-254) +// Memory layout (stores metadata as uint8_t, not Smis): +// [0]: map pointer (HeapObject) +// [kHeaderSize + 0]: number_of_elements (uint8) +// [kHeaderSize + 1]: number_of_deleted_elements (uint8) +// [kHeaderSize + 2]: number_of_buckets (uint8) +// [kHeaderSize + 3...]: padding (5 bytes on 64-bit, 1 byte on 32-bit) +// [DataTableStartOffset...]: data table (key-value pairs as Tagged) +// [...]: hash table (uint8 bucket indices) +// [...]: chain table (uint8 next entry indices) +// +// Each entry is 2 Tagged elements (kEntrySize = 2): +// [0]: key (Tagged Object) +// [1]: value (Tagged Object) +// +// From v8/src/objects/ordered-hash-table.h +struct SmallOrderedHashMapLayout { + HeapObjectLayout header_; + uint8_t number_of_elements_; + uint8_t number_of_deleted_elements_; + uint8_t number_of_buckets_; + uint8_t padding_[5]; // 5 bytes on 64-bit + // Variable length: + // - Address data_table_[capacity * kEntrySize] // Keys and values + // - uint8_t hash_table_[number_of_buckets_] // Bucket -> first entry + // - uint8_t chain_table_[capacity] // Entry -> next entry + Address data_table_[0]; + + // Constants for entry structure + static constexpr int kEntrySize = 2; + static constexpr int kKeyOffset = 0; + static constexpr int kValueOffset = 1; + static constexpr int kNotFoundValue = 255; + + // Get capacity from number of buckets + int Capacity() const { return number_of_buckets_ * kSmallLoadFactor; } + + int NumberOfBuckets() const { return number_of_buckets_; } + + int GetMaxChainLength() const { + return number_of_elements_ + number_of_deleted_elements_; + } + + int HashToBucket(int hash) const { return hash & (NumberOfBuckets() - 1); } + + const uint8_t* GetHashTable() const { + return reinterpret_cast(data_table_ + + Capacity() * kEntrySize); + } + + const uint8_t* GetChainTable() const { + return GetHashTable() + number_of_buckets_; + } + + // Get key at entry index + Address GetKey(int entry) const { + return data_table_[entry * kEntrySize + kKeyOffset]; + } + + // Get value at entry index + Address GetValue(int entry) const { + return data_table_[entry * kEntrySize + kValueOffset]; + } + + // Get first entry in bucket + uint8_t GetFirstEntry(int bucket) const { + const uint8_t* hash_table = GetHashTable(); + return hash_table[bucket]; + } + + // Get next entry in chain + uint8_t GetNextChainEntry(int entry) const { + const uint8_t* chain_table = GetChainTable(); + return chain_table[entry]; + } +}; + +// ============================================================================ +// Templated Hash Table Lookup +// ============================================================================ + +// Find an entry by a key and its hash in any hash table layout +// Template parameter LayoutT should be either OrderedHashMapLayout or +// SmallOrderedHashMapLayout +template +int FindEntryByHash(const LayoutT* layout, int hash, Address key_to_find) { + int max_chain_length = layout->GetMaxChainLength(); + int bucket = layout->HashToBucket(hash); + int entry = layout->GetFirstEntry(bucket); + + // Paranoid: by never traversing more than the sum of elements and deleted + // elements we guarantee this terminates in bound time even if for some + // unforeseen reason the chain is cyclical. + for (int max_chain_left = max_chain_length; + entry != LayoutT::kNotFoundValue && max_chain_left > 0; + max_chain_left--) { + Address key_at_entry = layout->GetKey(entry); + if (key_at_entry == key_to_find) { + return entry; + } + entry = layout->GetNextChainEntry(entry); + } + + return kNotFound; +} + +// Find an entry by a key and its hash in any hash table layout, and return its +// value or the zero address if it is not found. +// Template parameter LayoutT should be either OrderedHashMapLayout or +// SmallOrderedHashMapLayout +template +Address FindValueByHash(const LayoutT* layout, int hash, Address key_to_find) { + auto entry = FindEntryByHash(layout, hash, key_to_find); + return entry == kNotFound ? 0 : layout->GetValue(entry); +} + +// Detect if the table is an OrderedHashMap or a SmallOrderedHashMap (or it can +// not safely be determined) by checking padding bytes. SmallOrderedHashMap has +// always-zero padding bytes after the metadata. +static uint8_t GetOrderedHashMapType(Address table_untagged) { + const SmallOrderedHashMapLayout* potential_small = + reinterpret_cast(table_untagged); + + // Read the header as one 64-bit value for validation + uint64_t smallHeader = + *reinterpret_cast(&potential_small->number_of_elements_); + + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, + "Little-endian required"); + // Small map will have some bits in bytes 0-2 be nonzero, and all bits in + // bytes 3-7 zero. That effectively limits the value range of smallHeader to + // [0x1-0xFFFFFF]. + if (smallHeader > 0 && smallHeader < 0x1000000) { + auto num_elements = potential_small->number_of_elements_; + auto num_deleted = potential_small->number_of_deleted_elements_; + auto num_buckets = potential_small->number_of_buckets_; + + // SmallOrderedHashMap has constraints: + // - num_buckets must be a power of 2 between 2 and 127 + // - num_elements + num_deleted <= capacity (buckets * 2) + if (num_buckets >= 2 && num_buckets <= 127) { + // Check if num_buckets is a power of 2 + if ((num_buckets & (num_buckets - 1)) == 0) { + auto capacity = num_buckets * kSmallLoadFactor; + if (num_elements + num_deleted <= capacity) { + return 1; // small map + } + } + } + return 2; // undecided + } + return 0; // large map +} + +// ============================================================================ +// Main entry point +// ============================================================================ + +// Lookup value in a Map given the hash and key pointer. If the key is not found +// in the map (or the lookup can not be performed) returns a zero Address (which +// is essentially a zero Smi value.) +Address GetValueFromMap(Address map_addr, int hash, Address key) { + const JSMapLayout* map_untagged = + reinterpret_cast(UntagPointer(map_addr)); + Address table_untagged = UntagPointer(map_untagged->table_); + + switch (GetOrderedHashMapType(table_untagged)) { + case 0: { + const OrderedHashMapLayout* layout = + reinterpret_cast(table_untagged); + return FindValueByHash(layout, hash, key); + } + case 1: { + const SmallOrderedHashMapLayout* layout = + reinterpret_cast(table_untagged); + return FindValueByHash(layout, hash, key); + } + } + return 0; // We couldn't determine the kind of the map, just return zero. +} + +#else // _WIN32 + +Address GetValueFromMap(Address map_addr, int hash, Address key) { + return 0; +} + +#endif // _WIN32 +} // namespace dd diff --git a/bindings/map-get.hh b/bindings/map-get.hh new file mode 100644 index 00000000..bb20e0d6 --- /dev/null +++ b/bindings/map-get.hh @@ -0,0 +1,25 @@ +/** + * Copyright 2025 Datadog. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +using Address = uintptr_t; + +namespace dd { +Address GetValueFromMap(Address map_addr, int hash, Address key); +} // namespace dd diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 80be444e..0cc45754 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -26,7 +26,7 @@ #include #include -#include "defer.hh" +#include "map-get.hh" #include "per-isolate-data.hh" #include "translate-time-profile.hh" #include "wall.hh" @@ -105,37 +105,23 @@ void SetContextPtr(ContextPtr& contextPtr, } } -class PersistentContextPtr { +class PersistentContextPtr : public node::ObjectWrap { ContextPtr context; - WallProfiler* owner; - Persistent per; + std::unordered_set* live; public: - PersistentContextPtr(WallProfiler* owner) : owner(owner) {} - - void UnregisterFromGC() { - if (!per.IsEmpty()) { - per.ClearWeak(); - per.Reset(); - } + PersistentContextPtr(std::unordered_set* live, + Local wrap) + : live(live) { + Wrap(wrap); } - void MarkDead() { - context.reset(); - owner->MarkDeadPersistentContextPtr(this); - } + void Detach() { live = nullptr; } - void RegisterForGC(Isolate* isolate, const Local& obj) { - // Register a callback to delete this object when the object is GCed - per.Reset(isolate, obj); - per.SetWeak( - this, - [](const WeakCallbackInfo& data) { - auto ptr = data.GetParameter(); - ptr->UnregisterFromGC(); - ptr->MarkDead(); - }, - WeakCallbackType::kParameter); + ~PersistentContextPtr() { + if (live) { + live->erase(this); + } } void Set(Isolate* isolate, const Local& value) { @@ -143,12 +129,11 @@ class PersistentContextPtr { } ContextPtr Get() const { return context; } -}; -void WallProfiler::MarkDeadPersistentContextPtr(PersistentContextPtr* ptr) { - deadContextPtrs_.push_back(ptr); - liveContextPtrs_.erase(ptr); -} + static PersistentContextPtr* Unwrap(Local wrap) { + return node::ObjectWrap::Unwrap(wrap); + } +}; // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -627,123 +612,6 @@ void GCEpilogueCallback(Isolate* isolate, static_cast(data)->OnGCEnd(); } -#if DD_WALL_USE_CPED -// Implementation of method calls on the CPED proxy that invoke the method on -// the proxied object. "data" is a two-element array where element 0 is the -// Symbol used to find the proxied object in the proxy and element 1 is either -// the method name or a cached method. -void CpedProxyMethodCallback(const FunctionCallbackInfo& info) { - auto isolate = info.GetIsolate(); - auto context = isolate->GetCurrentContext(); - auto cpedProxy = info.This(); - auto data = info.Data().As(); - auto symbol = data->Get(context, 0).ToLocalChecked().As(); - auto propertyName = data->Get(context, 1).ToLocalChecked(); - auto proxied = cpedProxy->Get(context, symbol).ToLocalChecked(); - Local method; - if (propertyName->IsFunction()) { - // It was already cached as a method, so we can use it directly - method = propertyName.As(); - } else { - method = proxied.As() - ->Get(context, propertyName) - .ToLocalChecked() - .As(); - // replace the property name with the method once resolved so later - // invocations are faster - data->Set(context, 1, method).Check(); - } - MaybeLocal retval; - auto arglen = info.Length(); - switch (arglen) { - case 0: - retval = method->Call(context, proxied, 0, nullptr); - break; - case 1: { - auto arg = info[0]; - retval = method->Call(context, proxied, 1, &arg); - break; - } - case 2: { - Local args[] = {info[0], info[1]}; - retval = method->Call(context, proxied, 2, args); - break; - } - default: { - // No Map methods take more than 2 arguments, so this path should never - // get invoked. We still implement it for completeness sake. - auto args = new Local[arglen]; - for (int i = 0; i < arglen; ++i) { - args[i] = info[i]; - } - retval = method->Call(context, proxied, arglen, args); - delete[] args; - } - } - info.GetReturnValue().Set(retval.ToLocalChecked()); -} - -// Implementation of property getters on the CPED proxy that get the property on -// the proxied object. "data" the Symbol used to find the proxied object in the -// proxy. -void CpedProxyPropertyGetterCallback(Local property, - const PropertyCallbackInfo& info) { - auto isolate = info.GetIsolate(); - auto context = isolate->GetCurrentContext(); - auto cpedProxy = info.This(); - auto symbol = info.Data().As(); - auto proxied = cpedProxy->Get(context, symbol).ToLocalChecked(); - auto value = proxied.As()->Get(context, property).ToLocalChecked(); - info.GetReturnValue().Set(value); -} - -// Sets up all the proxy methods and properties for the CPED proxy prototype -void SetupCpedProxyProtoMethods(Isolate* isolate, - Local cpedProxyProto, - Local cpedProxySymbol) { - auto context = isolate->GetCurrentContext(); - - auto addProxyProtoMethod = [&](Local methodName) { - auto data = Array::New(isolate, 2); - data->Set(context, Number::New(isolate, 0), cpedProxySymbol).Check(); - data->Set(context, Number::New(isolate, 1), methodName).Check(); - cpedProxyProto - ->Set(context, - methodName, - Function::New(context, &CpedProxyMethodCallback, data) - .ToLocalChecked()) - .Check(); - }; - - // Map methods + AsyncContextFrame.disable method - static constexpr const char* methodNames[] = {"clear", - "delete", - "entries", - "forEach", - "get", - "has", - "keys", - "set", - "values", - "disable"}; - - for (const char* methodName : methodNames) { - addProxyProtoMethod( - String::NewFromUtf8(isolate, methodName).ToLocalChecked()); - } - addProxyProtoMethod(Symbol::GetIterator(isolate)); - - // Map.size property - cpedProxyProto - ->SetNativeDataProperty(context, - String::NewFromUtf8Literal(isolate, "size"), - &CpedProxyPropertyGetterCallback, - nullptr, - cpedProxySymbol) - .Check(); -} -#endif // DD_WALL_USE_CPED - WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, std::chrono::microseconds duration, bool includeLines, @@ -752,9 +620,8 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, bool collectCpuTime, bool collectAsyncId, bool isMainThread, - bool useCPED) + Local cpedKey) : samplingPeriod_(samplingPeriod), - useCPED_(useCPED), includeLines_(includeLines), withContexts_(withContexts), isMainThread_(isMainThread) { @@ -765,10 +632,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_; collectCpuTime_ = collectCpuTime && withContexts; collectAsyncId_ = collectAsyncId && withContexts; + #if DD_WALL_USE_CPED - useCPED_ = useCPED && withContexts; + bool useCPED = withContexts && cpedKey->IsObject(); #else - useCPED_ = false; + constexpr bool useCPED = false; #endif if (withContexts_) { @@ -789,33 +657,16 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, jsArray_ = v8::Global(isolate, jsArray); std::fill(fields_, fields_ + kFieldCount, 0); - if (collectAsyncId_ || useCPED_) { - isolate->AddGCPrologueCallback(&GCPrologueCallback, this); - isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); - } - #if DD_WALL_USE_CPED - if (useCPED_) { - // Used to create CPED proxy objects that will have one internal field to + if (useCPED) { + // Used to create Map value objects that will have one internal field to // store the sample context pointer. - auto cpedObjTpl = ObjectTemplate::New(isolate); - cpedObjTpl->SetInternalFieldCount(1); - cpedProxyTemplate_.Reset(isolate, cpedObjTpl); - // Symbol used for the property name that stores the proxied object in the - // CPED proxy object. - Local cpedProxySymbol = - Symbol::New(isolate, - String::NewFromUtf8Literal( - isolate, "WallProfiler::CPEDProxy::ProxiedObject")); - cpedProxySymbol_.Reset(isolate, cpedProxySymbol); - // Prototype for the CPED proxy object that will have methods and property - // getters that invoke the corresponding methods and properties on the - // proxied object. The set of methods & properties is chosen with the - // assumption that the proxied object is a Node.js AsyncContextFrame. - Local cpedProxyProto = Object::New(isolate); - cpedProxyProto_.Reset(isolate, cpedProxyProto); - - SetupCpedProxyProtoMethods(isolate, cpedProxyProto, cpedProxySymbol); + auto wrapObjectTemplate = ObjectTemplate::New(isolate); + wrapObjectTemplate->SetInternalFieldCount(1); + wrapObjectTemplate_.Reset(isolate, wrapObjectTemplate); + auto cpedKeyObj = cpedKey.As(); + cpedKey_.Reset(isolate, cpedKeyObj); + cpedKeyHash_ = cpedKeyObj->GetIdentityHash(); } #endif // DD_WALL_USE_CPED } @@ -829,32 +680,27 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { g_profilers.RemoveProfiler(isolate, this); } - if (collectAsyncId_ || useCPED_) { + if (collectAsyncId_ || useCPED()) { isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this); isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this); } node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); + } +} - // Delete all live contexts - for (auto ptr : liveContextPtrs_) { - ptr->UnregisterFromGC(); - delete ptr; - } - liveContextPtrs_.clear(); - - // Delete all unused contexts, too - for (auto ptr : deadContextPtrs_) { - delete ptr; - } - deadContextPtrs_.clear(); +WallProfiler::~WallProfiler() { + // Delete all live contexts + for (auto ptr : liveContextPtrs_) { + ptr->Detach(); // so it doesn't invalidate our iterator + delete ptr; } + liveContextPtrs_.clear(); } #define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \ - auto name##Value = \ - Nan::Get(arg, Nan::New(#name).ToLocalChecked()); \ + auto name##Value = getArg(#name); \ if (name##Value.IsEmpty() || !name##Value.ToLocalChecked()->IsBoolean()) { \ return Nan::ThrowTypeError(#name " must be a boolean."); \ } \ @@ -867,8 +713,14 @@ NAN_METHOD(WallProfiler::New) { if (info.IsConstructCall()) { auto arg = info[0].As(); - auto intervalMicrosValue = - Nan::Get(arg, Nan::New("intervalMicros").ToLocalChecked()); + auto isolate = info.GetIsolate(); + auto context = isolate->GetCurrentContext(); + auto getArg = [&](const char* name) { + return arg->Get(context, + String::NewFromUtf8(isolate, name).ToLocalChecked()); + }; + + auto intervalMicrosValue = getArg("intervalMicros"); if (intervalMicrosValue.IsEmpty() || !intervalMicrosValue.ToLocalChecked()->IsNumber()) { return Nan::ThrowTypeError("intervalMicros must be a number."); @@ -881,8 +733,7 @@ NAN_METHOD(WallProfiler::New) { return Nan::ThrowTypeError("Sample rate must be positive."); } - auto durationMillisValue = - Nan::Get(arg, Nan::New("durationMillis").ToLocalChecked()); + auto durationMillisValue = getArg("durationMillis"); if (durationMillisValue.IsEmpty() || !durationMillisValue.ToLocalChecked()->IsNumber()) { return Nan::ThrowTypeError("durationMillis must be a number."); @@ -906,6 +757,13 @@ NAN_METHOD(WallProfiler::New) { DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED); + auto cpedKey = getArg("CPEDKey").ToLocalChecked(); + if (cpedKey->IsObject() && !useCPED) { + return Nan::ThrowTypeError("useCPED is false but CPEDKey is specified"); + } + if (useCPED && cpedKey->IsUndefined()) { + cpedKey = Object::New(isolate); + } #if !DD_WALL_USE_CPED if (useCPED) { return Nan::ThrowTypeError( @@ -957,7 +815,7 @@ NAN_METHOD(WallProfiler::New) { collectCpuTime, collectAsyncId, isMainThread, - useCPED); + cpedKey); obj->Wrap(info.This()); info.GetReturnValue().Set(info.This()); } else { @@ -995,6 +853,14 @@ Result WallProfiler::StartImpl() { return Result{"Cannot start profiler: another profiler is already active."}; } + // Register GC callbacks for async ID and CPED context tracking before + // starting profiling + auto isolate = Isolate::GetCurrent(); + if (collectAsyncId_ || useCPED()) { + isolate->AddGCPrologueCallback(&GCPrologueCallback, this); + isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); + } + profileId_ = StartInternal(); auto collectionMode = withContexts_ @@ -1003,7 +869,6 @@ Result WallProfiler::StartImpl() { : CollectionMode::kNoCollect); collectionMode_.store(collectionMode, std::memory_order_relaxed); started_ = true; - auto isolate = Isolate::GetCurrent(); node::AddEnvironmentCleanupHook(isolate, &WallProfiler::CleanupHook, isolate); return {}; } @@ -1227,6 +1092,7 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::SetPrototypeMethod(tpl, "v8ProfilerStuckEventLoopDetected", V8ProfilerStuckEventLoopDetected); + Nan::SetPrototypeMethod(tpl, "createContextHolder", CreateContextHolder); Nan::SetAccessor(tpl->InstanceTemplate(), Nan::New("state").ToLocalChecked(), @@ -1290,52 +1156,53 @@ void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { void WallProfiler::SetContext(Isolate* isolate, Local value) { #if DD_WALL_USE_CPED - if (!useCPED_) { + if (!useCPED()) { SetCurrentContextPtr(isolate, value); return; } auto cped = isolate->GetContinuationPreservedEmbedderData(); // No Node AsyncContextFrame in this continuation yet - if (!cped->IsObject()) return; - - auto cpedObj = cped.As(); - PersistentContextPtr* contextPtr; - SignalGuard m(setInProgress_); - auto proxyProto = cpedProxyProto_.Get(isolate); - if (!proxyProto->StrictEquals(cpedObj->GetPrototype())) { - auto v8Ctx = isolate->GetCurrentContext(); - // This should always be called from a V8 context, but check just in case. - if (v8Ctx.IsEmpty()) return; - // Create a new CPED object with an internal field for the context pointer - auto proxyObj = - cpedProxyTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); - // Set up the proxy object to hold the proxied object and have the prototype - // that proxies AsyncContextFrame methods and properties - proxyObj->SetPrototype(v8Ctx, proxyProto).Check(); - proxyObj->Set(v8Ctx, cpedProxySymbol_.Get(isolate), cpedObj).Check(); - // Set up the context pointer in the internal field - if (!deadContextPtrs_.empty()) { - contextPtr = deadContextPtrs_.back(); - deadContextPtrs_.pop_back(); - } else { - contextPtr = new PersistentContextPtr(this); - } - liveContextPtrs_.insert(contextPtr); - contextPtr->RegisterForGC(isolate, cpedObj); - proxyObj->SetAlignedPointerInInternalField(0, contextPtr); - // Set the proxy object as the continuation preserved embedder data - isolate->SetContinuationPreservedEmbedderData(proxyObj); + if (!cped->IsMap()) return; + + auto v8Ctx = isolate->GetCurrentContext(); + // This should always be called from a V8 context, but check just in case. + if (v8Ctx.IsEmpty()) return; + + auto cpedMap = cped.As(); + auto localKey = cpedKey_.Get(isolate); + + // Always replace the PersistentContextPtr in the map even if it is present, + // we want the PersistentContextPtr in a parent map to not be mutated. + if (value->IsUndefined()) { + // The absence of a sample context will be interpreted as undefined in + // GetContextPtr so if value is undefined, just delete the key. + SignalGuard m(setInProgress_); + cpedMap->Delete(v8Ctx, localKey).Check(); } else { - contextPtr = static_cast( - cpedObj->GetAlignedPointerFromInternalField(0)); + auto contextHolder = CreateContextHolder(isolate, v8Ctx, value); + SignalGuard m(setInProgress_); + cpedMap->Set(v8Ctx, localKey, contextHolder).ToLocalChecked(); } - contextPtr->Set(isolate, value); #else SetCurrentContextPtr(isolate, value); #endif } +Local WallProfiler::CreateContextHolder(Isolate* isolate, + Local v8Ctx, + Local value) { + auto wrap = + wrapObjectTemplate_.Get(isolate)->NewInstance(v8Ctx).ToLocalChecked(); + // for easy access from JS when cpedKey is an ALS, it can do + // als.getStore()?.[0]; + wrap->Set(v8Ctx, 0, value).Check(); + auto contextPtr = new PersistentContextPtr(&liveContextPtrs_, wrap); + liveContextPtrs_.insert(contextPtr); + contextPtr->Set(isolate, value); + return wrap; +} + ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { auto isSetInProgress = setInProgress_.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); @@ -1345,7 +1212,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { return ContextPtr(); } - if (useCPED_) { + if (useCPED()) { auto curGcCount = gcCount.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); if (curGcCount > 0) { @@ -1358,27 +1225,34 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { #if DD_WALL_USE_CPED - if (!useCPED_) { + if (!useCPED()) { return curContext_; } if (!isolate->IsInUse()) { - // Must not try to create a handle scope if isolate is not in use. return ContextPtr(); } - auto addr = reinterpret_cast( + auto cpedAddrPtr = reinterpret_cast( reinterpret_cast(isolate) + internal::Internals::kContinuationPreservedEmbedderDataOffset); - - if (internal::Internals::HasHeapObjectTag(*addr)) { - auto cped = reinterpret_cast(addr); - if (cped->IsObject()) { - auto cpedObj = static_cast(cped); - if (cpedObj->InternalFieldCount() > 0) { - return static_cast( - cpedObj->GetAlignedPointerFromInternalField(0)) - ->Get(); + auto cpedAddr = *cpedAddrPtr; + if (internal::Internals::HasHeapObjectTag(cpedAddr)) { + auto cpedValuePtr = reinterpret_cast(cpedAddrPtr); + if (cpedValuePtr->IsMap()) { + Address keyAddr = **(reinterpret_cast(&cpedKey_)); + + Address wrapAddr = GetValueFromMap(cpedAddr, cpedKeyHash_, keyAddr); + if (internal::Internals::HasHeapObjectTag(wrapAddr)) { + auto wrapValue = reinterpret_cast(&wrapAddr); + if (wrapValue->IsObject()) { + auto wrapObj = reinterpret_cast(wrapValue); + if (wrapObj->InternalFieldCount() > 0) { + return static_cast( + wrapObj->GetAlignedPointerFromInternalField(0)) + ->Get(); + } + } } } } @@ -1390,7 +1264,6 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { Local WallProfiler::GetMetrics(Isolate* isolate) { auto usedAsyncContextCount = liveContextPtrs_.size(); - auto totalAsyncContextCount = usedAsyncContextCount + deadContextPtrs_.size(); auto context = isolate->GetCurrentContext(); auto metrics = Object::New(isolate); metrics @@ -1398,10 +1271,11 @@ Local WallProfiler::GetMetrics(Isolate* isolate) { String::NewFromUtf8Literal(isolate, "usedAsyncContextCount"), Number::New(isolate, usedAsyncContextCount)) .ToChecked(); + // totalAsyncContextCount == usedAsyncContextCount metrics ->Set(context, String::NewFromUtf8Literal(isolate, "totalAsyncContextCount"), - Number::New(isolate, totalAsyncContextCount)) + Number::New(isolate, usedAsyncContextCount)) .ToChecked(); return metrics; } @@ -1416,6 +1290,14 @@ NAN_SETTER(WallProfiler::SetContext) { profiler->SetContext(info.GetIsolate(), value); } +NAN_METHOD(WallProfiler::CreateContextHolder) { + auto profiler = Nan::ObjectWrap::Unwrap(info.This()); + auto isolate = info.GetIsolate(); + auto contextHolder = profiler->CreateContextHolder( + isolate, isolate->GetCurrentContext(), info[0]); + info.GetReturnValue().Set(contextHolder); +} + NAN_GETTER(WallProfiler::SharedArrayGetter) { auto profiler = Nan::ObjectWrap::Unwrap(info.This()); info.GetReturnValue().Set(profiler->jsArray_.Get(v8::Isolate::GetCurrent())); @@ -1494,7 +1376,7 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { if (collectAsyncId_) { gcAsyncId = GetAsyncIdNoGC(isolate); } - if (useCPED_) { + if (useCPED()) { gcContext_ = GetContextPtrSignalSafe(isolate); } } @@ -1504,7 +1386,7 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { void WallProfiler::OnGCEnd() { auto oldCount = gcCount.fetch_sub(1, std::memory_order_relaxed); - if (oldCount == 1 && useCPED_) { + if (oldCount == 1 && useCPED()) { // Not strictly necessary, as we'll reset it to something else on next GC, // but why retain it longer than needed? gcContext_.reset(); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index e7c224b2..12fe4b4c 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -49,22 +49,17 @@ class WallProfiler : public Nan::ObjectWrap { std::chrono::microseconds samplingPeriod_{0}; v8::CpuProfiler* cpuProfiler_ = nullptr; - bool useCPED_ = false; // If we aren't using the CPED, we use a single context ptr stored here. ContextPtr curContext_; - // Otherwise we'll use an internal field in objects stored in CPED. We must - // construct objects with an internal field count of 1 and a specially - // constructed prototype. - v8::Global cpedProxyTemplate_; - v8::Global cpedProxyProto_; - v8::Global cpedProxySymbol_; + // Otherwise we'll use an object as a key to store the context in + // AsyncContextFrame maps. + v8::Global cpedKey_; + int cpedKeyHash_; + v8::Global wrapObjectTemplate_; // We track live context pointers in a set to avoid memory leaks. They will // be deleted when the profiler is disposed. std::unordered_set liveContextPtrs_; - // Context pointers belonging to GC'd CPED objects register themselves here. - // They will be reused. - std::deque deadContextPtrs_; std::atomic gcCount = 0; std::atomic setInProgress_ = false; @@ -103,7 +98,7 @@ class WallProfiler : public Nan::ObjectWrap { using ContextBuffer = std::vector; ContextBuffer contexts_; - ~WallProfiler() = default; + ~WallProfiler(); void Dispose(v8::Isolate* isolate, bool removeFromMap); // A new CPU profiler object will be created each time profiling is started @@ -123,6 +118,8 @@ class WallProfiler : public Nan::ObjectWrap { void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); + inline bool useCPED() { return !cpedKey_.IsEmpty(); } + public: /** * @param samplingPeriodMicros sampling interval, in microseconds @@ -130,10 +127,9 @@ class WallProfiler : public Nan::ObjectWrap { * parameter is informative; it is up to the caller to call the Stop method * every period. The parameter is used to preallocate data structures that * should not be reallocated in async signal safe code. - * @param useCPED whether to use the V8 ContinuationPreservedEmbedderData to - * store the current sampling context. It can be used if AsyncLocalStorage - * uses the AsyncContextFrame implementation (experimental in Node 23, default - * in Node 24.) + * @param cpedKey if an object, then the profiler should use the + * AsyncLocalFrame stored in the V8 ContinuationPreservedEmbedderData to store + * the current sampling context. */ explicit WallProfiler(std::chrono::microseconds samplingPeriod, std::chrono::microseconds duration, @@ -143,10 +139,14 @@ class WallProfiler : public Nan::ObjectWrap { bool collectCpuTime, bool collectAsyncId, bool isMainThread, - bool useCPED); + v8::Local cpedKey); v8::Local GetContext(v8::Isolate*); void SetContext(v8::Isolate*, v8::Local); + v8::Local CreateContextHolder(v8::Isolate*, + v8::Local, + v8::Local); + void PushContext(int64_t time_from, int64_t time_to, int64_t cpu_time, @@ -182,8 +182,6 @@ class WallProfiler : public Nan::ObjectWrap { void OnGCStart(v8::Isolate* isolate); void OnGCEnd(); - void MarkDeadPersistentContextPtr(PersistentContextPtr* ptr); - static NAN_METHOD(New); static NAN_METHOD(Start); static NAN_METHOD(Stop); @@ -192,6 +190,7 @@ class WallProfiler : public Nan::ObjectWrap { static NAN_MODULE_INIT(Init); static NAN_GETTER(GetContext); static NAN_SETTER(SetContext); + static NAN_METHOD(CreateContextHolder); static NAN_GETTER(SharedArrayGetter); static NAN_GETTER(GetMetrics); }; diff --git a/doc/sample_context_in_cped.md b/doc/sample_context_in_cped.md index f4dd8a4b..69f80e50 100644 --- a/doc/sample_context_in_cped.md +++ b/doc/sample_context_in_cped.md @@ -9,12 +9,9 @@ typical piece of data sample context stores is the tracing span ID, so whenever it changes, the sample context needs to be updated. ## How is the Sample Context stored and updated? -Before Node 23, the sample context would be stored in a +Before Node 22.7, the sample context would be stored in a `std::shared_ptr>` field on the C++ `WallProfiler` -instance. (In fact, due to the need for ensuring atomic updates and shared -pointers not being effectively updateable atomically it's actually a pair of -fields with an atomic pointer-to-shared-pointer switching between them, but I -digress.) Due to it being a single piece of instance state, it had to be updated +instance. Due to it being a single piece of instance state, it had to be updated every time the active span changed, possibly on every invocation of `AsyncLocalStorage.enterWith` and `.run`, but even more importantly on every async context change, and for that we needed to register a "before" callback @@ -39,8 +36,8 @@ per-continuation basis and the engine takes care to return the right one when `Isolate::GetContinuationPreservedEmbedderData()` method is invoked. We will refer to continuation-preserved embedder data as "CPED" from now on. -Starting with Node.js 23, CPED is used to implement data storage behind Node.js -`AsyncLocalStorage` API. This dovetails nicely with our needs as all the +Starting with Node.js 22.7, CPED is used to implement data storage behind +Node.js `AsyncLocalStorage` API. This dovetails nicely with our needs as all the span-related data we set on the sample context is normally managed in an async local storage (ALS) by the tracer. An application can create any number of ALSes, and each ALS manages a single value per async context. This value is @@ -50,139 +47,136 @@ one per async context) and "store", which is a value of a storage within a particular async context. The new implementation for storing ALS stores introduces an internal Node.js -class named `AsyncContextFrame` (ACF) which is a map that uses ALSes as keys, -and their stores as the map values, essentially providing a mapping from an ALS -to its store in the current async context. (This implementation is very similar -to how e.g. Java implements `ThreadLocal`, which is a close analogue to ALS in -Node.js.) ACF instances are then stored in CPED. +class named `AsyncContextFrame` (ACF) which is a subclass of JavaScript Map +class that uses ALSes as keys and their stores as the map values, essentially +providing a mapping from an ALS to its store in the current async context. (This +implementation is very similar to how e.g. Java implements `ThreadLocal`, which +is a close analogue to ALS in Node.js.) ACF instances are then stored in CPED. -## Storing the Sample Context in CPED, take one +## Storing the Sample Context in CPED Node.js – as the embedder of V8 – commandeers the CPED to store instances of ACF in it. This means that our profiler can't directly store our sample context in the CPED, because then we'd overwrite the ACF reference already in there and -break Node.js. Our first attempt at solving this was to –- since ACF is "just" -an ordinary JavaScript object -- to define a new property on it, and store our -sample context in it! JavaScript properties can have strings, numbers, or -symbols as their keys, with symbols being the recommended practice to define -properties that are hidden from unrelated code as symbols are private to their -creator and only compare equal to themselves. Thus we created a private symbol in -the profiler instance for our property key, and our logic for storing the sample -context thus becomes: +break Node.js. Fortunately, since ACF is "just" an ordinary JavaScript Map, +we can store our sample context in it as a key-value pair! When a new ACF is +created (normally, through `AsyncLocalStorage.enterWith`), all key-value pairs +are copied into the new map, so our sample context is nicely propagated. +Our logic for storing the sample context thus becomes: * get the CPED from the V8 isolate -* if it is not an object, do nothing (we can't set the sample context) -* otherwise set the sample context as a value in the object with our property - key. - -Unfortunately, this approach is not signal safe. When we want to read the value -in the signal handler, it now needs to retrieve the CPED, which creates a V8 -`Local`, and then it needs to read a property on it, which creates -another `Local`. It also needs to retrieve the current context, and a `Local` -for the symbol used as a key – four `Local`s in total. V8 tracks the object -addresses pointed to by locals so that GC doesn't touch them. It tracks them in -a series of arrays, and if the current array fills up, it needs to allocate a -new one. As we know, allocation is unsafe in a signal handler, hence our -problem. We were thinking of a solution where we check if there is at least 4 -slots free in the current array, but then our profiler's operation would be at -mercy of V8 internal state. - -## Storing the Sample Context in CPED, take two - -Next we thought of replacing the `AsyncContextFrame` object in CPED with one we -created with an internal field – we can store and retrieve an arbitrary `void *` -in it with `{Get|Set}AlignedPointerInInternalField` methods. The initial idea -was to leverage JavaScript's property of being a prototype-based language and -set the original CPED object as the prototype of our replacement, so that all -its methods would keep being invoked. This unfortunately didn't work because -the `AsyncContextFrame` is a `Map` and our replacement object doesn't have the -internal structure of V8's implementation of a map. The final solution turned -out to be the one where we store the original ACF as a property in our -replacement object (now effectively, a proxy to the ACF), and define all the -`Map` methods and properties on the proxy so that they are invoked on the ACF. -Even though the proxy does not pass an `instanceof Map` check, it is duck-typed -as a map. We even encapsulated this behavior in a special prototype object, so -the operations to set the context are: -* retrieve the ACF from CPED -* create a new object (the proxy) with one internal field -* set the ACF as a special property in the proxy -* set the prototype of the proxy to our prototype that defines all the proxied -methods and properties to forward through the proxy-referenced ACF. -* store our sample context in the internal field of the proxy -* set the proxy object as the CPED. - -Now, a keen eyed reader will notice that in the signal handler we still need to -call `Isolate::GetContinuationPreservedEmbedderData` which still creates a -`Local`. That would be true, except that we can import the `v8-internals.h` -header and directly read the address of the object by reading into the isolate -at the offset `kContinuationPreservedEmbedderDataOffset` declared in it. +* if it is not a Map, do nothing (we can't set the sample context) +* otherwise set the sample context as a value in the map with our key. +It's worth noting that our key is just an ordinary empty JavaScript object +created internally by the profiler. We could've also passed it an externally +created `AsyncLocalStorage` instance, thus preserving the invariant that all +keys in an ACF are ALS instances, but this doesn't seem necessary. + +We use a mutex implemented as an atomic boolean to guard our writes to the map. +The JavaScript code for AsyncContextFrame/AsyncLocalStorage treats the maps as + +Internally, we hold on to the sample context value with a shared pointer to a +V8 `Global`: +``` +using ContextPtr = std::shared_ptr>; +``` + +The values we store in ACF need to be JavaScript values. We use Node.js +`WrapObject` class for this purpose – it allows defining C++ classes that have +a JavaScript "mirror" object, carry a pointer to their C++ object in an internal +field, and when the JS object is garbage collected, the C++ object is destroyed. +Our `WrapObject` subclass in named `PersistentContextPtr` (PCP) because it has +only one field – the above introduced `ContextPtr`, and it is "persistent" +because its lifecycle is bound to that of its representative JavaScript object. + +So the more detailed algorithm for setting a sample context is: +* get the CPED from the V8 isolate +* if it is not a Map, do nothing (we can't set the sample context) +* if sample context is undefined, delete the key (if it exists) from the map +* if sample context is a different value, create a new `PersistentContextPtr` + wrapped in a JS object, and set the JS object as the value with the key in the + map. The chain of data now looks something like this: ``` v8::Isolate (from Isolate::GetCurrent()) +-> current continuation (internally managed by V8) - +-- our proxy object - +-- node::AsyncContextFrame (in proxy's private property, for forwarding method calls) - +-- prototype: declares functions and properties that forward to the AsyncContextFrame - +-- dd:PersistentContextPtr* (in proxy's internal field) - +-> std::shared_ptr> (in PersistentContextPtr's context field) - +-> v8::Global (in shared_ptr) - +-> v8::Value (the actual sample context object) - + +-> node::AsyncContextFrame (in continuation's CPED field) + +-> Object (the PersistentContextPtr wrapper, associated with our key) + +-> dd::PersistentContextPtr (pointed in Object's internal field) + +-> ContextPtr (in `context` field) + +-> v8::Global (in shared_ptr) + +-> v8::Value (the actual sample context object) ``` -The last 3 steps are the same as when CPED is not being used, except `context` -is directly represented in the `WallProfiler`, so then it looks like this: +The last 3-4 steps were the same in the previous code version as well, except +there we used a field directly in the `WallProfiler`: ``` dd::WallProfiler - +-> std::shared_ptr> (in either WallProfiler::ptr1 or ptr2) + +-> ContextPtr (in `curContext_` field) +-> v8::Global (in shared_ptr) +-> v8::Value (the actual sample context object) ``` - -### Memory allocations and garbage collection -We need to allocate a `PersistentContextPtr` (PCP) instance for every proxy we -create. The PCP has two concerns: it both has a shared pointer to the V8 global -that carries the sample context, and it also has a V8 weak reference to the -proxy object it is encapsulated within. This allows us to detect (since weak -references allow for GC callbacks) when the proxy object gets garbage collected, -and at that time the PCP itself can be either deleted or reused. We have an -optimization where we don't delete PCPs -- the assumption is that the number of -live ACFs (and thus proxies, and thus PCPs) will be constant for a server -application under load, so instead of doing a high amount of small new/delete -operations that can fragment the native heap, we keep the ones we'd delete in a -dequeue instead and reuse them. +The difference between the two diagrams shows how we moved the ContextPtr from +being a single instance state of `WallProfiler` to being an element in ACF maps. + +## Looking up values in a signal handler +The signal handler unfortunately can't directly call any V8 APIs, so in order to +traverse the chain of data above, it needs to rely on pointer arithmetic. Every +`Global` and `Local` have one field, and `Address*`. Thus, to dereference the +actual memory location of a JS object represented by a global reference `ref`, +we use `**(Address**)(&ref)`. These addresses are _tagged_, +meaning their LSB is set to 1, and need to be masked to obtain the actual memory +address. We can safely get the current Isolate pointer, but then we need to +interpret as an address the memory location at an internal offset where it keeps +the current CPED. If it's a JS Map, then we need to do more dead-reckoning into +it and retrieve a pointer to its OrderedHashMap, and then know its memory layout +to find the right hash bucket and traverse the linked list until we find a +key-value pair where the key address is our key object's current address (this +can be moved around by the GC, so that's why our Global is an `Address*`, for +a sufficient number of indirections to keep up with the moves.) The algorithm +for executing an equivalent of a `Map.get()` purely using pointer arithmetic +with knowledge of the V8 object memory layouts is encapsualted in `map-get.cc`. ## Odds and ends And that's mostly it! There are few more small odds and ends to make it work -safely. We still need to guard reading the value in the signal handler while -it's being written. We guard by introducing an atomic boolean and proper signal -fencing. - -The signal handler code also needs to be prevented from trying to access the -data while GC is in progress. For this reason, we register GC prologue and -epilogue callbacks with the V8 isolate so we can know when GCs are ongoing and -the signal handler will refrain from reading the CPED field during them. We'll -however grab the current sample context from the CPED and store it in a profiler -instance field in the GC prologue and use it for any samples taken during GC. +safely. As we mentioned above, we're preventing the signal handler from reading +if we're just writing the value using an atomic boolean. We also register GC +prologue and epilogue callbacks with the V8 isolate so we can know when GCs are +ongoing and the signal handler will also refrain from touching memory while a GC +runs. We'll however grab the current sample context from the CPED +and store it in a profiler instance field in the GC prologue and use it for any +samples taken during GC. + +Speaking of GC, we can now have an unbounded number of PersistentContextPtr +objects – one for each live ACF. Each PCP is allocated on the C++ heap, and +needs to be deleted eventually. The profiler tracks every PCP it creates in an +internal set of live PCPs and deletes them all when it itself gets disposed. +This is combined with `WrapObject` having GC finalization callback for every +PCP. When V8 collects a PCP wrapper its finalization callback will delete the +PCP. ## Changes in dd-trace-js For completeness, we'll describe the changes in dd-trace-js here as well. The main change is that with Node 24, we no longer require async hooks. The -instrumentation points for `AsyncLocalStorage.enterWith` and -`AsyncLocalStorage.run` remain in place – they are the only ones that are needed -now. +instrumentation point for `AsyncLocalStorage.enterWith` is the only one +remaining (`AsyncLocalStorage.run` is implemented in terms of `enterWith`.) +We can further optimize and _not_ set the sample context object if we see it's +the same as the current one (because `enterWith` was run without setting a new +span as the current span.) There are some small performance optimizations that no longer apply with the new approach, though. For one, with the old approach we did some data conversions -(span IDs to bigint, a tag array to endpoint string) in a sample when a sample -was captured. With the new approach, we do these conversions for all sample -contexts during profile serialization. Doing them after each sample capture -amortized their cost, possibly reducing the latency induced at serialization -time. With the old approach we also called `SetContext` only once per sampling – -we'd install a sample context to be used for the next sample, and then kept -updating a `ref` field in it with a reference to the actual data from pure -JavaScript code. Since we no longer have a single sample context (but one per -continuation) we can not do this anymore, and we need to call `SetContext` on -every ACF change. The cost of this (basically, going into a native call from -JavaScript) are still well offset by not having to use async hooks and do work -on every async context change. We could arguably simplify the code by removing -those small optimizations. +(span IDs to string, a tag array to endpoint string) in a sample context when a +sample was captured. With the new approach, we do these conversions for all +sample contexts during profile serialization. Doing them after each sample +capture amortized their cost possibly minimally reducing the latency induced at +serialization time. With the old approach we also called `SetContext` only once +per sampling – we'd install a sample context to be used for the next sample, and +then kept updating a `ref` field in it with a reference to the actual data. +Since we no longer have a single sample context (but rather one per +continuation) we can not do this anymore, and we need to call `SetContext` +either every time `enterWith` runs, or only when we notice that the relevant +span data changed. +The cost of this (basically, going into a native call from JavaScript) are still +well offset by not having to use async hooks and do work on every async context +change. We could arguably even simplify the code by removing those small +optimizations. diff --git a/package.json b/package.json index a6cc6fd6..81812abb 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "pretest:js-valgrind": "npm run pretest", "pretest": "npm run compile", "rebuild": "node-gyp rebuild --jobs=max", + "rebuild-debug": "node-gyp rebuild --jobs=max --debug", "test:cpp": "node scripts/cctest.js", "test:js-asan": "LSAN_OPTIONS='suppressions=./suppressions/lsan_suppr.txt' LD_PRELOAD=`gcc -print-file-name=libasan.so` mocha out/test/test-*.js", "test:js-tsan": "LD_PRELOAD=`gcc -print-file-name=libtsan.so` mocha out/test/test-*.js", diff --git a/ts/src/index.ts b/ts/src/index.ts index 42454629..92a7ec5c 100644 --- a/ts/src/index.ts +++ b/ts/src/index.ts @@ -36,6 +36,7 @@ export const time = { stop: timeProfiler.stop, getContext: timeProfiler.getContext, setContext: timeProfiler.setContext, + runWithContext: timeProfiler.runWithContext, isStarted: timeProfiler.isStarted, v8ProfilerStuckEventLoopDetected: timeProfiler.v8ProfilerStuckEventLoopDetected, diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index a265364b..475cadfc 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -29,7 +29,7 @@ import { } from './time-profiler-bindings'; import {GenerateTimeLabelsFunction, TimeProfilerMetrics} from './v8-types'; import {isMainThread} from 'worker_threads'; - +import {AsyncLocalStorage} from 'async_hooks'; const {kSampleCount} = profilerConstants; const DEFAULT_INTERVAL_MICROS: Microseconds = 1000; @@ -39,6 +39,7 @@ type Microseconds = number; type Milliseconds = number; let gProfiler: InstanceType | undefined; +let gStore: AsyncLocalStorage | undefined; let gSourceMapper: SourceMapper | undefined; let gIntervalMicros: Microseconds; let gV8ProfilerStuckEventLoopDetected = 0; @@ -95,7 +96,10 @@ export function start(options: TimeProfilerOptions = {}) { throw new Error('Wall profiler is already started'); } - gProfiler = new TimeProfiler({...options, isMainThread}); + if (options.useCPED === true) { + gStore = new AsyncLocalStorage(); + } + gProfiler = new TimeProfiler({...options, CPEDKey: gStore, isMainThread}); gSourceMapper = options.sourceMapper; gIntervalMicros = options.intervalMicros!; gV8ProfilerStuckEventLoopDetected = 0; @@ -106,6 +110,11 @@ export function start(options: TimeProfilerOptions = {}) { if (options.withContexts && !options.useCPED) { setContext({}); } + + // If using CPED, ensure an async context frame exists + if (options.withContexts && options.useCPED && gStore) { + gStore.enterWith([]); + } } export function stop( @@ -128,6 +137,10 @@ export function stop( gProfiler.stop(false); gProfiler.start(); } + // Re-enter async context frame if using CPED, as it may have been cleared during restart + if (gStore) { + gStore.enterWith([]); + } } else { gV8ProfilerStuckEventLoopDetected = 0; } @@ -144,6 +157,10 @@ export function stop( gProfiler.dispose(); gProfiler = undefined; gSourceMapper = undefined; + if (gStore !== undefined) { + gStore.disable(); + gStore = undefined; + } } return serializedProfile; } @@ -162,6 +179,19 @@ export function setContext(context?: object) { gProfiler.context = context; } +export function runWithContext( + context: object, + f: (...args: TArgs) => R, + ...args: TArgs +): R { + if (!gProfiler) { + throw new Error('Wall profiler is not started'); + } else if (!gStore) { + throw new Error('Can only use runWithContext with AsyncContextFrame'); + } + return gStore.run(gProfiler.createContextHolder(context), f, ...args); +} + export function getContext() { if (!gProfiler) { throw new Error('Wall profiler is not started'); diff --git a/ts/test/test-get-value-from-map-profiler.ts b/ts/test/test-get-value-from-map-profiler.ts new file mode 100644 index 00000000..5e03bf00 --- /dev/null +++ b/ts/test/test-get-value-from-map-profiler.ts @@ -0,0 +1,216 @@ +/** + * Copyright 2025 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for GetValueFromMap through the TimeProfiler API. + * + * These tests verify that GetValueFromMap works correctly in its actual usage context: + * - The profiler creates a key object (AsyncLocalStorage or internal key) + * - Contexts are stored in the CPED map using that key + * - GetValueFromMap retrieves contexts using the same key during signal handling + * + * This is the real-world usage pattern, and these tests confirm the structure + * layout and key address extraction work correctly on Node 24.x / V8 13.6. + */ + +import assert from 'assert'; +import {join} from 'path'; +import {AsyncLocalStorage} from 'async_hooks'; +import {satisfies} from 'semver'; + +const findBinding = require('node-gyp-build'); +const profiler = findBinding(join(__dirname, '..', '..')); + +const useCPED = + (satisfies(process.versions.node, '>=24.0.0') && + !process.execArgv.includes('--no-async-context-frame')) || + (satisfies(process.versions.node, '>=22.7.0') && + process.execArgv.includes('--experimental-async-context-frame')); + +const supportedPlatform = + process.platform === 'darwin' || process.platform === 'linux'; + +if (useCPED && supportedPlatform) { + describe('GetValueFromMap (through TimeProfiler)', () => { + describe('basic context storage and retrieval', () => { + it('should store and retrieve a simple object context', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context = {label: 'test-context'}; + profiler.context = context; + + const retrieved = profiler.context; + assert.strictEqual( + retrieved, + context, + 'Should retrieve the same object' + ); + + profiler.dispose(); + }); + + it('should store and retrieve context with multiple properties', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context = { + spanId: '1234567890', + traceId: 'abcdef123456', + operation: 'test-operation', + resource: '/api/endpoint', + tags: {environment: 'test', version: '1.0'}, + }; + + profiler.context = context; + const retrieved = profiler.context; + + assert.deepStrictEqual(retrieved, context); + assert.strictEqual(retrieved.spanId, context.spanId); + assert.strictEqual(retrieved.traceId, context.traceId); + assert.deepStrictEqual(retrieved.tags, context.tags); + + profiler.dispose(); + }); + + it('should handle context updates', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + const context1 = {label: 'first'}; + profiler.context = context1; + assert.strictEqual(profiler.context, context1); + + const context2 = {label: 'second'}; + profiler.context = context2; + assert.strictEqual(profiler.context, context2); + + const context3 = {label: 'third', extra: 'data'}; + profiler.context = context3; + assert.strictEqual(profiler.context, context3); + + profiler.dispose(); + }); + + it('should return undefined for undefined context', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + als.enterWith([]); + + profiler.context = undefined; + const retrieved = profiler.context; + + assert.strictEqual(retrieved, undefined); + + profiler.dispose(); + }); + + it('should work with createContextHolder pattern', () => { + // This tests the pattern used by runWithContext where + // createContextHolder creates a wrap object that's stored in CPED map + + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const context = {label: 'wrapped-context', id: 999}; + + // Using als.run mimics what runWithContext does internally + als.run(profiler.createContextHolder(context), () => { + const retrieved = profiler.context; + + // The wrap object stores context at index 0 + assert.ok(retrieved !== null && typeof retrieved === 'object'); + assert.deepStrictEqual(retrieved, context); + }); + + profiler.dispose(); + }); + }); + + describe('multiple context frames', () => { + it('should isolate contexts in different async frames', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const context1 = {frame: 'frame1'}; + const context2 = {frame: 'frame2'}; + + // Frame 1 + als.run([], () => { + profiler.context = context1; + assert.deepStrictEqual(profiler.context, context1); + }); + + // Frame 2 + als.run([], () => { + assert.strictEqual(profiler.context, undefined); + profiler.context = context2; + assert.deepStrictEqual(profiler.context, context2); + }); + + // Outside frames + assert.strictEqual(profiler.context, undefined); + + profiler.dispose(); + }); + + it('should handle nested async frames', () => { + const als = new AsyncLocalStorage(); + const profiler = createProfiler(als); + + const outerContext = {level: 'outer'}; + const innerContext = {level: 'inner'}; + + als.run([], () => { + profiler.context = outerContext; + assert.deepStrictEqual(profiler.context, outerContext); + + als.run([], () => { + profiler.context = innerContext; + assert.deepStrictEqual(profiler.context, innerContext); + }); + + // Back to outer context frame + assert.deepStrictEqual(profiler.context, outerContext); + }); + + profiler.dispose(); + }); + }); + }); +} + +function createProfiler(als: AsyncLocalStorage) { + return new profiler.TimeProfiler({ + intervalMicros: 10000, + durationMillis: 500, + withContexts: true, + useCPED: true, + CPEDKey: als, + lineNumbers: false, + workaroundV8Bug: false, + collectCpuTime: false, + collectAsyncId: true, + isMainThread: true, + }); +} diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index d7359578..235b8370 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -23,7 +23,6 @@ import {hrtime} from 'process'; import {Label, Profile} from 'pprof-format'; import {AssertionError} from 'assert'; import {GenerateTimeLabelsArgs, LabelSet} from '../src/v8-types'; -import {AsyncLocalStorage} from 'async_hooks'; import {satisfies} from 'semver'; import assert from 'assert'; @@ -36,6 +35,10 @@ const useCPED = const collectAsyncId = satisfies(process.versions.node, '>=24.0.0'); +const unsupportedPlatform = + process.platform !== 'darwin' && process.platform !== 'linux'; +const shouldSkipCPEDTests = !useCPED || unsupportedPlatform; + const PROFILE_OPTIONS = { durationMillis: 500, intervalMicros: 1000, @@ -50,14 +53,10 @@ describe('Time Profiler', () => { }); it('should update state', function shouldUpdateState() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } const startTime = BigInt(Date.now()) * 1000n; - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } time.start({ intervalMicros: 20 * 1_000, durationMillis: PROFILE_OPTIONS.durationMillis, @@ -106,16 +105,12 @@ describe('Time Profiler', () => { }); it('should have labels', function shouldHaveLabels() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } this.timeout(3000); const intervalNanos = PROFILE_OPTIONS.intervalMicros * 1_000; - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } time.start({ intervalMicros: PROFILE_OPTIONS.intervalMicros, durationMillis: PROFILE_OPTIONS.durationMillis, @@ -141,8 +136,8 @@ describe('Time Profiler', () => { enableEndPoint = i % 2 === 0; const metrics = time.getMetrics(); const expectedAsyncContextCount = useCPED ? 1 : 0; - assert(metrics.totalAsyncContextCount === expectedAsyncContextCount); - assert(metrics.usedAsyncContextCount === expectedAsyncContextCount); + assert(metrics.totalAsyncContextCount >= expectedAsyncContextCount); + assert(metrics.usedAsyncContextCount >= expectedAsyncContextCount); validateProfile( time.stop( i < repeats - 1, @@ -549,16 +544,11 @@ describe('Time Profiler', () => { describe('lowCardinalityLabels', () => { it('should handle lowCardinalityLabels parameter in stop function', async function testLowCardinalityLabels() { - if (process.platform !== 'darwin' && process.platform !== 'linux') { + if (unsupportedPlatform) { this.skip(); } this.timeout(3000); - if (useCPED) { - // Ensure an async context frame is created to hold the profiler context. - new AsyncLocalStorage().enterWith(1); - } - // Set up some contexts with labels that we'll mark as low cardinality const lowCardLabel = 'service_name'; const highCardLabel = 'trace_id'; @@ -744,4 +734,240 @@ describe('Time Profiler', () => { assert.ok(threadId > 0); }); }); + + describe('runWithContext', () => { + it('should throw when profiler is not started', () => { + assert.throws(() => { + time.runWithContext({label: 'test'}, () => {}); + }, /Wall profiler is not started/); + }); + + it('should throw when useCPED is not enabled', function testNoCPED() { + if (unsupportedPlatform) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: false, + }); + + try { + assert.throws(() => { + time.runWithContext({label: 'test'}, () => {}); + }, /Can only use runWithContext with AsyncContextFrame/); + } finally { + time.stop(); + } + }); + + it('should run function with context when useCPED is enabled', function testRunWithContext() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test-value', id: '123'}; + let contextInsideFunction; + + time.runWithContext(testContext, () => { + contextInsideFunction = time.getContext(); + }); + + assert.deepEqual( + contextInsideFunction, + testContext, + 'Context should be accessible within function' + ); + } finally { + time.stop(); + } + }); + + it('should pass arguments to function correctly', function testArguments() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test'}; + const result = time.runWithContext( + testContext, + (a: number, b: string, c: boolean) => { + return {a, b, c}; + }, + 42, + 'hello', + true + ); + + assert.deepEqual( + result, + {a: 42, b: 'hello', c: true}, + 'Arguments should be passed correctly' + ); + } finally { + time.stop(); + } + }); + + it('should return function result', function testReturnValue() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'test'}; + const result = time.runWithContext(testContext, () => { + return 'test-result'; + }); + + assert.strictEqual( + result, + 'test-result', + 'Function result should be returned' + ); + } finally { + time.stop(); + } + }); + + it('should handle nested runWithContext calls', function testNestedCalls() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const outerContext = {label: 'outer'}; + const innerContext = {label: 'inner'}; + const results: string[] = []; + + time.runWithContext(outerContext, () => { + const ctx1 = time.getContext(); + results.push((ctx1 as any).label); + + time.runWithContext(innerContext, () => { + const ctx2 = time.getContext(); + results.push((ctx2 as any).label); + }); + + const ctx3 = time.getContext(); + results.push((ctx3 as any).label); + }); + + assert.deepEqual( + results, + ['outer', 'inner', 'outer'], + 'Nested contexts should be properly isolated and restored' + ); + } finally { + time.stop(); + } + }); + + it('should isolate context from outside runWithContext', function testContextIsolation() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const runWithContextContext = {label: 'inside'}; + let contextInside; + + time.runWithContext(runWithContextContext, () => { + contextInside = time.getContext(); + }); + + // Context outside runWithContext should be undefined since we're using CPED + const contextOutside = time.getContext(); + + assert.deepEqual( + contextInside, + runWithContextContext, + 'Context inside should match' + ); + assert.strictEqual( + contextOutside, + undefined, + 'Context outside should be undefined with CPED' + ); + } finally { + time.stop(); + } + }); + + it('should work with async functions', async function testAsyncFunction() { + if (shouldSkipCPEDTests) { + this.skip(); + } + + time.start({ + intervalMicros: PROFILE_OPTIONS.intervalMicros, + durationMillis: PROFILE_OPTIONS.durationMillis, + withContexts: true, + useCPED: true, + }); + + try { + const testContext = {label: 'async-test'}; + + const result = await time.runWithContext(testContext, async () => { + const ctx1 = time.getContext(); + await delay(10); + const ctx2 = time.getContext(); + return {ctx1, ctx2}; + }); + + assert.deepEqual( + result.ctx1, + testContext, + 'Context should be available before await' + ); + assert.deepEqual( + result.ctx2, + testContext, + 'Context should be preserved after await' + ); + } finally { + time.stop(); + } + }); + }); }); diff --git a/ts/test/worker.ts b/ts/test/worker.ts index bcc7d147..5bc7dca9 100644 --- a/ts/test/worker.ts +++ b/ts/test/worker.ts @@ -6,7 +6,6 @@ import {getAndVerifyPresence, getAndVerifyString} from './profiles-for-tests'; import {satisfies} from 'semver'; import assert from 'assert'; -import {AsyncLocalStorage} from 'async_hooks'; const DURATION_MILLIS = 1000; const intervalMicros = 10000; @@ -62,7 +61,6 @@ function getCpuUsage() { } async function main(durationMs: number) { - if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs * 3, intervalMicros, @@ -107,7 +105,6 @@ async function main(durationMs: number) { } async function worker(durationMs: number) { - if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs, intervalMicros,