Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"bindings/thread-cpu-clock.cc",
"bindings/translate-heap-profile.cc",
"bindings/translate-time-profile.cc",
"bindings/binding.cc"
"bindings/binding.cc",
"bindings/map-get.cc"
],
"include_dirs": [
"bindings",
Expand Down
375 changes: 375 additions & 0 deletions bindings/map-get.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,375 @@
/**
* Copyright 2025 Datadog. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "map-get.hh"

// Find a value in JavaScript map by directly reading the underlying V8 hash
// map.
//
// V8 uses TWO internal hash map representations:
// 1. SmallOrderedHashMap: For small maps (capacity 4-254)
// - Metadata stored as uint8_t bytes
// - Entry size: 2 (key, value)
// - Chain table separate from entries
//
// 2. OrderedHashMap: For larger maps (capacity >254)
// - Metadata stored as Smis in FixedArray
// - Entry size: 3 (key, value, chain)
// - Chain stored inline with entries
//
// This code handles both types by detecting the table format at runtime.
// Practical testing shows that at least the AsyncContextFrame maps use the
// large map format even for small cardinality maps, but just in case we handle
// both.

#include <cstdint>

namespace dd {

using Address = uintptr_t;

#ifndef _WIN32
// ============================================================================
// Constants from V8 internals
// ============================================================================

// Heap object tagging
constexpr int kHeapObjectTag = 1;

// OrderedHashMap/SmallOrderedHashMap shared constants
constexpr int kNotFound = -1;
constexpr int kSmallLoadFactor = 2;

// ============================================================================
// Helper Functions (needed by struct methods)
// ============================================================================

inline Address UntagPointer(Address tagged) {
return tagged - kHeapObjectTag;
}

inline bool IsSmi(Address value) {
return (value & 1) == 0;
}

// SmiToInt Conversion below valid only on 64-bit platforms, the only ones we
// support
static_assert(sizeof(void*) == 8, "Only 64-bit platforms supported");

inline int SmiToInt(Address smi) {
return static_cast<int>(static_cast<intptr_t>(smi) >> 32);
}

// ============================================================================
// V8 Hashtable Structure Definitions
// ============================================================================

// HeapObject layout - base for all V8 heap objects
// From v8/src/objects/heap-object.h
struct HeapObjectLayout {
Address classMap_; // Tagged pointer to the class map
};

// JavaScript Map object
struct JSMapLayout {
HeapObjectLayout header_; // Map is a HeapObject
Address properties_or_hash_; // not used by us
Address elements_; // not used by us
// Tagged pointer to a [Small]OrderedHashMapLayout
Address table_;
};

// V8 FixedArray: length_ is a Smi, followed by that many element slots
struct FixedArrayLayout {
HeapObjectLayout header_; // FixedArray is a HeapObject
Address length_;
Address elements_[0];
};

// NOTE: both OrderedHashMap and SmallOrderedHashMap have compatible method
// definitions so FindEntryByHash and FindValueByHash can be defined as
// templated function working on both.

// OrderedHashMap layout (for large maps, capacity >254)
// From v8/src/objects/ordered-hash-table.h
struct OrderedHashMapLayout {
FixedArrayLayout fixedArray_; // OrderedHashMap is a FixedArray
// The first 3 address slots in the FixedArray that is a Hashtable are the
// number of elements, deleted elements, and buckets. Each one is a Smi.
Address number_of_elements_;
Address number_of_deleted_elements_;
Address number_of_buckets_;
// First number_of_buckets_ entries in head_and_data_table_ is the head table:
// each entry is an index of the first entry (head of the linked list of
// entries) in the data table for that bucket. This is followed by the data
// table. Each data table entry uses three (kEntrySize == 3) tagged pointer
// slots:
// [0]: key (Tagged Object)
// [1]: value (Tagged Object)
// [2]: chain (Smi - next entry index or -1)
// All indices (both to the head of the list and to the next entry are
// expressed in number of entries from the start of the data table, so to
// convert it into a head_and_data_table_ you need to add number_of_buckets_
// (length of the head table) and then 3 * index.
Address head_and_data_table_[0]; // Variable: [head_table][data_table]

// Constants for entry structure
static constexpr int kEntrySize = 3;
static constexpr int kKeyOffset = 0;
static constexpr int kValueOffset = 1;
static constexpr int kChainOffset = 2;
static constexpr int kNotFoundValue = kNotFound;

// Get number of buckets (converts Smi to int)
int NumberOfBuckets() const {
return IsSmi(number_of_buckets_) ? SmiToInt(number_of_buckets_) : 0;
}

// Get an upper bound for number of element chain in a bucket. Used to prevent
// infinite lookup chain.
int GetMaxChainLength() const {
return IsSmi(number_of_elements_) && IsSmi(number_of_deleted_elements_)
? SmiToInt(number_of_elements_) +
SmiToInt(number_of_deleted_elements_)
: 0;
}

// Convert hash to bucket index
int HashToBucket(int hash) const {
int num_buckets = NumberOfBuckets();
return num_buckets > 0 ? (hash & (num_buckets - 1)) : 0;
}

// Get first entry index for a bucket
int GetFirstEntry(int bucket) const {
Address entry_smi = head_and_data_table_[bucket];
return IsSmi(entry_smi) ? SmiToInt(entry_smi) : kNotFound;
}

// Convert entry index to head_and_data_table_ index for the entry's key
int EntryToIndex(int entry) const {
return NumberOfBuckets() + (entry * kEntrySize);
}

// Get key at entry index
Address GetKey(int entry) const {
int index = EntryToIndex(entry);
return head_and_data_table_[index + kKeyOffset];
}

// Get value at entry index
Address GetValue(int entry) const {
int index = EntryToIndex(entry);
return head_and_data_table_[index + kValueOffset];
}

// Get next entry in chain
int GetNextChainEntry(int entry) const {
int index = EntryToIndex(entry);
Address chain_smi = head_and_data_table_[index + kChainOffset];
return IsSmi(chain_smi) ? SmiToInt(chain_smi) : kNotFound;
}
};

// SmallOrderedHashMap layout (for small maps, capacity 4-254)
// Memory layout (stores metadata as uint8_t, not Smis):
// [0]: map pointer (HeapObject)
// [kHeaderSize + 0]: number_of_elements (uint8)
// [kHeaderSize + 1]: number_of_deleted_elements (uint8)
// [kHeaderSize + 2]: number_of_buckets (uint8)
// [kHeaderSize + 3...]: padding (5 bytes on 64-bit, 1 byte on 32-bit)
// [DataTableStartOffset...]: data table (key-value pairs as Tagged)
// [...]: hash table (uint8 bucket indices)
// [...]: chain table (uint8 next entry indices)
//
// Each entry is 2 Tagged elements (kEntrySize = 2):
// [0]: key (Tagged Object)
// [1]: value (Tagged Object)
//
// From v8/src/objects/ordered-hash-table.h
struct SmallOrderedHashMapLayout {
HeapObjectLayout header_;
uint8_t number_of_elements_;
uint8_t number_of_deleted_elements_;
uint8_t number_of_buckets_;
uint8_t padding_[5]; // 5 bytes on 64-bit
// Variable length:
// - Address data_table_[capacity * kEntrySize] // Keys and values
// - uint8_t hash_table_[number_of_buckets_] // Bucket -> first entry
// - uint8_t chain_table_[capacity] // Entry -> next entry
Address data_table_[0];

// Constants for entry structure
static constexpr int kEntrySize = 2;
static constexpr int kKeyOffset = 0;
static constexpr int kValueOffset = 1;
static constexpr int kNotFoundValue = 255;

// Get capacity from number of buckets
int Capacity() const { return number_of_buckets_ * kSmallLoadFactor; }

int NumberOfBuckets() const { return number_of_buckets_; }

int GetMaxChainLength() const {
return number_of_elements_ + number_of_deleted_elements_;
}

int HashToBucket(int hash) const { return hash & (NumberOfBuckets() - 1); }

const uint8_t* GetHashTable() const {
return reinterpret_cast<const uint8_t*>(data_table_ +
Capacity() * kEntrySize);
}

const uint8_t* GetChainTable() const {
return GetHashTable() + number_of_buckets_;
}

// Get key at entry index
Address GetKey(int entry) const {
return data_table_[entry * kEntrySize + kKeyOffset];
}

// Get value at entry index
Address GetValue(int entry) const {
return data_table_[entry * kEntrySize + kValueOffset];
}

// Get first entry in bucket
uint8_t GetFirstEntry(int bucket) const {
const uint8_t* hash_table = GetHashTable();
return hash_table[bucket];
}

// Get next entry in chain
uint8_t GetNextChainEntry(int entry) const {
const uint8_t* chain_table = GetChainTable();
return chain_table[entry];
}
};

// ============================================================================
// Templated Hash Table Lookup
// ============================================================================

// Find an entry by a key and its hash in any hash table layout
// Template parameter LayoutT should be either OrderedHashMapLayout or
// SmallOrderedHashMapLayout
template <typename LayoutT>
int FindEntryByHash(const LayoutT* layout, int hash, Address key_to_find) {
int max_chain_length = layout->GetMaxChainLength();
int bucket = layout->HashToBucket(hash);
int entry = layout->GetFirstEntry(bucket);

// Paranoid: by never traversing more than the sum of elements and deleted
// elements we guarantee this terminates in bound time even if for some
// unforeseen reason the chain is cyclical.
for (int max_chain_left = max_chain_length;
entry != LayoutT::kNotFoundValue && max_chain_left > 0;
max_chain_left--) {
Address key_at_entry = layout->GetKey(entry);
if (key_at_entry == key_to_find) {
return entry;
}
entry = layout->GetNextChainEntry(entry);
}

return kNotFound;
}

// Find an entry by a key and its hash in any hash table layout, and return its
// value or the zero address if it is not found.
// Template parameter LayoutT should be either OrderedHashMapLayout or
// SmallOrderedHashMapLayout
template <typename LayoutT>
Address FindValueByHash(const LayoutT* layout, int hash, Address key_to_find) {
auto entry = FindEntryByHash(layout, hash, key_to_find);
return entry == kNotFound ? 0 : layout->GetValue(entry);
}

// Detect if the table is an OrderedHashMap or a SmallOrderedHashMap (or it can
// not safely be determined) by checking padding bytes. SmallOrderedHashMap has
// always-zero padding bytes after the metadata.
static uint8_t GetOrderedHashMapType(Address table_untagged) {
const SmallOrderedHashMapLayout* potential_small =
reinterpret_cast<const SmallOrderedHashMapLayout*>(table_untagged);

// Read the header as one 64-bit value for validation
uint64_t smallHeader =
*reinterpret_cast<const uint64_t*>(&potential_small->number_of_elements_);

static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
"Little-endian required");
// Small map will have some bits in bytes 0-2 be nonzero, and all bits in
// bytes 3-7 zero. That effectively limits the value range of smallHeader to
// [0x1-0xFFFFFF].
if (smallHeader > 0 && smallHeader < 0x1000000) {
auto num_elements = potential_small->number_of_elements_;
auto num_deleted = potential_small->number_of_deleted_elements_;
auto num_buckets = potential_small->number_of_buckets_;

// SmallOrderedHashMap has constraints:
// - num_buckets must be a power of 2 between 2 and 127
// - num_elements + num_deleted <= capacity (buckets * 2)
if (num_buckets >= 2 && num_buckets <= 127) {
// Check if num_buckets is a power of 2
if ((num_buckets & (num_buckets - 1)) == 0) {
auto capacity = num_buckets * kSmallLoadFactor;
if (num_elements + num_deleted <= capacity) {
return 1; // small map
}
}
}
return 2; // undecided
}
return 0; // large map
}

// ============================================================================
// Main entry point
// ============================================================================

// Lookup value in a Map given the hash and key pointer. If the key is not found
// in the map (or the lookup can not be performed) returns a zero Address (which
// is essentially a zero Smi value.)
Address GetValueFromMap(Address map_addr, int hash, Address key) {
const JSMapLayout* map_untagged =
reinterpret_cast<const JSMapLayout*>(UntagPointer(map_addr));
Address table_untagged = UntagPointer(map_untagged->table_);

switch (GetOrderedHashMapType(table_untagged)) {
case 0: {
const OrderedHashMapLayout* layout =
reinterpret_cast<const OrderedHashMapLayout*>(table_untagged);
return FindValueByHash(layout, hash, key);
}
case 1: {
const SmallOrderedHashMapLayout* layout =
reinterpret_cast<const SmallOrderedHashMapLayout*>(table_untagged);
return FindValueByHash(layout, hash, key);
}
}
return 0; // We couldn't determine the kind of the map, just return zero.
}

#else // _WIN32

Address GetValueFromMap(Address map_addr, int hash, Address key) {
return 0;
}

#endif // _WIN32
} // namespace dd
Loading