Skip to content

Commit eaadf9d

Browse files
igchorvinser52
authored andcommitted
Fix race in acquire (#68)
The assumption for moving items was that once item is unmarked no one can add new waiters for that item. However, since incrementing item ref count was not done under the MoveMap lock, there was a race: item could have been unmarked right after incRef returned incFailedMoving.
1 parent b17a7a7 commit eaadf9d

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,16 +1023,21 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
10231023
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
10241024

10251025
// TODO: do not block incRef for child items to avoid deadlock
1026-
auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1027-
auto incRes = incRef(*it, failIfMoving);
1028-
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1029-
return WriteHandle{it, *this};
1030-
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1031-
// item is being evicted
1032-
return WriteHandle{};
1033-
} else {
1034-
// item is being moved - wait for completion
1035-
return handleWithWaitContextForMovingItem(*it);
1026+
const auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1027+
1028+
while (true) {
1029+
auto incRes = incRef(*it, failIfMoving);
1030+
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1031+
return WriteHandle{it, *this};
1032+
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1033+
// item is being evicted
1034+
return WriteHandle{};
1035+
} else {
1036+
// item is being moved - wait for completion
1037+
WriteHandle handle;
1038+
if (tryGetHandleWithWaitContextForMovingItem(*it, handle))
1039+
return handle;
1040+
}
10361041
}
10371042
}
10381043

@@ -1254,20 +1259,25 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
12541259
* 3. Wait until the moving thread will complete its job.
12551260
*/
12561261
template <typename CacheTrait>
1257-
typename CacheAllocator<CacheTrait>::WriteHandle
1258-
CacheAllocator<CacheTrait>::handleWithWaitContextForMovingItem(Item& item) {
1262+
bool
1263+
CacheAllocator<CacheTrait>::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) {
12591264
auto shard = getShardForKey(item.getKey());
12601265
auto& movesMap = getMoveMapForShard(shard);
12611266
{
12621267
auto lock = getMoveLockForShard(shard);
12631268

1269+
// item might have been evicted or moved before the lock was acquired
1270+
if (!item.isMoving())
1271+
return false;
1272+
12641273
WriteHandle hdl{*this};
12651274
auto waitContext = hdl.getItemWaitContext();
12661275

12671276
auto ret = movesMap.try_emplace(item.getKey(), std::make_unique<MoveCtx>());
12681277
ret.first->second->addWaiter(std::move(waitContext));
12691278

1270-
return hdl;
1279+
handle = std::move(hdl);
1280+
return true;
12711281
}
12721282
}
12731283

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2280,7 +2280,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
22802280

22812281
size_t memoryTierSize(TierId tid) const;
22822282

2283-
WriteHandle handleWithWaitContextForMovingItem(Item& item);
2283+
bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle);
22842284

22852285
size_t wakeUpWaitersLocked(folly::StringPiece key, WriteHandle&& handle);
22862286

0 commit comments

Comments
 (0)