From ca66bd4df6ed44bde2071f4e578a094b9440bfe8 Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 19:04:23 -0400 Subject: [PATCH 01/10] PN tweaks. --- src/debug/debug.ts | 10 +++++----- src/p2p/ProblemNodeHandler.ts | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/debug/debug.ts b/src/debug/debug.ts index bf14cc0f2..af05eba2f 100644 --- a/src/debug/debug.ts +++ b/src/debug/debug.ts @@ -231,11 +231,11 @@ class Debug { this.network.registerExternalGet('debug_simulateProblematic', isDebugModeMiddleware, (req, res) => { try { // Parse parameters with defaults - const missConsensus = req.query.missConsensus ? parseFloat(req.query.missConsensus as string) : 0 - const networkDelay = req.query.networkDelay ? parseInt(req.query.networkDelay as string) : 0 - const dropMessages = req.query.dropMessages ? parseFloat(req.query.dropMessages as string) : 0 - const slowResponse = req.query.slowResponse ? parseFloat(req.query.slowResponse as string) : 0 - const slowDelayMs = req.query.slowDelayMs ? parseInt(req.query.slowDelayMs as string) : 0 + const missConsensus = req.query.missConsensus ? parseFloat(req.query.missConsensus as string) : 1 + const networkDelay = req.query.networkDelay ? parseInt(req.query.networkDelay as string) : 100 + const dropMessages = req.query.dropMessages ? parseFloat(req.query.dropMessages as string) : 1 + const slowResponse = req.query.slowResponse ? parseFloat(req.query.slowResponse as string) : 1 + const slowDelayMs = req.query.slowDelayMs ? parseInt(req.query.slowDelayMs as string) : 10000 const reset = req.query.reset === 'true' // Validate parameters diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index c96a88725..d76a5b706 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -82,7 +82,7 @@ export function exportProblematicNodeCache(): string | null { } try { - return problematicNodeCache.toCompressedJSON() + return problematicNodeCache.toJSON() } catch (err) { error('Failed to export ProblematicNodeCache:', err) return null From eae04c5a13ba1a94b645eff0f3ea90cbe646280b Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 19:58:55 -0400 Subject: [PATCH 02/10] getConsecutiveRefutes() -> getMaxConsecutiveRefutes() --- src/debug/debug.ts | 2 +- src/p2p/ProblemNodeHandler.ts | 4 +- src/p2p/ProblematicNodeCache.ts | 46 +++++++++---------- test/unit/debug/index.test.ts | 2 +- test/unit/src/debug/debug.test.ts | 2 +- .../unit/src/p2p/ProblematicNodeCache.test.ts | 6 +-- 6 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/debug/debug.ts b/src/debug/debug.ts index af05eba2f..11e42ba3b 100644 --- a/src/debug/debug.ts +++ b/src/debug/debug.ts @@ -198,7 +198,7 @@ class Debug { id: nodeId.substring(0, 8), fullId: nodeId, refuteCycles: node.refuteCycles || [], - consecutiveRefutes: ProblemNodeHandler.getConsecutiveRefutes( + consecutiveRefutes: ProblemNodeHandler.getMaxConsecutiveRefutes( node.refuteCycles || [], currentCycleRecord.counter ), diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index d76a5b706..94c17e940 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -104,9 +104,9 @@ export function getRefutePercentage(refuteCycles: number[], currentCycle: number return 0 } -export function getConsecutiveRefutes(refuteCycles: number[], currentCycle: number): number { +export function getMaxConsecutiveRefutes(refuteCycles: number[], currentCycle: number): number { if (config.p2p.useProblematicNodeCacheV2 && problematicNodeCache) { - return problematicNodeCache.getConsecutiveRefutes(refuteCycles, currentCycle) + return problematicNodeCache.getMaxConsecutiveRefutes(refuteCycles, currentCycle) } return 0 } diff --git a/src/p2p/ProblematicNodeCache.ts b/src/p2p/ProblematicNodeCache.ts index 4ffc64049..be365498d 100644 --- a/src/p2p/ProblematicNodeCache.ts +++ b/src/p2p/ProblematicNodeCache.ts @@ -196,7 +196,7 @@ export class ProblematicNodeCache { const refuteCycles = this.refuteHistory.get(nodeId) || [] // Calculate consecutive refutes - const consecutiveRefutes = this.getConsecutiveRefutes(refuteCycles, currentCycle) + const consecutiveRefutes = this.getMaxConsecutiveRefutes(refuteCycles, currentCycle) // Calculate refute percentage const refutePercentage = this.getRefutePercentage(refuteCycles, currentCycle) @@ -213,40 +213,38 @@ export class ProblematicNodeCache { return metrics } - getConsecutiveRefutes(refuteCycles: number[], currentCycle: number): number { + getMaxConsecutiveRefutes(refuteCycles: number[], currentCycle: number): number { if (refuteCycles.length === 0) return 0 // Filter to only include refutes up to current cycle const relevantRefutes = refuteCycles.filter((cycle) => cycle <= currentCycle) if (relevantRefutes.length === 0) return 0 - // Count consecutive refutes ending at current cycle or one before - let count = 0 + // Find the maximum consecutive refutes in the retained cycle history const sortedRefutes = [...relevantRefutes].sort((a, b) => a - b) - - // Start from the end and count backwards - for (let i = sortedRefutes.length - 1; i >= 0; i--) { - const cycle = sortedRefutes[i] - - if (i === sortedRefutes.length - 1) { - // Last refute must be at current cycle or one before - if (cycle === currentCycle || cycle === currentCycle - 1) { - count = 1 - } else { - break - } + + let maxConsecutive = 1 // At least 1 if we have any refutes + let currentConsecutive = 1 + + // Iterate through sorted refutes to find all consecutive sequences + for (let i = 1; i < sortedRefutes.length; i++) { + const currentCycle = sortedRefutes[i] + const prevCycle = sortedRefutes[i - 1] + + if (currentCycle === prevCycle + 1) { + // Consecutive with previous cycle + currentConsecutive++ } else { - // Check if consecutive with next cycle - const nextCycle = sortedRefutes[i + 1] - if (cycle === nextCycle - 1) { - count++ - } else { - break - } + // Gap found, update max and reset current count + maxConsecutive = Math.max(maxConsecutive, currentConsecutive) + currentConsecutive = 1 } } - return count + // Update max with final sequence + maxConsecutive = Math.max(maxConsecutive, currentConsecutive) + + return maxConsecutive } getRefutePercentage(refuteCycles: number[], currentCycle: number): number { diff --git a/test/unit/debug/index.test.ts b/test/unit/debug/index.test.ts index 7b1366c62..7503bb922 100644 --- a/test/unit/debug/index.test.ts +++ b/test/unit/debug/index.test.ts @@ -62,7 +62,7 @@ jest.mock('../../../src/p2p/CycleChain', () => ({ jest.mock('../../../src/p2p/ProblemNodeHandler', () => ({ getProblematicNodes: jest.fn().mockReturnValue([]), getRefutePercentage: jest.fn().mockReturnValue(0), - getConsecutiveRefutes: jest.fn().mockReturnValue(0), + getMaxConsecutiveRefutes: jest.fn().mockReturnValue(0), isNodeProblematic: jest.fn().mockReturnValue(false), exportProblematicNodeCache: jest.fn().mockReturnValue(null), })) diff --git a/test/unit/src/debug/debug.test.ts b/test/unit/src/debug/debug.test.ts index 52bc05ad9..93232c1f7 100644 --- a/test/unit/src/debug/debug.test.ts +++ b/test/unit/src/debug/debug.test.ts @@ -39,7 +39,7 @@ jest.mock('../../../../src/p2p/Context', () => ({ })) jest.mock('../../../../src/p2p/ProblemNodeHandler', () => ({ getRefutePercentage: jest.fn(), - getConsecutiveRefutes: jest.fn(), + getMaxConsecutiveRefutes: jest.fn(), isNodeProblematic: jest.fn(), })) jest.mock('../../../../src/utils/nestedCounters') diff --git a/test/unit/src/p2p/ProblematicNodeCache.test.ts b/test/unit/src/p2p/ProblematicNodeCache.test.ts index e64e8a2d4..b56cba967 100644 --- a/test/unit/src/p2p/ProblematicNodeCache.test.ts +++ b/test/unit/src/p2p/ProblematicNodeCache.test.ts @@ -539,10 +539,10 @@ describe('ProblematicNodeCache', () => { expect(metrics.consecutiveRefutes).toBe(6) // cycles 95-100 }) - test('should reset consecutive count after gap', () => { - // node2 is refuted in 96-98, gap at 99, no refute at 100 + test('should find maximum consecutive refutes in history', () => { + // node2 is refuted in 96-98, gap at 99-100, max consecutive is 3 const metrics = cache.calculateNodeMetrics('node2', 100) - expect(metrics.consecutiveRefutes).toBe(0) // Gap breaks the consecutive count + expect(metrics.consecutiveRefutes).toBe(3) // Maximum consecutive refutes in history }) test('should handle consecutive refutes at history boundary', () => { From 713b1ef308ea4b658859336d4da352ade30f06cc Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 21:52:08 -0400 Subject: [PATCH 03/10] Problematic data to monitor payload. --- src/p2p/ProblemNodeHandler.ts | 41 +++++++++++++++++++++++++++++++++ src/p2p/ProblematicNodeCache.ts | 2 +- src/reporter/index.ts | 5 ++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index 94c17e940..f8d530375 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -97,6 +97,47 @@ export function getProblematicNodes(prevRecord: P2P.CycleCreatorTypes.CycleRecor } } +// Get problematic node info for reporting to monitor - info about self node only +export function getProblematicNodeInfoForSelf(nodeId: string): any | null { + if (!problematicNodeCache || !config.p2p.enableProblematicNodeCacheBuilding) { + return null + } + + try { + const currentCycle = CycleChain.newest?.counter || 0 + + const metrics = problematicNodeCache.calculateNodeMetrics(nodeId, currentCycle) + const refuteHistory = problematicNodeCache.refuteHistory.get(nodeId) || [] + + // Build cycle history for last 30 cycles + const cycleHistoryLength = 30 + const startCycle = Math.max(1, currentCycle - cycleHistoryLength + 1) + const cycleRefuteHistory: boolean[] = [] + + for (let cycle = startCycle; cycle <= currentCycle; cycle++) { + cycleRefuteHistory.push(refuteHistory.includes(cycle)) + } + + // Calculate total refutes in the history window + const totalRefutes = refuteHistory.filter((cycle) => cycle >= startCycle && cycle <= currentCycle).length + + const problematicNodeInfo = { + isProblematic: + metrics.consecutiveRefutes >= config.p2p.problematicNodeConsecutiveRefuteThreshold || + metrics.refutePercentage >= config.p2p.problematicNodeRefutePercentageThreshold, + totalRefutes: totalRefutes, + maxConsecutiveRefutes: metrics.consecutiveRefutes, + refutePercentage: metrics.refutePercentage, + cycleRefuteHistory: cycleRefuteHistory, + } + + return problematicNodeInfo + } catch (err) { + error('Failed to get problematic node info for self:', err) + return null + } +} + export function getRefutePercentage(refuteCycles: number[], currentCycle: number): number { if (config.p2p.useProblematicNodeCacheV2 && problematicNodeCache) { return problematicNodeCache.getRefutePercentage(refuteCycles, currentCycle) diff --git a/src/p2p/ProblematicNodeCache.ts b/src/p2p/ProblematicNodeCache.ts index be365498d..aecb81b99 100644 --- a/src/p2p/ProblematicNodeCache.ts +++ b/src/p2p/ProblematicNodeCache.ts @@ -222,7 +222,7 @@ export class ProblematicNodeCache { // Find the maximum consecutive refutes in the retained cycle history const sortedRefutes = [...relevantRefutes].sort((a, b) => a - b) - + let maxConsecutive = 1 // At least 1 if we have any refutes let currentConsecutive = 1 diff --git a/src/reporter/index.ts b/src/reporter/index.ts index 4c738d465..d6d3be8da 100644 --- a/src/reporter/index.ts +++ b/src/reporter/index.ts @@ -25,6 +25,7 @@ import { finishedSyncingCycle } from '../p2p/Join' import { currentCycle } from '../p2p/CycleCreator' import process from 'process' import { fireAndForget } from '../utils/functions/promises' +import * as ProblemNodeHandler from '../p2p/ProblemNodeHandler' const http = require('../http') const allZeroes64 = '0'.repeat(64) @@ -350,6 +351,9 @@ class Reporter { const archiverListHash = Archivers.getArchiverListHash() const lastInSyncResult = this.stateManager.accountPatcher.lastInSyncResult + // Get problematic node info for this validator + const problematicNodeInfo = ProblemNodeHandler.getProblematicNodeInfoForSelf(Self.id) + try { await this._sendReport({ repairsStarted, @@ -397,6 +401,7 @@ class Reporter { cycleFinishedSyncing, stillNeedsInitialPatchPostActive, memory: process.memoryUsage(), + problematicNodeInfo, }) if (this.stateManager != null && config.mode === 'debug' && !config.debug.disableTxCoverageReport) { this.stateManager.transactionQueue.resetTxCoverageMap() From 42a47f9deb75f89eee46a94f2dd7ae5cdd2ce344 Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 22:01:29 -0400 Subject: [PATCH 04/10] 2.16.0-prerelease.2 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 359787b6e..28fbf3e6f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.1", + "version": "2.16.0-prerelease.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.1", + "version": "2.16.0-prerelease.2", "dependencies": { "@hapi/sntp": "3.1.1", "@mapbox/node-pre-gyp": "1.0.10", diff --git a/package.json b/package.json index ca95436a8..06d2b525c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.1", + "version": "2.16.0-prerelease.2", "engines": { "node": "20.19.3" }, From 1653f5bc1e03643b935abb00a3124b820d1a423e Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 23:24:09 -0400 Subject: [PATCH 05/10] UX fixes --- src/p2p/ProblemNodeHandler.ts | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index f8d530375..63e46d360 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -109,17 +109,19 @@ export function getProblematicNodeInfoForSelf(nodeId: string): any | null { const metrics = problematicNodeCache.calculateNodeMetrics(nodeId, currentCycle) const refuteHistory = problematicNodeCache.refuteHistory.get(nodeId) || [] - // Build cycle history for last 30 cycles - const cycleHistoryLength = 30 - const startCycle = Math.max(1, currentCycle - cycleHistoryLength + 1) - const cycleRefuteHistory: boolean[] = [] + // Get the cache's cycle range to determine what cycles we have data for + const cacheInfo = problematicNodeCache.getCycleCoverage() + let startCycle = cacheInfo.cycleRange ? cacheInfo.cycleRange.min : currentCycle + let endCycle = cacheInfo.cycleRange ? cacheInfo.cycleRange.max : currentCycle - for (let cycle = startCycle; cycle <= currentCycle; cycle++) { + // Build cycle history for all cycles in the cache + const cycleRefuteHistory: boolean[] = [] + for (let cycle = startCycle; cycle <= endCycle; cycle++) { cycleRefuteHistory.push(refuteHistory.includes(cycle)) } - // Calculate total refutes in the history window - const totalRefutes = refuteHistory.filter((cycle) => cycle >= startCycle && cycle <= currentCycle).length + // Calculate total refutes (all refutes in the cache) + const totalRefutes = refuteHistory.length const problematicNodeInfo = { isProblematic: @@ -129,6 +131,8 @@ export function getProblematicNodeInfoForSelf(nodeId: string): any | null { maxConsecutiveRefutes: metrics.consecutiveRefutes, refutePercentage: metrics.refutePercentage, cycleRefuteHistory: cycleRefuteHistory, + oldestCycle: startCycle, // The oldest cycle in cache + newestCycle: endCycle // The newest cycle in cache } return problematicNodeInfo From 4df9d71a83fcb690c8cac2e01c38cafd5a7455de Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 23:24:35 -0400 Subject: [PATCH 06/10] Prettier'd --- src/p2p/ProblemNodeHandler.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index 63e46d360..5eeb8827a 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -131,8 +131,8 @@ export function getProblematicNodeInfoForSelf(nodeId: string): any | null { maxConsecutiveRefutes: metrics.consecutiveRefutes, refutePercentage: metrics.refutePercentage, cycleRefuteHistory: cycleRefuteHistory, - oldestCycle: startCycle, // The oldest cycle in cache - newestCycle: endCycle // The newest cycle in cache + oldestCycle: startCycle, // The oldest cycle in cache + newestCycle: endCycle, // The newest cycle in cache } return problematicNodeInfo From 942a7034dbb1ee9a5e1b2ca2a26f40a2a4c7a2e6 Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Wed, 6 Aug 2025 23:25:11 -0400 Subject: [PATCH 07/10] 2.16.0-prerelease.3 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 28fbf3e6f..2e1652183 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.2", + "version": "2.16.0-prerelease.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.2", + "version": "2.16.0-prerelease.3", "dependencies": { "@hapi/sntp": "3.1.1", "@mapbox/node-pre-gyp": "1.0.10", diff --git a/package.json b/package.json index 06d2b525c..a65ac28d5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.2", + "version": "2.16.0-prerelease.3", "engines": { "node": "20.19.3" }, From 1a58671ab14eeb7dfd08f02d020787e8a37eb42a Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Thu, 7 Aug 2025 08:55:23 -0400 Subject: [PATCH 08/10] Fix startCycle --- src/p2p/ProblemNodeHandler.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index 5eeb8827a..6a4cd35ef 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -131,7 +131,6 @@ export function getProblematicNodeInfoForSelf(nodeId: string): any | null { maxConsecutiveRefutes: metrics.consecutiveRefutes, refutePercentage: metrics.refutePercentage, cycleRefuteHistory: cycleRefuteHistory, - oldestCycle: startCycle, // The oldest cycle in cache newestCycle: endCycle, // The newest cycle in cache } From b198b87d08308edb3a8e218f78b3e0b60e7c3f26 Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Thu, 7 Aug 2025 11:44:55 -0400 Subject: [PATCH 09/10] Put in a problematic limiter --- src/config/server.ts | 2 +- src/p2p/ModeSystemFuncs.ts | 37 +++++++++--- src/p2p/ProblemNodeHandler.ts | 2 + src/p2p/ProblematicNodeCache.ts | 2 +- src/shardus/shardus-types.ts | 4 +- test/unit/src/p2p/ModeSystemFuncs.test.ts | 71 ++++++++++++----------- 6 files changed, 72 insertions(+), 46 deletions(-) diff --git a/src/config/server.ts b/src/config/server.ts index 2739d40cb..1dba271be 100644 --- a/src/config/server.ts +++ b/src/config/server.ts @@ -71,7 +71,6 @@ const SERVER_CONFIG: StrictServerConfiguration = { maxRotatedPerCycle: 1, flexibleRotationDelta: 1, flexibleRotationEnabled: false, - enableDangerousProblematicNodeRemoval: false, enableProblematicNodeRemoval: false, enableProblematicNodeRemovalOnCycle: 20000, maxProblematicNodeRemovalsPerCycle: 1, @@ -79,6 +78,7 @@ const SERVER_CONFIG: StrictServerConfiguration = { problematicNodeRefutePercentageThreshold: 0.1, problematicNodeHistoryLength: 60, problematicNodeRemovalCycleFrequency: 5, + problematicNodeRemovalSafetyDelta: 1, // How far below minNodes before we stop removing problematic nodes // New flags for problematic node cache v2 useProblematicNodeCacheV2: false, // When true, use the new cache-based implementation enableProblematicNodeCacheBuilding: false, // Enable shadow mode cache building for validation diff --git a/src/p2p/ModeSystemFuncs.ts b/src/p2p/ModeSystemFuncs.ts index 6823167ae..db21d2049 100644 --- a/src/p2p/ModeSystemFuncs.ts +++ b/src/p2p/ModeSystemFuncs.ts @@ -637,10 +637,6 @@ export function getExpiredRemovedV3( // filter out apoptosized nodes from the problematic nodes const problematicNodes = problematicWithApoptosizedNodes.filter((id) => !apoptosizedNodesList.includes(id)) const canRemoveProblematicNodesThisCycle = prevRecord.counter % config.p2p.problematicNodeRemovalCycleFrequency === 0 - const numProblematicRemovals = Math.min( - problematicNodes.length, - canRemoveProblematicNodesThisCycle ? config.p2p.maxProblematicNodeRemovalsPerCycle || 1 : 0 - ) // we can remove `remove` nodes, but we *must* remove the number of apoptosized nodes, // the remainder is the number of expired nodes we can remove this cycle @@ -648,6 +644,24 @@ export function getExpiredRemovedV3( const numPotentiallyExpiredRemovals = Math.max(remove - numApoptosizedRemovals, 0) const numExpiredRemovals = Math.min(numPotentiallyExpiredRemovals, numExpiredNodes) + // Calculate the safety threshold for problematic node removal + const safetyThreshold = config.p2p.minNodes - config.p2p.problematicNodeRemovalSafetyDelta + const activeAfterOtherRemovals = active - numApoptosizedRemovals - numExpiredRemovals + add + + // Check if we're below the safety threshold or would go below it + let numProblematicRemovals = 0 + let problematicRemovalPrevented = false + if (activeAfterOtherRemovals > safetyThreshold && canRemoveProblematicNodesThisCycle) { + // Calculate how many problematic nodes we can safely remove without going below threshold + const maxSafeProblematicRemovals = Math.max(0, activeAfterOtherRemovals - safetyThreshold) + const maxConfiguredRemovals = config.p2p.maxProblematicNodeRemovalsPerCycle + + numProblematicRemovals = Math.min(problematicNodes.length, maxConfiguredRemovals, maxSafeProblematicRemovals) + } else if (canRemoveProblematicNodesThisCycle && problematicNodes.length > 0) { + // We would have removed problematic nodes but safety threshold prevents it + problematicRemovalPrevented = true + } + const cycle = CycleChain.newest.counter if (cycle > lastLoggedCycle && remove > 0) { @@ -675,14 +689,23 @@ export function getExpiredRemovedV3( canRemoveProblematicNodesThisCycle: canRemoveProblematicNodesThisCycle, numPotentiallyExpiredRemovals: numPotentiallyExpiredRemovals, numExpiredRemovals: numExpiredRemovals, + activeAfterOtherRemovals: activeAfterOtherRemovals, + safetyThreshold: safetyThreshold, + problematicRemovalPrevented: problematicRemovalPrevented, } nestedCountersInstance.countEvent('p2p', `results of getExpiredRemovedV3: ${JSON.stringify(counters)}`) + + if (problematicRemovalPrevented) { + nestedCountersInstance.countEvent( + 'p2p', + `problematic node removal prevented: active after removals (${activeAfterOtherRemovals}) <= safety threshold (${safetyThreshold})` + ) + } } - const dangerousRemoval = remove === 0 && config.p2p.enableDangerousProblematicNodeRemoval === false const nodesNeverExpire = config.p2p.nodeExpiryAge < 0 - // if its recommended that we dont remove any nodes, Or the nodes never expire, we MUST adhere to this. - if (dangerousRemoval || nodesNeverExpire) { + // if nodes never expire, we MUST adhere to this. + if (nodesNeverExpire) { return { problematic: 0, expired: 0, diff --git a/src/p2p/ProblemNodeHandler.ts b/src/p2p/ProblemNodeHandler.ts index 6a4cd35ef..b0128ac7f 100644 --- a/src/p2p/ProblemNodeHandler.ts +++ b/src/p2p/ProblemNodeHandler.ts @@ -95,6 +95,8 @@ export function getProblematicNodes(prevRecord: P2P.CycleCreatorTypes.CycleRecor const activeNodeIds = new Set(NodeList.activeByIdOrder.map((node) => node.id)) return problematicNodeCache.getProblematicNodes(prevRecord.counter, activeNodeIds) } + // Return empty array when feature is disabled + return [] } // Get problematic node info for reporting to monitor - info about self node only diff --git a/src/p2p/ProblematicNodeCache.ts b/src/p2p/ProblematicNodeCache.ts index aecb81b99..46471b8dd 100644 --- a/src/p2p/ProblematicNodeCache.ts +++ b/src/p2p/ProblematicNodeCache.ts @@ -272,7 +272,7 @@ export class ProblematicNodeCache { } // Sort by score and return top N based on maxProblematicNodeRemovalsPerCycle - const maxRemovals = this.config.p2p.maxProblematicNodeRemovalsPerCycle || 1 + const maxRemovals = this.config.p2p.maxProblematicNodeRemovalsPerCycle return problematicNodes .sort((a, b) => b.score - a.score) diff --git a/src/shardus/shardus-types.ts b/src/shardus/shardus-types.ts index a1711b797..65927d988 100644 --- a/src/shardus/shardus-types.ts +++ b/src/shardus/shardus-types.ts @@ -774,13 +774,13 @@ export interface ServerConfiguration { /** Problematic Node configurations */ /** enable problematic node removal */ enableProblematicNodeRemoval?: boolean - /** when true, we will remove problematic nodes even when calculateToAcceptV2 says we should not remove any nodes. This is useful in development when testing this feature. */ - enableDangerousProblematicNodeRemoval?: boolean /** enable problematic node removal on a specific cycle. This is to allow the network to stabilize before removing problematic nodes. * enableProblematicNodeRemoval must be true for this to take effect*/ enableProblematicNodeRemovalOnCycle?: number /** The problematicNodeRemovalCycleFrequency parameter is an Integer specifying the number of cycles between problematic node removals. */ problematicNodeRemovalCycleFrequency?: number + /** The problematicNodeRemovalSafetyDelta parameter is an Integer specifying how far below minNodes the active count can be before we stop removing problematic nodes. For example, if minNodes is 100 and this value is 10, we won't remove problematic nodes if active count < 90. */ + problematicNodeRemovalSafetyDelta?: number /** The maxProblematicNodeRemovalsPerCycle parameter is an Integer specifying the maximum number of problematic nodes that can be removed from the network each cycle. */ maxProblematicNodeRemovalsPerCycle?: number /** The problematicNodeConsecutiveRefuteThreshold parameter is an Integer specifying the number of consecutive refutes a node must have before it is considered problematic. */ diff --git a/test/unit/src/p2p/ModeSystemFuncs.test.ts b/test/unit/src/p2p/ModeSystemFuncs.test.ts index 7215b6d08..4aaea2d58 100644 --- a/test/unit/src/p2p/ModeSystemFuncs.test.ts +++ b/test/unit/src/p2p/ModeSystemFuncs.test.ts @@ -36,9 +36,9 @@ jest.mock('../../../../src/p2p/Context', () => ({ rotationCountMultiply: 1, rotationCountAdd: 0, rotationPercentActive: 0.001, - enableDangerousProblematicNodeRemoval: false, problematicNodeRemovalCycleFrequency: 10, maxProblematicNodeRemovalsPerCycle: 1, + problematicNodeRemovalSafetyDelta: 1, }, debug: { verboseNestedCounters: false, @@ -174,9 +174,9 @@ describe('ModeSystemFuncs', () => { rotationCountMultiply: 1, rotationCountAdd: 0, rotationPercentActive: 0.001, - enableDangerousProblematicNodeRemoval: false, problematicNodeRemovalCycleFrequency: 10, maxProblematicNodeRemovalsPerCycle: 1, + problematicNodeRemovalSafetyDelta: 1, } // Reset mock functions @@ -1083,33 +1083,6 @@ describe('ModeSystemFuncs', () => { ;(getProblematicNodes as jest.Mock).mockReturnValue([]) }) - it('should return empty result when dangerous removal is prevented', () => { - ;(config.p2p.enableDangerousProblematicNodeRemoval as any) = false - ;(config.p2p.syncFloorEnabled as any) = false // Use old logic - - const prevRecord = { - start: Date.now(), - desired: 20, - counter: 1, - mode: 'processing', - lost: [], - } as any - - const txs = { apoptosis: [] } as any - - mockNodeList.activeByIdOrder = new Array(20) - mockNodeList.byJoinOrder = new Array(20) - mockTargetCount = 20 // No removals - - const result = ModeSystemFuncs.getExpiredRemovedV3(prevRecord, lastLoggedCycle, txs, mockInfo) - - expect(result).toEqual({ - problematic: 0, - expired: 0, - removed: [], - }) - }) - it('should return empty result when nodes never expire', () => { ;(config.p2p.nodeExpiryAge as any) = -1 @@ -1175,6 +1148,8 @@ describe('ModeSystemFuncs', () => { ;(config.p2p.problematicNodeRemovalCycleFrequency as any) = 10 ;(config.p2p.maxProblematicNodeRemovalsPerCycle as any) = 2 ;(config.p2p.syncFloorEnabled as any) = false // Use old logic + ;(config.p2p.minNodes as any) = 10 + ;(config.p2p.problematicNodeRemovalSafetyDelta as any) = 1 const prevRecord = { start: now, @@ -1184,6 +1159,16 @@ describe('ModeSystemFuncs', () => { lost: [], } as any + // Create enough nodes to be above safety threshold + const nodes = [] + for (let i = 0; i < 15; i++) { + nodes.push({ + id: `node${i}`, + status: 'active', + activeTimestamp: now - 100000, // Not expired + }) + } + const problematicNode1 = { id: 'prob1', status: 'active', @@ -1195,9 +1180,11 @@ describe('ModeSystemFuncs', () => { activeTimestamp: now - 100000, // Not expired } - mockNodeList.byJoinOrder = [problematicNode1, problematicNode2] - mockNodeList.activeByIdOrder = [problematicNode1, problematicNode2] - mockTargetCount = 0 // Remove 2 nodes + // Add problematic nodes to the node list + const allNodes = [...nodes, problematicNode1, problematicNode2] + mockNodeList.byJoinOrder = allNodes + mockNodeList.activeByIdOrder = allNodes + mockTargetCount = 15 // Remove 2 nodes (17 active - 15 target = 2 to remove) ;(getProblematicNodes as jest.Mock).mockReturnValue(['prob1', 'prob2']) const txs = { apoptosis: [] } as any @@ -1284,6 +1271,9 @@ describe('ModeSystemFuncs', () => { ;(config.p2p.nodeExpiryAge as any) = 300000 ;(config.p2p.problematicNodeRemovalCycleFrequency as any) = 10 ;(config.p2p.syncFloorEnabled as any) = false // Use old logic + ;(config.p2p.minNodes as any) = 10 + ;(config.p2p.problematicNodeRemovalSafetyDelta as any) = 1 + ;(config.p2p.maxProblematicNodeRemovalsPerCycle as any) = 2 const prevRecord = { start: now, @@ -1293,6 +1283,16 @@ describe('ModeSystemFuncs', () => { lost: [], } as any + // Create enough nodes to be above safety threshold + const nodes = [] + for (let i = 0; i < 15; i++) { + nodes.push({ + id: `node${i}`, + status: 'active', + activeTimestamp: now - 100000, // Not expired + }) + } + const problematicNode = { id: 'prob1', status: 'active', @@ -1304,13 +1304,14 @@ describe('ModeSystemFuncs', () => { activeTimestamp: now - 100000, } - mockNodeList.byJoinOrder = [problematicNode, apoptosisProblematicNode] - mockNodeList.activeByIdOrder = [problematicNode, apoptosisProblematicNode] + const allNodes = [...nodes, problematicNode, apoptosisProblematicNode] + mockNodeList.byJoinOrder = allNodes + mockNodeList.activeByIdOrder = allNodes mockNodeList.nodes = new Map([ ['prob1', problematicNode], ['apopprob1', apoptosisProblematicNode], ]) - mockTargetCount = 0 // Remove 2 nodes + mockTargetCount = 16 // Remove 1 node (17 active - 16 target = 1 to remove) // Both nodes are problematic, but one is also apoptosis ;(getProblematicNodes as jest.Mock).mockReturnValue(['prob1', 'apopprob1']) From 24f86addfea2302c1b563b41d4ee87825065e2d2 Mon Sep 17 00:00:00 2001 From: Marc Hanson Date: Thu, 7 Aug 2025 11:45:28 -0400 Subject: [PATCH 10/10] 2.16.0-prerelease.4 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2e1652183..0e7f9488a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.3", + "version": "2.16.0-prerelease.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.3", + "version": "2.16.0-prerelease.4", "dependencies": { "@hapi/sntp": "3.1.1", "@mapbox/node-pre-gyp": "1.0.10", diff --git a/package.json b/package.json index a65ac28d5..40adbab86 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@shardeum-foundation/core", - "version": "2.16.0-prerelease.3", + "version": "2.16.0-prerelease.4", "engines": { "node": "20.19.3" },