Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/join-protocol-v2/join-protocol-v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ The existing join code is in the repository "shardus-global-server", aka "Shardu
- On each cycle the active nodes decide how many nodes to add (already implemented).
- On each cycle the needed number of nodes N are selected from the standby node list based on a deterministic-but-unpredictable score that is a function of the node public key and the current cycle marker.
- The N nodes with the best score are added as joining nodes to the next cycle record.
- When a node is selected to join, some active nodes in the network send the cycle number to the selected node; letting it know that it has been selected.
- The selected standby node queries one of the active nodes for this cycle record and verifies that it is included as a joining node in the cycle record.
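- When a node is selected to join, it queries the `joined` route to verify
its inclusion in the cycle record. Active nodes no longer notify selected
nodes through the `/accepted` endpoint; that route is deprecated and disabled
by default (it can be re-enabled with the `enableAcceptedRoute` config flag).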
- robustQuery to ask if this node is in the `pendingJoinRequestList`

## Unjoining
Expand Down
1 change: 1 addition & 0 deletions src/config/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ const SERVER_CONFIG: StrictServerConfiguration = {
validateArchiverAppData: false,
useNetworkModes: true,
useJoinProtocolV2: true,
enableAcceptedRoute: false,
randomJoinRequestWait: 2000, //todo set this to 1000 before release
standbyListCyclesTTL: 10, //todo release should be > 1000
standbyListMaxRemoveTTL: 100, //todo set this be 100 for production
Expand Down
15 changes: 12 additions & 3 deletions src/p2p/Join/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,9 @@ const joinedRoute: P2P.P2PTypes.Route<Handler> = {
}

/**
* todo deprecate this or, finish it
* for now deprecating the accepted path. does not seem to have any value
* Deprecated route used by early join protocol implementations. It is now
* disabled by default and can be enabled for legacy testing via the
* `enableAcceptedRoute` config flag.
*/
const acceptedRoute: P2P.P2PTypes.Route<Handler> = {
method: 'POST',
Expand Down Expand Up @@ -784,7 +785,15 @@ const gossipStandbyRefresh: P2P.P2PTypes.GossipHandler<
}

export const routes = {
external: [cycleMarkerRoute, joinRoute, joinedRoute, joinedV2Route, acceptedRoute, unjoinRoute, standbyRefreshRoute],
external: [
cycleMarkerRoute,
joinRoute,
joinedRoute,
joinedV2Route,
...(config?.p2p?.enableAcceptedRoute ? [acceptedRoute] : []),
unjoinRoute,
standbyRefreshRoute,
],
gossip: {
'gossip-join': gossipJoinRoute,
'gossip-valid-join-requests': gossipValidJoinRequests,
Expand Down
11 changes: 4 additions & 7 deletions src/p2p/Join/v2/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { config, crypto, shardus } from '../../Context'
import * as CycleChain from '../../CycleChain'
import * as Self from '../../Self'
import rfdc from 'rfdc'
import { executeNodeSelection, notifyNewestJoinedConsensors } from './select'
import { executeNodeSelection } from './select'
import { attempt } from '../../Utils'
import { submitUnjoin } from './unjoin'
import { ResultAsync } from 'neverthrow'
Expand Down Expand Up @@ -43,12 +43,9 @@ export function init(): void {
// set up event listeners for cycle quarters
Self.emitter.on('cycle_q1_start', () => {
if (config.p2p.useJoinProtocolV2) {
//TODO clean out the accepted route or is it still useful?
//accepted endpoint does not return any more
// The accepted flow is deprecated
// notifyNewestJoinedConsensors().catch((e) => {
// console.error('failed to notify selected nodes:', e)
// })
// The sender side of the old "accepted" flow has been removed, and the
// `/accepted` route is disabled by default (`enableAcceptedRoute`).
// Nodes should query the joined routes to verify selection instead of
// waiting for an acceptance notification.
}
})
Self.emitter.on('cycle_q2_start', () => {
Expand Down
79 changes: 0 additions & 79 deletions src/p2p/Join/v2/select.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,85 +109,6 @@ export function selectNodes(maxAllowed: number): void {
}
}
}
/**
* Formerly notified the nodes that have been selected that they have been
* selected by calling their `accepted` endpoints.
*
* Currently a no-op: the function returns immediately without doing any work.
* The previous implementation is retained below only as commented-out code.
*
* @returns a promise that resolves with no value
*/
export async function notifyNewestJoinedConsensors(): Promise<void> {
return // no-op: the accepted endpoint seems to be deprecated and always fails

// --- previous implementation, commented out and never executed ---

// const counter = CycleChain.getNewest().counter

// if (!Self.isActive) {
// if (Self.isRestartNetwork && Self.isFirst) {
// nestedCountersInstance.countEvent('joinV2', `C${counter}: notifyNewestJoinedConsensors: isRestartNetwork && isFirst`)
// notifyingNewestJoinedConsensors()
// // // decide if we should be in charge of notifying joining nodes
// // const params = {
// // getOurNodeIndex: CycleChain.getNewest().mode === 'restart' ? 0 : getOurNodeIndex(),
// // activeByIdOrderLength:
// // CycleChain.getNewest().mode === 'restart' ? 1 : NodeList.activeByIdOrder.length,
// // NUM_NOTIFYING_NODES,
// // CycleChainNewestCounter: CycleChain.newest.counter,
// // }
// // console.log(`C${counter} fastIsPicked params: ${JSON.stringify(params)}`)
// // const shouldNotify = fastIsPicked(
// // CycleChain.getNewest().mode === 'restart' ? 1 : getOurNodeIndex(),
// // CycleChain.getNewest().mode === 'restart' ? 0 : NodeList.activeByIdOrder.length,
// // NUM_NOTIFYING_NODES,
// // CycleChain.newest.counter
// // )
// // console.log(`C${counter} shouldNotify: ${shouldNotify}`)
// } else console.warn(`C${counter} not notifying nodes because we are not active yet`)
// return
// }

// // decide if we should be in charge of notifying joining nodes
// const params = {
// getOurNodeIndex: getOurNodeIndex(),
// activeByIdOrderLength: NodeList.activeByIdOrder.length,
// NUM_NOTIFYING_NODES,
// CycleChainNewestCounter: CycleChain.newest.counter
// }
// console.log(`C${counter} fastIsPicked params: ${JSON.stringify(params)}`)
// const shouldNotify = fastIsPicked(
// getOurNodeIndex(),
// NodeList.activeByIdOrder.length,
// NUM_NOTIFYING_NODES,
// CycleChain.newest.counter
// )

// // if so, do so
// if (shouldNotify) {
// nestedCountersInstance.countEvent('joinV2', `C${counter}: notifyNewestJoinedConsensors: shouldNotify`)
// notifyingNewestJoinedConsensors()
// }
}

/**
 * Posts a signed acceptance offer to the `/accepted` endpoint of every node
 * listed in `CycleChain.newest.joinedConsensors`, skipping any entry whose
 * public key matches this node's own key.
 *
 * The HTTP posts are fired without being awaited; a rejected post is counted
 * under the `joinV2` counter and logged, and does not stop the loop.
 *
 * @returns a promise that resolves once all posts have been started
 */
export async function notifyingNewestJoinedConsensors(): Promise<void> {
  const cycleMarker = CycleChain.getCurrentCycleMarker()
  const cycleCounter = CycleChain.getNewest().counter

  for (const selectedNode of CycleChain.newest.joinedConsensors) {
    const selectedKey = selectedNode.publicKey

    // only other nodes are notified; our own entry is skipped
    if (selectedKey !== crypto.keypair.publicKey) {
      console.log(`C${cycleCounter} notifying node`, selectedKey, 'that it has been selected')

      // the offer carries the current cycle marker and our identity, signed by us
      const signedOffer = crypto.sign({
        cycleMarker: cycleMarker,
        activeNodePublicKey: crypto.keypair.publicKey,
      })
      const acceptedUrl = `http://${selectedNode.externalIp}:${selectedNode.externalPort}/accepted`

      // fire-and-forget: the request may be slow, so it is deliberately not awaited
      http.post(acceptedUrl, signedOffer).catch((err) => {
        nestedCountersInstance.countEvent(
          'joinV2',
          `C${cycleCounter}: notifyingNewestJoinedConsensors: http post failed`
        )
        console.error(`C${cycleCounter} failed to notify node ${selectedKey} that it has been selected:`, err)
      })
    }
  }
}
/**
* Returns the list of public keys of the nodes that have been selected and
* empties the list.
Expand Down
2 changes: 2 additions & 0 deletions src/shardus/shardus-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ export interface ServerConfiguration {
useNetworkModes: boolean
/** Use the new join protocol that gossips all valid join requests to validators. */
useJoinProtocolV2: boolean
/** Enable the deprecated `/accepted` route for legacy tests */
enableAcceptedRoute: boolean
/** Add a random wait before sending the join effect. This should not be needed, but it may act as a safety valve if timing in the network gets off,
* for example when the nodes trying to join do not have the same cycle time as the network
*/
Expand Down
Loading