@@ -2788,6 +2788,9 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
27882788 // Recruit log routers for old generations of the primary locality
27892789 if (tLogs->locality == locality) {
27902790 logRouterInitializationReplies.emplace_back ();
2791+ TraceEvent (" LogRouterInitReqSent1" )
2792+ .detail (" Locality" , locality)
2793+ .detail (" LogRouterTags" , self->logRouterTags );
27912794 for (int i = 0 ; i < self->logRouterTags ; i++) {
27922795 InitializeLogRouterRequest req;
27932796 req.recoveryCount = recoveryCount;
@@ -2798,6 +2801,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
27982801 req.locality = locality;
27992802 req.recoverAt = self->recoverAt .get ();
28002803 req.knownLockedTLogIds = self->knownLockedTLogIds ;
2804+ req.allowDropInSim = !forRemote;
28012805 auto reply = transformErrors (
28022806 throwErrorOr (workers[nextRouter].logRouter .getReplyUnlessFailedFor (
28032807 req, SERVER_KNOBS->TLOG_TIMEOUT , SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY )),
@@ -2839,6 +2843,9 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
28392843 // Recruit log routers for old generations of the primary locality
28402844 if (tLogs->locality == locality) {
28412845 logRouterInitializationReplies.emplace_back ();
2846+ TraceEvent (" LogRouterInitReqSent2" )
2847+ .detail (" Locality" , locality)
2848+ .detail (" LogRouterTags" , old.logRouterTags );
28422849 for (int i = 0 ; i < old.logRouterTags ; i++) {
28432850 InitializeLogRouterRequest req;
28442851 req.recoveryCount = recoveryCount;
@@ -2848,6 +2855,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
28482855 req.tLogPolicy = tLogPolicy;
28492856 req.locality = locality;
28502857 req.recoverAt = old.recoverAt ;
2858+ req.allowDropInSim = !forRemote;
28512859 auto reply = transformErrors (
28522860 throwErrorOr (workers[nextRouter].logRouter .getReplyUnlessFailedFor (
28532861 req, SERVER_KNOBS->TLOG_TIMEOUT , SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY )),
@@ -2860,7 +2868,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
28602868 }
28612869 }
28622870
2863- wait (waitForAll (allReplies));
2871+ wait (traceAfter ( waitForAll (allReplies), " AllLogRouterRepliesReceived " ));
28642872
28652873 int nextReplies = 0 ;
28662874 lastStart = std::numeric_limits<Version>::max ();
@@ -2997,13 +3005,14 @@ ACTOR Future<Void> TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst
29973005 logSet->startVersion ,
29983006 localities,
29993007 logSet->tLogPolicy ,
3000- true );
3008+ /* forRemote */ true );
30013009 }
30023010
30033011 state std::vector<Future<TLogInterface>> logRouterInitializationReplies;
30043012 const Version startVersion = oldLogSystem->logRouterTags == 0
30053013 ? oldLogSystem->recoverAt .get () + 1
30063014 : std::max (self->tLogs [0 ]->startVersion , logSet->startVersion );
3015+ TraceEvent (" LogRouterInitReqSent3" ).detail (" Locality" , remoteLocality).detail (" LogRouterTags" , self->logRouterTags );
30073016 for (int i = 0 ; i < self->logRouterTags ; i++) {
30083017 InitializeLogRouterRequest req;
30093018 req.recoveryCount = recoveryCount;
@@ -3012,6 +3021,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst
30123021 req.tLogLocalities = localities;
30133022 req.tLogPolicy = logSet->tLogPolicy ;
30143023 req.locality = remoteLocality;
3024+ req.allowDropInSim = false ;
30153025 TraceEvent (" RemoteTLogRouterReplies" , self->dbgid )
30163026 .detail (" WorkerID" , remoteWorkers.logRouters [i % remoteWorkers.logRouters .size ()].id ());
30173027 logRouterInitializationReplies.push_back (transformErrors (
@@ -3090,7 +3100,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst
30903100
30913101 remoteTLogInitializationReplies.reserve (remoteWorkers.remoteTLogs .size ());
30923102 for (int i = 0 ; i < remoteWorkers.remoteTLogs .size (); i++) {
3093- TraceEvent (" RemoteTLogReplies " , self->dbgid ).detail (" WorkerID" , remoteWorkers.remoteTLogs [i].id ());
3103+ TraceEvent (" RemoteTLogInitReqSent " , self->dbgid ).detail (" WorkerID" , remoteWorkers.remoteTLogs [i].id ());
30943104 remoteTLogInitializationReplies.push_back (transformErrors (
30953105 throwErrorOr (remoteWorkers.remoteTLogs [i].tLog .getReplyUnlessFailedFor (
30963106 remoteTLogReqs[i], SERVER_KNOBS->TLOG_TIMEOUT , SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY )),
@@ -3289,7 +3299,7 @@ ACTOR Future<Reference<ILogSystem>> TagPartitionedLogSystem::newEpoch(
32893299 logSystem->tLogs [0 ]->startVersion ,
32903300 localities,
32913301 logSystem->tLogs [0 ]->tLogPolicy ,
3292- false );
3302+ /* forRemote */ false );
32933303 if (oldLogSystem->knownCommittedVersion - logSystem->tLogs [0 ]->startVersion >
32943304 SERVER_KNOBS->MAX_RECOVERY_VERSIONS ) {
32953305 // make sure we can recover in the other DC.
@@ -3380,7 +3390,7 @@ ACTOR Future<Reference<ILogSystem>> TagPartitionedLogSystem::newEpoch(
33803390
33813391 primaryTLogReplies.reserve (recr.tLogs .size ());
33823392 for (int i = 0 ; i < recr.tLogs .size (); i++) {
3383- TraceEvent (" PrimaryTLogReqSent " , logSystem->getDebugID ()).detail (" WorkerID" , recr.tLogs [i].id ());
3393+ TraceEvent (" PrimaryTLogInitReqSent " , logSystem->getDebugID ()).detail (" WorkerID" , recr.tLogs [i].id ());
33843394 primaryTLogReplies.push_back (transformErrors (
33853395 throwErrorOr (recr.tLogs [i].tLog .getReplyUnlessFailedFor (
33863396 reqs[i], SERVER_KNOBS->TLOG_TIMEOUT , SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY )),
@@ -3449,7 +3459,7 @@ ACTOR Future<Reference<ILogSystem>> TagPartitionedLogSystem::newEpoch(
34493459
34503460 satelliteInitializationReplies.reserve (recr.satelliteTLogs .size ());
34513461 for (int i = 0 ; i < recr.satelliteTLogs .size (); i++) {
3452- TraceEvent (" PrimarySatelliteTLogReplies " , logSystem->getDebugID ())
3462+ TraceEvent (" PrimarySatelliteTLogInitReqSent " , logSystem->getDebugID ())
34533463 .detail (" WorkerID" , recr.satelliteTLogs [i].id ());
34543464 satelliteInitializationReplies.push_back (transformErrors (
34553465 throwErrorOr (recr.satelliteTLogs [i].tLog .getReplyUnlessFailedFor (
0 commit comments