@@ -213,6 +213,7 @@ bool MtmVolksWagenMode;
213
213
TransactionId MtmUtilityProcessedInXid ;
214
214
215
215
static char * MtmConnStrs ;
216
+ static char * MtmClusterName ;
216
217
static int MtmQueueSize ;
217
218
static int MtmWorkers ;
218
219
static int MtmVacuumDelay ;
@@ -1866,6 +1867,39 @@ static void MtmRaftableInitialize()
1866
1867
raftable_start (MtmNodeId - 1 );
1867
1868
}
1868
1869
1870
+ static void MtmCheckControlFile (void )
1871
+ {
1872
+ char controlFilePath [MAXPGPATH ];
1873
+ char buf [MULTIMASTER_MAX_CTL_STR_SIZE ];
1874
+ FILE * f ;
1875
+ snprintf (controlFilePath , MAXPGPATH , "%s/global/mmts_control" , DataDir );
1876
+ f = fopen (controlFilePath , "r" );
1877
+ if (f != NULL && fgets (buf , sizeof buf , f )) {
1878
+ char * sep = strchr (buf , ':' );
1879
+ if (sep == NULL ) {
1880
+ elog (FATAL , "File mmts_control doesn't contain cluster name" );
1881
+ }
1882
+ * sep = '\0' ;
1883
+ if (strcmp (buf , MtmClusterName ) != 0 ) {
1884
+ elog (FATAL , "Database belongs to some other cluster %s rather than %s" , buf , MtmClusterName );
1885
+ }
1886
+ if (sscanf (sep + 1 , "%d" , & Mtm -> donorNodeId ) != 1 ) {
1887
+ elog (FATAL , "File mmts_control doesn't contain node id" );
1888
+ }
1889
+ fclose (f );
1890
+ } else {
1891
+ if (f != NULL ) {
1892
+ fclose (f );
1893
+ }
1894
+ f = fopen (controlFilePath , "w" );
1895
+ if (f == NULL ) {
1896
+ elog (FATAL , "Failed to create mmts_control file: %m" );
1897
+ }
1898
+ Mtm -> donorNodeId = -1 ;
1899
+ fprintf (f , "%s:%d\n" , MtmClusterName , Mtm -> donorNodeId );
1900
+ fclose (f );
1901
+ }
1902
+ }
1869
1903
1870
1904
static void MtmInitialize ()
1871
1905
{
@@ -1930,6 +1964,8 @@ static void MtmInitialize()
1930
1964
MtmDoReplication = true;
1931
1965
TM = & MtmTM ;
1932
1966
LWLockRelease (AddinShmemInitLock );
1967
+
1968
+ MtmCheckControlFile ();
1933
1969
}
1934
1970
1935
1971
static void
@@ -2471,6 +2507,19 @@ _PG_init(void)
2471
2507
NULL /* GucShowHook show_hook */
2472
2508
);
2473
2509
2510
+ DefineCustomStringVariable (
2511
+ "multimaster.cluster_name" ,
2512
+ "Name of the cluster" ,
2513
+ NULL ,
2514
+ & MtmClusterName ,
2515
+ "mmts" ,
2516
+ PGC_BACKEND , /* context */
2517
+ 0 , /* flags */
2518
+ NULL , /* GucStringCheckHook check_hook */
2519
+ NULL , /* GucStringAssignHook assign_hook */
2520
+ NULL /* GucShowHook show_hook */
2521
+ );
2522
+
2474
2523
DefineCustomIntVariable (
2475
2524
"multimaster.node_id" ,
2476
2525
"Multimaster node ID" ,
@@ -2608,8 +2657,10 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2608
2657
MtmLock (LW_EXCLUSIVE );
2609
2658
if (Mtm -> status == MTM_RECOVERY ) {
2610
2659
recovery = true;
2611
- if (Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId ) {
2612
- /* Choose for recovery first available slot */
2660
+ if ((Mtm -> recoverySlot == 0 && (Mtm -> donorNodeId < 0 || Mtm -> donorNodeId == nodeId ))
2661
+ || Mtm -> recoverySlot == nodeId )
2662
+ {
2663
+ /* Choose for recovery first available slot or slot of donor node (if any) */
2613
2664
elog (WARNING , "Process %d starts recovery from node %d" , MyProcPid , nodeId );
2614
2665
Mtm -> recoverySlot = nodeId ;
2615
2666
Mtm -> nReceivers = 0 ;
@@ -2697,6 +2748,8 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2697
2748
{
2698
2749
ListCell * param ;
2699
2750
bool recoveryCompleted = false;
2751
+ XLogRecPtr recoveryStartPos = InvalidXLogRecPtr ;
2752
+
2700
2753
MtmIsRecoverySession = false;
2701
2754
Mtm -> nodes [MtmReplicationNodeId - 1 ].senderPid = MyProcPid ;
2702
2755
Mtm -> nodes [MtmReplicationNodeId - 1 ].senderStartTime = MtmGetSystemTime ();
@@ -2716,11 +2769,21 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2716
2769
elog (ERROR , "Replication mode is not specified" );
2717
2770
}
2718
2771
break ;
2772
+ } else if (strcmp ("mtm_restart_pos" , elem -> defname ) == 0 ) {
2773
+ if (elem -> arg != NULL && strVal (elem -> arg ) != NULL ) {
2774
+ recoveryStartPos = intVal (elem -> arg );
2775
+ } else {
2776
+ elog (ERROR , "Restart position is not specified" );
2777
+ }
2719
2778
}
2720
2779
}
2721
2780
MtmLock (LW_EXCLUSIVE );
2722
- if (MtmIsRecoverySession ) {
2723
- MTM_LOG1 ("%d: Node %d start recovery of node %d" , MyProcPid , MtmNodeId , MtmReplicationNodeId );
2781
+ if (MtmIsRecoverySession ) {
2782
+ MTM_LOG1 ("%d: Node %d start recovery of node %d at position %lx" , MyProcPid , MtmNodeId , MtmReplicationNodeId , recoveryStartPos );
2783
+ Assert (MyReplicationSlot != NULL );
2784
+ if (recoveryStartPos < MyReplicationSlot -> data .restart_lsn ) {
2785
+ elog (ERROR , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
2786
+ }
2724
2787
if (!BIT_CHECK (Mtm -> disabledNodeMask , MtmReplicationNodeId - 1 )) {
2725
2788
MtmDisableNode (MtmReplicationNodeId );
2726
2789
MtmCheckQuorum ();
0 commit comments