2222import java .util .ArrayList ;
2323import java .util .Collection ;
2424import java .util .Collections ;
25+ import java .util .Comparator ;
2526import java .util .HashMap ;
2627import java .util .HashSet ;
2728import java .util .List ;
2829import java .util .Map ;
2930import java .util .Set ;
31+ import java .util .SortedSet ;
32+ import java .util .TreeSet ;
3033import java .util .concurrent .CompletableFuture ;
3134import java .util .concurrent .Future ;
3235import java .util .concurrent .TimeUnit ;
@@ -232,12 +235,15 @@ public class AssignmentManager {
232235
233236 private final int forceRegionRetainmentRetries ;
234237
238+ private final RegionInTransitionTracker regionInTransitionTracker ;
239+
/**
 * Creates an AssignmentManager whose region state store is a new {@link RegionStateStore} built
 * from the same master services and master region.
 * @param master       master services handed to the delegate constructor and the state store
 * @param masterRegion master region handed to the delegate constructor and the state store
 */
public AssignmentManager(MasterServices master, MasterRegion masterRegion) {
  this(master, masterRegion, new RegionStateStore(master, masterRegion));
}
238243
239244 AssignmentManager (MasterServices master , MasterRegion masterRegion , RegionStateStore stateStore ) {
240245 this .master = master ;
246+ regionInTransitionTracker = new RegionInTransitionTracker (master .getTableStateManager ());
241247 this .regionStateStore = stateStore ;
242248 this .metrics = new MetricsAssignmentManager ();
243249 this .masterRegion = masterRegion ;
@@ -331,6 +337,8 @@ public void start() throws IOException, KeeperException {
331337 regionNode .setLastHost (lastHost );
332338 regionNode .setRegionLocation (regionLocation );
333339 regionNode .setOpenSeqNum (openSeqNum );
340+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
341+
334342 if (regionNode .getProcedure () != null ) {
335343 regionNode .getProcedure ().stateLoaded (this , regionNode );
336344 }
@@ -382,7 +390,7 @@ public void setupRIT(List<TransitRegionStateProcedure> procs) {
382390 return ;
383391 }
384392 }
385- LOG .info ("Attach {} to {} to restore RIT " , proc , regionNode );
393+ LOG .info ("Attach {} to {} to restore" , proc , regionNode );
386394 regionNode .setProcedure (proc );
387395 });
388396 }
@@ -411,6 +419,7 @@ public void stop() {
411419
412420 // Stop the RegionStateStore
413421 regionStates .clear ();
422+ regionInTransitionTracker .stop ();
414423
415424 // Update meta events (for testing)
416425 if (hasProcExecutor ) {
@@ -1093,7 +1102,7 @@ private int submitUnassignProcedure(TableName tableName,
10931102 regionNode .lock ();
10941103 try {
10951104 if (shouldSubmit .apply (regionNode )) {
1096- if (regionNode .isInTransition ()) {
1105+ if (regionNode .isTransitionScheduled ()) {
10971106 logRIT .accept (regionNode );
10981107 inTransitionCount ++;
10991108 continue ;
@@ -1702,10 +1711,8 @@ public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {
17021711 }
17031712
17041713 protected void update (final AssignmentManager am ) {
1705- final RegionStates regionStates = am .getRegionStates ();
17061714 this .statTimestamp = EnvironmentEdgeManager .currentTime ();
1707- update (regionStates .getRegionsStateInTransition (), statTimestamp );
1708- update (regionStates .getRegionFailedOpen (), statTimestamp );
1715+ update (am .getRegionsStateInTransition (), statTimestamp );
17091716
17101717 if (LOG .isDebugEnabled () && ritsOverThreshold != null && !ritsOverThreshold .isEmpty ()) {
17111718 LOG .debug ("RITs over threshold: {}" ,
@@ -1873,6 +1880,11 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
18731880 if (regionNode .getProcedure () != null ) {
18741881 regionNode .getProcedure ().stateLoaded (AssignmentManager .this , regionNode );
18751882 }
1883+ // add regions to RIT while visiting the meta
1884+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
1885+ if (master .getServerManager ().isServerDead (regionNode .getRegionLocation ())) {
1886+ regionInTransitionTracker .regionCrashed (regionNode );
1887+ }
18761888 }
18771889 };
18781890
@@ -2046,15 +2058,52 @@ public Pair<Integer, Integer> getReopenStatus(TableName tableName) {
20462058 return new Pair <Integer , Integer >(ritCount , states .size ());
20472059 }
20482060
2061+ // This comparator sorts the RegionStates by time stamp then Region name.
2062+ // Comparing by timestamp alone can lead us to discard different RegionStates that happen
2063+ // to share a timestamp.
2064+ private static class RegionStateStampComparator implements Comparator <RegionState > {
2065+ @ Override
2066+ public int compare (final RegionState l , final RegionState r ) {
2067+ int stampCmp = Long .compare (l .getStamp (), r .getStamp ());
2068+ return stampCmp != 0 ? stampCmp : RegionInfo .COMPARATOR .compare (l .getRegion (), r .getRegion ());
2069+ }
2070+ }
2071+
2072+ public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
2073+ new RegionStateStampComparator ();
2074+
20492075 // ============================================================================================
20502076 // TODO: Region State In Transition
20512077 // ============================================================================================
20522078 public boolean hasRegionsInTransition () {
2053- return regionStates .hasRegionsInTransition ();
2079+ return regionInTransitionTracker .hasRegionsInTransition ();
20542080 }
20552081
20562082 public List <RegionStateNode > getRegionsInTransition () {
2057- return regionStates .getRegionsInTransition ();
2083+ return regionInTransitionTracker .getRegionsInTransition ();
2084+ }
2085+
2086+ public boolean isRegionInTransition (final RegionInfo regionInfo ) {
2087+ return regionInTransitionTracker .isRegionInTransition (regionInfo );
2088+ }
2089+
2090+ public int getRegionTransitScheduledCount () {
2091+ return regionStates .getRegionTransitScheduledCount ();
2092+ }
2093+
2094+ /**
2095+ * Get the number of regions in transition.
2096+ */
2097+ public int getRegionsInTransitionCount () {
2098+ return regionInTransitionTracker .getRegionsInTransition ().size ();
2099+ }
2100+
2101+ public SortedSet <RegionState > getRegionsStateInTransition () {
2102+ final SortedSet <RegionState > rit = new TreeSet <RegionState >(REGION_STATE_STAMP_COMPARATOR );
2103+ for (RegionStateNode node : getRegionsInTransition ()) {
2104+ rit .add (node .toRegionState ());
2105+ }
2106+ return rit ;
20582107 }
20592108
20602109 public List <RegionInfo > getAssignedRegions () {
@@ -2122,6 +2171,8 @@ private CompletableFuture<Void> transitStateAndUpdate(RegionStateNode regionNode
21222171 if (e != null ) {
21232172 // revert
21242173 regionNode .setState (state );
2174+ } else {
2175+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
21252176 }
21262177 });
21272178 return future ;
@@ -2170,6 +2221,7 @@ CompletableFuture<Void> regionFailedOpen(RegionStateNode regionNode, boolean giv
21702221 if (regionLocation != null ) {
21712222 regionStates .removeRegionFromServer (regionLocation , regionNode );
21722223 }
2224+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
21732225 } else {
21742226 // revert
21752227 regionNode .setState (state );
@@ -2230,6 +2282,7 @@ CompletableFuture<Void> persistToMeta(RegionStateNode regionNode) {
22302282 // on table that contains state.
22312283 setMetaAssigned (regionInfo , true );
22322284 }
2285+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
22332286 });
22342287 }
22352288
@@ -2247,6 +2300,7 @@ public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode
22472300 regionNode .setLastHost (regionLocation );
22482301 regionStates .removeRegionFromServer (regionLocation , regionNode );
22492302 }
2303+ regionInTransitionTracker .handleRegionStateNodeOperation (regionNode );
22502304 } else {
22512305 // revert
22522306 regionNode .setState (state );
@@ -2260,6 +2314,17 @@ public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode
22602314 // The above methods can only be called in TransitRegionStateProcedure(and related procedures)
22612315 // ============================================================================================
22622316
2317+ // As soon as a server a crashed, region hosting on that are un-available, this method helps to
2318+ // track those un-available regions. This method can only be called from ServerCrashProcedure.
2319+ public void markRegionsAsCrashed (List <RegionInfo > regionsOnCrashedServer ,
2320+ ServerName crashedServerName ) {
2321+ for (RegionInfo regionInfo : regionsOnCrashedServer ) {
2322+ RegionStateNode node = regionStates .getOrCreateRegionStateNode (regionInfo );
2323+ if (node .getRegionLocation () == crashedServerName )
2324+ regionInTransitionTracker .regionCrashed (node );
2325+ }
2326+ }
2327+
22632328 public void markRegionAsSplit (final RegionInfo parent , final ServerName serverName ,
22642329 final RegionInfo daughterA , final RegionInfo daughterB ) throws IOException {
22652330 // Update hbase:meta. Parent will be marked offline and split up in hbase:meta.
@@ -2284,6 +2349,9 @@ public void markRegionAsSplit(final RegionInfo parent, final ServerName serverNa
22842349 // it is a split parent. And usually only one of them can match, as after restart, the region
22852350 // state will be changed from SPLIT to CLOSED.
22862351 regionStateStore .splitRegion (parent , daughterA , daughterB , serverName , td );
2352+ regionInTransitionTracker .handleRegionStateNodeOperation (node );
2353+ regionInTransitionTracker .handleRegionStateNodeOperation (nodeA );
2354+ regionInTransitionTracker .handleRegionStateNodeOperation (nodeB );
22872355 if (shouldAssignFavoredNodes (parent )) {
22882356 List <ServerName > onlineServers = this .master .getServerManager ().getOnlineServersList ();
22892357 getFavoredNodePromoter ().generateFavoredNodesForDaughter (onlineServers , parent , daughterA ,
@@ -2303,12 +2371,14 @@ public void markRegionAsSplit(final RegionInfo parent, final ServerName serverNa
23032371 public void markRegionAsMerged (final RegionInfo child , final ServerName serverName ,
23042372 RegionInfo [] mergeParents ) throws IOException {
23052373 final RegionStateNode node = regionStates .getOrCreateRegionStateNode (child );
2306- node .setState (State .MERGED );
23072374 for (RegionInfo ri : mergeParents ) {
23082375 regionStates .deleteRegion (ri );
2376+ regionInTransitionTracker .handleRegionDelete (ri );
23092377 }
2378+
23102379 TableDescriptor td = master .getTableDescriptors ().get (child .getTable ());
23112380 regionStateStore .mergeRegions (child , mergeParents , serverName , td );
2381+ regionInTransitionTracker .handleRegionStateNodeOperation (node );
23122382 if (shouldAssignFavoredNodes (child )) {
23132383 getFavoredNodePromoter ().generateFavoredNodesForMergedRegion (child , mergeParents );
23142384 }
0 commit comments