Skip to content

Commit 2da766e

Browse files
author
ukumawat
committed
@HBASE-28158 code review changes
1 parent 8e911a0 commit 2da766e

File tree

10 files changed

+48
-24
lines changed

10 files changed

+48
-24
lines changed

hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
724724
<tr>
725725
<th></th>
726726
<td><% deadServerName %></td>
727-
<td><% deadServerUtil.getTimeOfDeath(deadServerName) %></td>
727+
<td><% new Date(deadServerUtil.getTimeOfDeath(deadServerName)) %></td>
728728
<%if rsGroupName != null %>
729729
<td><% rsGroupName %></td>
730730
</%if>

hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import java.util.ArrayList;
2121
import java.util.Collections;
22-
import java.util.Date;
2322
import java.util.HashMap;
2423
import java.util.HashSet;
2524
import java.util.Iterator;
@@ -75,7 +74,7 @@ synchronized void putIfAbsent(ServerName sn) {
7574
this.deadServers.putIfAbsent(sn, EnvironmentEdgeManager.currentTime());
7675
}
7776

78-
synchronized void putIfAbsent(ServerName sn, Long crashedTime) {
77+
synchronized void putIfAbsent(ServerName sn, long crashedTime) {
7978
this.deadServers.putIfAbsent(sn, crashedTime);
8079
}
8180

@@ -168,9 +167,9 @@ synchronized List<Pair<ServerName, Long>> copyDeadServersSince(long ts) {
168167
* @param deadServerName the dead server name
169168
* @return the date when the server died
170169
*/
171-
public synchronized Date getTimeOfDeath(final ServerName deadServerName) {
170+
public synchronized long getTimeOfDeath(final ServerName deadServerName) {
172171
Long time = deadServers.get(deadServerName);
173-
return time == null ? null : new Date(time);
172+
return time == null ? 0 : time;
174173
}
175174

176175
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2116,7 +2116,8 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
21162116
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
21172117
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
21182118
+ ") because " + assignmentManager.getRegionTransitScheduledCount()
2119-
+ " region(s) in transition: " + toPrint + (truncated ? "(truncated list)" : ""));
2119+
+ " region(s) are scheduled to transit " + toPrint
2120+
+ (truncated ? "(truncated list)" : ""));
21202121
return responseBuilder.build();
21212122
}
21222123
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.util.Collection;
2424
import java.util.Collections;
2525
import java.util.Comparator;
26-
import java.util.Date;
2726
import java.util.HashMap;
2827
import java.util.HashSet;
2928
import java.util.List;
@@ -391,7 +390,7 @@ public void setupRIT(List<TransitRegionStateProcedure> procs) {
391390
return;
392391
}
393392
}
394-
LOG.info("Attach {} to {} to restore", proc, regionNode);
393+
LOG.info("Attach {} to {}", proc, regionNode);
395394
regionNode.setProcedure(proc);
396395
});
397396
}
@@ -1888,10 +1887,10 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
18881887
regionNode.getRegionLocation() != null
18891888
&& master.getServerManager().isServerDead(regionNode.getRegionLocation())
18901889
) {
1891-
Date timeOfCrash =
1890+
long timeOfCrash =
18921891
master.getServerManager().getDeadServers().getTimeOfDeath(regionNode.getRegionLocation());
1893-
if (timeOfCrash != null) {
1894-
regionNode.crashed(timeOfCrash.getTime());
1892+
if (timeOfCrash != 0) {
1893+
regionNode.crashed(timeOfCrash);
18951894
}
18961895
regionInTransitionTracker.regionCrashed(regionNode);
18971896
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
import org.slf4j.Logger;
2929
import org.slf4j.LoggerFactory;
3030

31+
/**
32+
* Tracks regions that are currently in transition (RIT) - those not yet in their terminal state.
33+
*/
3134
@InterfaceAudience.Private
3235
public class RegionInTransitionTracker {
3336
private static final Logger LOG = LoggerFactory.getLogger(RegionInTransitionTracker.class);
@@ -50,6 +53,11 @@ public boolean isRegionInTransition(final RegionInfo regionInfo) {
5053
return regionInTransition.containsKey(regionInfo);
5154
}
5255

56+
/**
57+
* Handles a region whose hosting RegionServer has crashed. When a RegionServer fails, all regions
58+
* it was hosting are automatically added to the RIT list since they need to be reassigned to
59+
* other servers.
60+
*/
5361
public void regionCrashed(RegionStateNode regionStateNode) {
5462
if (regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
5563
return;
@@ -61,6 +69,14 @@ public void regionCrashed(RegionStateNode regionStateNode) {
6169
}
6270
}
6371

72+
/**
73+
* Processes a region state change and updates the RIT tracking accordingly. This is the core
74+
* method that determines whether a region should be added to or removed from the RIT list based
75+
* on its current state and the table's enabled/disabled status. This method should be called
76+
* whenever a region state changes get stored to hbase:meta Note: Only default replicas (replica
77+
* ID 0) are tracked. Read replicas are ignored.
78+
* @param regionStateNode the region state node with the current state information
79+
*/
6480
public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
6581
// only consider default replica for availability
6682
if (regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
@@ -95,6 +111,13 @@ public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
95111
}
96112
}
97113

114+
/**
115+
* Handles the deletion of a region by removing it from RIT tracking. This is called when a region
116+
* is permanently removed from the cluster, typically after a successful merge operation where the
117+
* parent regions are cleaned up. During table deletion, table should be already disabled and all
118+
* the region are already OFFLINE
119+
* @param regionInfo the region being deleted
120+
*/
98121
public void handleRegionDelete(RegionInfo regionInfo) {
99122
removeRegionInTransition(regionInfo);
100123
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
public class RegionStateNode implements Comparable<RegionStateNode> {
6969

7070
private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class);
71-
private final AtomicInteger trspCounter;
71+
private final AtomicInteger activeTransitProcedureCount;
7272

7373
private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
7474
public AssignmentProcedureEvent(final RegionInfo regionInfo) {
@@ -102,11 +102,11 @@ public AssignmentProcedureEvent(final RegionInfo regionInfo) {
102102

103103
private volatile long openSeqNum = HConstants.NO_SEQNUM;
104104

105-
RegionStateNode(RegionInfo regionInfo, AtomicInteger trspCounter) {
105+
RegionStateNode(RegionInfo regionInfo, AtomicInteger activeTransitProcedureCount) {
106106
this.regionInfo = regionInfo;
107107
this.event = new AssignmentProcedureEvent(regionInfo);
108108
this.lock = new RegionStateNodeLock(regionInfo);
109-
this.trspCounter = trspCounter;
109+
this.activeTransitProcedureCount = activeTransitProcedureCount;
110110
}
111111

112112
/**
@@ -211,13 +211,13 @@ public ServerName setRegionLocation(final ServerName serverName) {
211211
public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure proc) {
212212
assert this.procedure == null;
213213
this.procedure = proc;
214-
trspCounter.incrementAndGet();
214+
activeTransitProcedureCount.incrementAndGet();
215215
return proc;
216216
}
217217

218218
public void unsetProcedure(TransitRegionStateProcedure proc) {
219219
assert this.procedure == proc;
220-
trspCounter.decrementAndGet();
220+
activeTransitProcedureCount.decrementAndGet();
221221
this.procedure = null;
222222
}
223223

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public class RegionStates {
5353

5454
private final Object regionsMapLock = new Object();
5555

56-
private final AtomicInteger trspCounter = new AtomicInteger(0);
56+
private final AtomicInteger activeTransitProcedureCount = new AtomicInteger(0);
5757

5858
// TODO: Replace the ConcurrentSkipListMaps
5959
/**
@@ -105,7 +105,7 @@ public boolean isRegionInRegionStates(final RegionInfo hri) {
105105
RegionStateNode createRegionStateNode(RegionInfo regionInfo) {
106106
synchronized (regionsMapLock) {
107107
RegionStateNode node = regionsMap.computeIfAbsent(regionInfo.getRegionName(),
108-
key -> new RegionStateNode(regionInfo, trspCounter));
108+
key -> new RegionStateNode(regionInfo, activeTransitProcedureCount));
109109

110110
if (encodedRegionsMap.get(regionInfo.getEncodedName()) != node) {
111111
encodedRegionsMap.put(regionInfo.getEncodedName(), node);
@@ -602,7 +602,7 @@ public void addToOfflineRegions(final RegionStateNode regionNode) {
602602
}
603603

604604
public int getRegionTransitScheduledCount() {
605-
return trspCounter.get();
605+
return activeTransitProcedureCount.get();
606606
}
607607

608608
// ==========================================================================

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
3232
import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
3333
import org.apache.hadoop.hbase.master.RegionState;
34-
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
3534
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
35+
import org.apache.hadoop.hbase.master.assignment.RegionStates;
3636
import org.apache.hadoop.hbase.procedure2.Procedure;
3737
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
3838
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
@@ -246,14 +246,13 @@ protected static void waitMetaRegions(final MasterProcedureEnv env) throws IOExc
246246

247247
protected static void waitRegionInTransition(final MasterProcedureEnv env,
248248
final List<RegionInfo> regions) throws IOException {
249-
final AssignmentManager assignmentManager = env.getAssignmentManager();
249+
final RegionStates states = env.getAssignmentManager().getRegionStates();
250250
for (final RegionInfo region : regions) {
251251
ProcedureSyncWait.waitFor(env, "regions " + region.getRegionNameAsString() + " in transition",
252252
new ProcedureSyncWait.Predicate<Boolean>() {
253253
@Override
254254
public Boolean evaluate() throws IOException {
255-
return !assignmentManager.getRegionStates().getRegionStateNode(region)
256-
.isTransitionScheduled();
255+
return !states.getRegionStateNode(region).isTransitionScheduled();
257256
}
258257
});
259258
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
169169
LOG.info("Start " + this);
170170
// If carrying meta, process it first. Else, get list of regions on crashed server.
171171
if (this.carryingMeta) {
172+
env.getAssignmentManager()
173+
.markRegionsAsCrashed(List.of(RegionInfoBuilder.FIRST_META_REGIONINFO), this);
172174
setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
173175
} else {
174176
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.hadoop.hbase.master;
1919

2020
import static org.junit.Assert.assertFalse;
21+
import static org.junit.Assert.assertNotEquals;
2122
import static org.junit.Assert.assertTrue;
2223

2324
import java.util.List;
@@ -83,7 +84,7 @@ public void testIsDead() {
8384
assertTrue(ds.isDeadServer(deadServer));
8485
Set<ServerName> deadServerNames = ds.copyServerNames();
8586
for (ServerName eachDeadServer : deadServerNames) {
86-
Assert.assertNotNull(ds.getTimeOfDeath(eachDeadServer));
87+
assertNotEquals(0, ds.getTimeOfDeath(eachDeadServer));
8788
}
8889
final ServerName deadServerHostComingAlive = ServerName.valueOf("127.0.0.1", 9090, 223341L);
8990
assertTrue(ds.cleanPreviousInstance(deadServerHostComingAlive));

0 commit comments

Comments
 (0)