Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
b519098
HubSpot Edit: Add HubSpot build setup
bbeaudreault Jan 10, 2024
e5916dc
HubSpot Edit: HBASE-28365: ChaosMonkey batch suspend/resume action as…
bbeaudreault Feb 12, 2024
991513e
HubSpot Edit: Add retries to verify step of ITBLL
bbeaudreault Feb 17, 2024
0e45bd5
HubSpot Edit: Add an hbase-site.xml to our bundles that configures ZS…
charlesconnell Feb 2, 2024
4af1ad3
HubSpot Edit: Add hdfs stats for local and remote rack bytes read
bbeaudreault Apr 19, 2024
2709ece
HubSpot Edit: Basic healthcheck servlets
bbeaudreault Apr 18, 2024
2fe5815
HubSpot Edit: More info when interrupted while waiting on actions
bbeaudreault Mar 7, 2024
cc0df81
HubSpot Backport: HBASE-28596: Optimise BucketCache usage upon region…
wchevreuil Jun 19, 2024
9d04d57
HubSpot Edit: Upgrade zstd-jni to latest version
charlesconnell Dec 27, 2024
f992d77
HubSpot Edit: HBASE-29134: Optimize bulkload backup process for incre…
hgromer Feb 18, 2025
3d1a424
HubSpot Edit: archive backup fix
hgromer Feb 18, 2025
f415e9a
HubSpot Backport: HBASE-29123: A faster CodecPool for HBase (will be …
charlesconnell Feb 18, 2025
2fb9fd1
HubSpot Backport: HBASE-29135: ZStandard decompression can operate di…
ndimiduk Mar 3, 2025
2e83f19
HubSpot Backport: HBASE-29160: Cache region name string in RegionInfo…
charlesconnell Mar 5, 2025
d4b24e4
HubSpot Backport: HBASE-29160: Cache region name string in RegionInfo…
charlesconnell Mar 5, 2025
e440b4c
HubSpot Backport: HBASE-29148: BufferedMutator should be able to flus…
charlesconnell Mar 3, 2025
1d1fecc
HubSpot Backport: HBASE-29172: Fix to ZstdByteBuffDecompressor (not y…
charlesconnell Mar 7, 2025
277c169
HubSpot Backport: HBASE-28513 The StochasticLoadBalancer should suppo…
rmdmattingly Feb 28, 2025
9905b7e
HubSpot Backport: HBASE-29074 Balancer conditionals should support me…
rmdmattingly Mar 6, 2025
ed601de
HubSpot Backport: HBASE-29075 Balancer conditionals should support sy…
rmdmattingly Mar 7, 2025
5a2b53b
fix backup location (#162)
hgromer Mar 12, 2025
4933d3d
HubSpot Backport: HBASE-29186 RegionPlanConditionals can produce a nu…
rmdmattingly Mar 17, 2025
f30572b
HubSpot Backport: HBASE-29131 Introduce the option for post-compactio…
ndimiduk Feb 17, 2025
809751a
HubSpot Backport: HBASE-29202 Balancer conditionals make balancer act…
rmdmattingly Mar 21, 2025
3a29041
HubSpot Backport: HBASE-29203 There should be a StorefileSize equival…
rmdmattingly Mar 21, 2025
8e5892c
HubSpot Edit: I messed up 29202, 29203 backports with incompatibiliti…
rmdmattingly Mar 21, 2025
4996ad7
HubSpot Backport: HBASE-29193: Allow ZstdByteBuffDecompressor to take…
charlesconnell Mar 17, 2025
ab96902
HubSpot Backport: HBASE-29239: Subsequent runs of re-splitting HFiles…
hgromer Apr 8, 2025
45c5b4e
HubSpot Backport: HBASE-29134 Optimize bulkload backup process for in…
hgromer Apr 8, 2025
33075fc
HubSpot Backport: HBASE-29146 Incremental backups can fail due to not…
hgromer Apr 8, 2025
a59873c
HubSpot Backport: HBASE-29229: Throttles should support specific rest…
rmdmattingly Apr 9, 2025
0b7c5b9
HubSpot Backport: HBASE-29262 StochasticLoadBalancer should use the C…
rmdmattingly Apr 15, 2025
ac6f349
HubSpot Backport: HBASE-29218: Pass around an HFileDecompressionConte…
charlesconnell Mar 26, 2025
2ceddc9
HubSpot Backport: HBASE-29253: Avoid allocating a new closure on ever…
charlesconnell Apr 11, 2025
96402e7
HubSpot Backport: HBASE-29252: Reduce allocations in RowIndexSeekerV1…
charlesconnell Apr 11, 2025
e813342
HubSpot Backport: HBASE-29204: BufferedMutatorParams#clone() should c…
charlesconnell Mar 19, 2025
ca80fd5
HBASE-29259 Master crash when loading procedures (#6906)
Apache9 Apr 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .blazar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
buildpack:
  name: Blazar-Buildpack-Java-single-module

env:
  MAVEN_PHASE: "package assembly:single deploy"
  HADOOP_DEP_VERSION: "3.3.6-hubspot-SNAPSHOT"
  MAVEN_BUILD_ARGS: "-Phadoop-3.0 -Dhadoop.profile=3.0 -Dhadoop-three.version=$HADOOP_DEP_VERSION -Dgpg.skip=true -DskipTests -DdeployAtEnd -pl hbase-assembly -am -T1C"

  # Below variables are generated in prepare_environment.sh.
  # The build environment requires environment variables to be explicitly defined before they may
  # be modified by the `write-build-env-var` utility script to persist changes to an environment variable
  # throughout a build
  REPO_NAME: ""
  SET_VERSION: ""
  HBASE_VERSION: ""
  PKG_RELEASE: ""
  FULL_BUILD_VERSION: ""

before:
  - description: "Prepare build environment"
    commands:
      - $WORKSPACE/build-scripts/prepare_environment.sh

provides:
  - hbase
Empty file added .build-jdk17
Empty file.
97 changes: 97 additions & 0 deletions build-scripts/prepare_environment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#
# Generates the appropriate environment vars so that we:
# - build against the right version of hadoop, and properly set up maven
# - generate the correct maven version based on the branches
# - upload RPMs with the correct release based on the branch, and to the right yum repo
#
# Since we need to distribute .blazar.yaml to all sub-modules of the project, we define our constants once
# in this script which can be re-used by every .blazar.yaml.
#
set -ex
printenv

# We base the expected main branch and resulting maven version for clients on the hbase minor version
# The reason for this is hbase re-branches for each minor release (2.4, 2.5, 2.6, etc). At each re-branch
# the histories diverge. So we'll need to create our own fork of each new minor release branch.
# The convention is a fork named "hubspot-$minorVersion", and the maven coordinates "$minorVersion-hubspot-SNAPSHOT"
MINOR_VERSION="2.6"
MAIN_BRANCH="hubspot-${MINOR_VERSION}"

#
# Validate inputs from blazar
#

if [ -z "$WORKSPACE" ]; then
  echo "Missing env var \$WORKSPACE"
  exit 1
fi
if [ -z "$GIT_BRANCH" ]; then
  echo "Missing env var \$GIT_BRANCH"
  exit 1
fi
if [ -z "$BUILD_COMMAND_RC_FILE" ]; then
  echo "Missing env var \$BUILD_COMMAND_RC_FILE"
  exit 1
fi
# BUILD_NUMBER is interpolated into the RPM release below; fail fast if it's absent
# rather than silently producing a release like "hs." with a trailing dot.
if [ -z "$BUILD_NUMBER" ]; then
  echo "Missing env var \$BUILD_NUMBER"
  exit 1
fi

#
# Extract current hbase version from root pom.xml
#

# the pom.xml has an invalid xml namespace, so just remove that so xmllint can parse it.
# (read the file directly rather than piping through cat; quote the path in case
# $WORKSPACE contains spaces)
sed '2 s/xmlns=".*"//g' "$WORKSPACE/pom.xml" > pom.xml.tmp
HBASE_VERSION=$(echo "cat /project/properties/revision/text()" | xmllint --nocdata --shell pom.xml.tmp | sed '1d;$d')
rm pom.xml.tmp

# sanity check that we've got some that looks right. it wouldn't be the end of the world if we got it wrong, but
# will help avoid confusion.
if [[ ! "$HBASE_VERSION" =~ 2\.[0-9]+\.[0-9]+ ]]; then
  echo "Unexpected HBASE_VERSION extracted from pom.xml. Got $HBASE_VERSION but expected a string like '2.4.3', with 3 numbers separated by decimals, the first number being 2."
  exit 1
fi

#
# Generate branch-specific env vars
# We are going to generate the maven version and the RPM release here:
# - For the maven version, we need to special case our main branch
# - For RPM, we want our final version to be:
#     main branch: {hbase_version}-hs.{build_number}.el6
#     other branches: {hbase_version}-hs~{branch_name}.{build_number}.el6, where branch_name substitutes underscore for non-alpha-numeric characters
#

echo "Git branch $GIT_BRANCH. Detecting appropriate version override and RPM release."

RELEASE="hs"

if [[ "$GIT_BRANCH" = "$MAIN_BRANCH" ]]; then
  SET_VERSION="${MINOR_VERSION}-hubspot-SNAPSHOT"
  REPO_NAME="AnyLinuxVersion_hs-hbase"
elif [[ "$GIT_BRANCH" != "hubspot" ]]; then
  SET_VERSION="${MINOR_VERSION}-${GIT_BRANCH}-SNAPSHOT"
  RELEASE="${RELEASE}~${GIT_BRANCH//[^[:alnum:]]/_}"
  REPO_NAME="AnyLinuxVersion_hs-hbase-develop"
else
  echo "Invalid git branch $GIT_BRANCH"
  exit 1
fi

RELEASE="${RELEASE}.${BUILD_NUMBER}"
FULL_BUILD_VERSION="${HBASE_VERSION}-${RELEASE}"

# SET_VERSION is not the most intuitive name, but it's required for set-maven-versions script
write-build-env-var SET_VERSION "$SET_VERSION"
write-build-env-var HBASE_VERSION "$HBASE_VERSION"
write-build-env-var PKG_RELEASE "$RELEASE"
write-build-env-var FULL_BUILD_VERSION "$FULL_BUILD_VERSION"
write-build-env-var REPO_NAME "$REPO_NAME"
# Adding this value as versioninfo.version ensures we have the same value as would normally
# show up in a non-hubspot hbase build. Otherwise due to set-maven-versions we'd end up
# with 2.6-hubspot-SNAPSHOT which is not very useful as a point of reference.
# Another option would be to pass in our FULL_BUILD_VERSION but that might cause some funniness
# with the expectations in VersionInfo.compareVersion().
write-build-env-var MAVEN_BUILD_ARGS "$MAVEN_BUILD_ARGS -Dversioninfo.version=$HBASE_VERSION"

echo "Building HBase version $HBASE_VERSION"
echo "Will deploy to nexus with version $SET_VERSION"
echo "Will create rpm with version $FULL_BUILD_VERSION"
echo "Will run maven with extra args $MAVEN_BUILD_ARGS"
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ protected static int getIndex(TableName tbl, List<TableName> sTableList) {
* @param tablesToBackup list of tables to be backed up
*/
protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws IOException {
List<String> activeFiles = new ArrayList<>();
List<String> archiveFiles = new ArrayList<>();
Map<TableName, MergeSplitBulkloadInfo> toBulkload = new HashMap<>();
List<BulkLoad> bulkLoads = backupManager.readBulkloadRows(tablesToBackup);
FileSystem tgtFs;
try {
Expand All @@ -137,6 +136,8 @@ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws I

for (BulkLoad bulkLoad : bulkLoads) {
TableName srcTable = bulkLoad.getTableName();
MergeSplitBulkloadInfo bulkloadInfo =
toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new);
String regionName = bulkLoad.getRegion();
String fam = bulkLoad.getColumnFamily();
String filename = FilenameUtils.getName(bulkLoad.getHfilePath());
Expand Down Expand Up @@ -166,27 +167,30 @@ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws I
srcTableQualifier);
LOG.trace("copying {} to {}", p, tgt);
}
activeFiles.add(p.toString());
bulkloadInfo.addActiveFile(p.toString());
} else if (fs.exists(archive)) {
LOG.debug("copying archive {} to {}", archive, tgt);
archiveFiles.add(archive.toString());
bulkloadInfo.addArchivedFiles(archive.toString());
}
mergeSplitBulkloads(activeFiles, archiveFiles, srcTable);
incrementalCopyBulkloadHFiles(tgtFs, srcTable);
}

for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) {
mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(),
bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs);
}

return bulkLoads;
}

private void mergeSplitBulkloads(List<String> activeFiles, List<String> archiveFiles,
TableName tn) throws IOException {
private void mergeSplitAndCopyBulkloadedHFiles(List<String> activeFiles,
List<String> archiveFiles, TableName tn, FileSystem tgtFs) throws IOException {
int attempt = 1;

while (!activeFiles.isEmpty()) {
LOG.info("MergeSplit {} active bulk loaded files. Attempt={}", activeFiles.size(), attempt++);
// Active file can be archived during copy operation,
// we need to handle this properly
try {
mergeSplitBulkloads(activeFiles, tn);
mergeSplitAndCopyBulkloadedHFiles(activeFiles, tn, tgtFs);
break;
} catch (IOException e) {
int numActiveFiles = activeFiles.size();
Expand All @@ -200,11 +204,12 @@ private void mergeSplitBulkloads(List<String> activeFiles, List<String> archiveF
}

if (!archiveFiles.isEmpty()) {
mergeSplitBulkloads(archiveFiles, tn);
mergeSplitAndCopyBulkloadedHFiles(archiveFiles, tn, tgtFs);
}
}

private void mergeSplitBulkloads(List<String> files, TableName tn) throws IOException {
private void mergeSplitAndCopyBulkloadedHFiles(List<String> files, TableName tn, FileSystem tgtFs)
throws IOException {
MapReduceHFileSplitterJob player = new MapReduceHFileSplitterJob();
conf.set(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY,
getBulkOutputDirForTable(tn).toString());
Expand All @@ -219,13 +224,18 @@ private void mergeSplitBulkloads(List<String> files, TableName tn) throws IOExce
result = player.run(args);
} catch (Exception e) {
LOG.error("Failed to run MapReduceHFileSplitterJob", e);
// Delete the bulkload directory if we fail to run the HFile splitter job for any reason
// as it might be re-tried
deleteBulkLoadDirectory();
throw new IOException(e);
}

if (result != 0) {
throw new IOException(
"Failed to run MapReduceHFileSplitterJob with invalid result: " + result);
}

incrementalCopyBulkloadHFiles(tgtFs, tn);
}

private void updateFileLists(List<String> activeFiles, List<String> archiveFiles)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.backup.impl;

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.TableName;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Per-table accumulator used during incremental backup: collects the active and
 * archived bulk-loaded HFile paths that belong to a single source table so they
 * can be merge-split and copied together in one pass.
 */
@InterfaceAudience.Private
class MergeSplitBulkloadInfo {

  private final TableName srcTable;
  private final List<String> activeFiles = new ArrayList<>();
  private final List<String> archiveFiles = new ArrayList<>();

  /**
   * @param srcTable the table whose bulk-loaded files are tracked by this instance
   */
  public MergeSplitBulkloadInfo(TableName srcTable) {
    this.srcTable = srcTable;
  }

  /** Records an HFile path that is still live under the table's data directory. */
  public void addActiveFile(String file) {
    activeFiles.add(file);
  }

  /**
   * Records an HFile path that has been moved to the archive directory.
   * Note: despite the plural name, a single path is added per call (name kept
   * for compatibility with existing callers).
   */
  public void addArchivedFiles(String file) {
    archiveFiles.add(file);
  }

  public TableName getSrcTable() {
    return srcTable;
  }

  public List<String> getActiveFiles() {
    return activeFiles;
  }

  public List<String> getArchiveFiles() {
    return archiveFiles;
  }
}
Loading