diff --git a/.gitignore b/.gitignore
index 96cee29b2e..7f583a6f01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,6 @@ tfe-releases-repos.json
 scripts/prebuild/prebuild-arm-mac-binary
 scripts/prebuild/prebuild-x64-linux-binary
 scripts/prebuild/prebuild-arm-linux-binary
+
+# GA to RC tool output files
+scripts/sync-ga-to-rc/output/*.txt
\ No newline at end of file
diff --git a/scripts/sync-ga-to-rc/README.md b/scripts/sync-ga-to-rc/README.md
new file mode 100644
index 0000000000..0adac63ae3
--- /dev/null
+++ b/scripts/sync-ga-to-rc/README.md
@@ -0,0 +1,177 @@
+# Sync GA change to RC docset
+
+The GA -> RC sync script helps with maintenance of long-lived release branches
+for versioned docs by comparing updates since a provided cutoff in the current
+(GA) docset against updates in an unreleased (RC) docset.
+
+The default cutoff date is the last run date for the provided product slug, if
+it exists. Otherwise, the script defaults to the creation date of the RC release
+branch. The script standardizes all timestamps to ISO for simplicity but takes
+the optional override date as a local time.
+
+
+## Assumptions
+
+- Your RC release branch uses the following naming convention:
+  `<product-slug>/<rc-version>` (for example, `vault/1.21.x`).
+- You have the GitHub CLI (`gh`) installed. The CLI is required if you want the
+  script to create a PR on your behalf. ==> DISABLED (THE PROCESS IS STILL BUGGY)
+
+
+## Flags
+
+Flag      | Type     | Default         | Description
+--------- | -------- | --------------- | -----------
+`-slug`   | `string` | none (required) | Product slug used for the root content folder
+`-ga`     | `string` | none (required) | Version of the current docset
+`-rc`     | `string` | none (required) | Version of the unreleased docset
+`-tag`    | `string` | ""              | String used to tag non-GA docsets (e.g., "(rc)")
+`-branch` | `string` | `main`          | Name of the GA branch
+`-date`   | `string` | null            | Local override date in "YYYY-MM-DD HH:MM:SS" format for the commit date cutoff
+`-update` | `bool`   | false           | Indicates whether to apply any safe changes locally
+`-pr`     | `bool`   | false           | Indicates whether to apply any safe changes locally and generate a PR if possible
+`-merged` | `bool`   | false           | Indicates that RC docs are merged to `-branch`
+`-help`   | `bool`   | false           | Print usage help text and exit
+
+
+## Usage
+
+```text
+node sync-ga-to-rc.mjs -slug <product> -ga <ga-version> -rc <rc-version>
+  [-tag <doc-tag>] [-branch <ga-branch>] [-date "<YYYY-MM-DD HH:MM:SS>"]
+  [-update] [-pr] [-merged] [-help]
+```
+
+
+## Exclusions
+
+The script reads `data/exclude.json` to find files it should ignore during the
+sync. The exclusion file uses the following schema:
+
+```json
+[
+  {
+    "<product-slug>": [
+      "<relative-file-path-1>",
+      "<relative-file-path-2>",
+      ...
+      "<relative-file-path-N>"
+    ]
+  }
+]
+```
+
+For example:
+
+```json
+[
+  {
+    "vault": [
+      "/content/docs/updates/important-changes.mdx",
+      "/content/docs/updates/release-notes.mdx",
+      "/content/docs/updates/change-tracker.mdx"
+    ]
+  }
+]
+```
+
+## General workflow
+
+The script syncs the local GA and RC branches and creates a new branch off of
+the RC branch to work from.
+
+Next, the script builds the following file sets (the commit-date check behind
+the two delta sets is sketched after this list):
+
+- exclusions - a list of files the script should ignore during the sync
+- GAΔ - files in the GA (current) docset with a last commit date later
+  than the provided cutoff date.
+- RCΔ - files in the RC (unreleased) docset with a last commit date
+  later than the provided cutoff date.
+- GA-only - files in the GA (current) docset that do not exist in the RC
+  docset.
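+
+The delta check itself is a per-file comparison of last commit dates. The
+following sketch shows the idea behind GAΔ and RCΔ (it mirrors
+`bash-helpers/get-file-delta.sh`; the cutoff and folder values are examples
+only):
+
+```shell
+cutoff='2025-10-01 12:34:21'          # example cutoff date
+docFolder='content/vault/v1.20.x'     # example GA docset folder
+
+# Folder and file names can contain spaces, e.g. "v1.21.x (rc)"
+IFS=$'\n'
+
+# Print every file whose most recent commit is newer than the cutoff
+for file in $(find "${docFolder}" -type f); do
+  lastCommit=$(git log -1 --format=%ai "${file}" | cut -d ' ' -f1,2)
+  if [[ "${cutoff}" < "${lastCommit}" ]]; then
+    echo "${file}"
+  fi
+done
+```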
+
+The script determines what to do with the files based on the following rubric,
+where GAu and RCu are the sets of files unchanged since the cutoff in the GA and
+RC docsets:
+
+Set definition       | Implication        | Action
+-------------------- | ------------------ | -------------------------
+file ∈ { RCu ∧ GAu } | file unchanged     | ignore
+file ∈ { RCu ∧ GAΔ } | updated in GA only | safe to update in RC
+file ∈ { RCΔ ∧ GAu } | updated in RC only | ignore
+file ∈ { RCΔ ∧ GAΔ } | updated in both    | possible conflict; needs manual review
+file ∈ { RC ∧ !GA }  | new file for RC    | ignore
+file ∈ { !RC ∧ GA }  | new file for GA    | safe to update in RC
+
+If `-update` is `true`, the script overwrites files in the RC folder with the
+corresponding GA files for any file deemed "safe", prints a note to review the
+information in the conflict file, and updates the last run date.
+
+If `-update` is `false`, the script generates log files and exits.
+
+
+## Script output
+
+The script is designed to be chatty and print details of the run to `stdout`,
+but it also creates the following artifacts:
+
+- A new branch off of the RC branch called `bot/{product}-ga-to-rc-sync-{YYYYMMDD}`
+- A product record file (`data/run-records/last-run-{product}.txt`) with
+  the most recent run time.
+- Local output files with the following file sets for human review if needed:
+
+  File set            | Output file
+  ------------------- | --------------------
+  GAΔ                 | output/ga-delta.txt
+  RCΔ                 | output/rc-delta.txt
+  GA-only             | output/ga-only.txt
+  updated files       | output/safe-list.txt
+  potential conflicts | output/manual-review.txt
\ No newline at end of file
diff --git a/scripts/sync-ga-to-rc/bash-helpers/create-pr.sh b/scripts/sync-ga-to-rc/bash-helpers/create-pr.sh
new file mode 100755
index 0000000000..675b4719f7
--- /dev/null
+++ b/scripts/sync-ga-to-rc/bash-helpers/create-pr.sh
@@ -0,0 +1,52 @@
+#
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+#
+# ------------------------------------------------------------------------------
+#
+# Create GitHub PR -- Currently disabled
+#
+# Create a PR with the local changes
+#
+# Expected usage: create-pr.sh
+# Example: create-pr.sh 'vault' 'v1.21.x (rc)' 'vault/1.21.x' 'bot/vault-ga-to-rc-sync-20251002'
+
+exit # PR creation currently buggy
+
+# Pull in the common variable definitions
+currDir="$(dirname "$0")"
+. "${currDir}/definitions.sh"
+
+# Set variables from command line argument
+productKey="${1}" # product slug
+rcFolder="${2}"   # RC doc folder
+rcBranch="${3}"   # git branch name for RC docs
+prSource="${4}"   # git branch name for the local changes
+ghcli=$(which gh) # Check for the GitHub CLI
+
+# Bail if any of the command line parameters were omitted
+if [[ -z ${productKey} ]] ; then echo "!!! Missing product key" ; exit ; fi
+if [[ -z ${rcFolder} ]] ; then echo "!!! Missing RC folder name" ; exit ; fi
+if [[ -z ${rcBranch} ]] ; then echo "!!! Missing RC branch name" ; exit ; fi
+if [[ -z ${prSource} ]] ; then echo "!!! Missing PR branch name" ; exit ; fi
+
+# Bail if the GitHub CLI needs to be installed
+if [[ -z ${ghcli} ]] ; then
+  echo "Could not create PR.
Please install the GitHub CLI (gh)" + exit +fi + +cd "${repoRoot}" + +# Add any files updated under the RC directory +git add "content/${productKey}/${rcFolder}/*" > /dev/null 2>&1 +git commit -m "Auto update GA to RC sync" > /dev/null 2>&1 +git push -u origin ${prSource} > /dev/null 2>&1 + +# Create the draft PR +gh pr create \ + --title "${prTitle/""/${productKey}}" \ + --body "${prBody/""/${productKey}}" \ + --head "${prSource}" \ + --base "${rcBranch}" \ + --draft \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/bash-helpers/definitions.sh b/scripts/sync-ga-to-rc/bash-helpers/definitions.sh new file mode 100755 index 0000000000..a3c793b222 --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/definitions.sh @@ -0,0 +1,26 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Common values used by the bash helper files + +#myDir=$(pwd) +myDir="/home/goblin/repos/web-unified-docs/" # temp line for testing +localReposDir=${myDir%"/web-unified-docs"*} + +repoRoot="${localReposDir}/web-unified-docs" # Local root directory of the repo +docRoot="${repoRoot}/content/" # Root directory of product docs +rcTag=" (rc)" +betaTag=" (beta)" + +gaBranch="" # Set in helper from command line arguments; expected to be "main" +rcBranch="" # Set in helper from command line arguments; for example, "vault/1.21.x" +rcDocs="" # Set in helper from command line arguments; for example, "${docRoot}/v1.21.x" +gaDocs="" # Set in helper from command line arguments; for example, "${docRoot}/v1.20.x" + +jsonTemplate='{"file": "", "shortname": "", "commit": ""}' +prBranch="bot/-ga-to-rc-sync-$(date +%Y%m%d)" +prTitle=" GA to RC auto-sync" +prBody="Draft PR created by \`sync-ga-to-rc.mjs\` to push recent GA updates to the RC release branch for " diff --git a/scripts/sync-ga-to-rc/bash-helpers/get-cutoff.sh b/scripts/sync-ga-to-rc/bash-helpers/get-cutoff.sh new file mode 100755 index 0000000000..beee87325a --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/get-cutoff.sh @@ -0,0 +1,49 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Get branch creation date +# +# Query the git logs to find the creation date (or oldest commit) for the target +# branch. +# +# Expected usage: get-cutoff.sh +# Example: get-cutoff.sh vault/1.21.x + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. 
"${currDir}/definitions.sh" + +# Set variables from command line argument +targetBranch="${1}" # git branch name for RC docs + +# Bail if any of the command line parameters were omitted +if [[ -z ${targetBranch} ]] ; then exit ; fi + +cd "${repoRoot}" + +if [[ "${targetBranch}" == "main" ]] ; then + # Find the earliest commit we can as the "creation" date; since git log + # entries expire based on the setting for reflogexpire on the repo/branch + branchDate=$( + git log \ + --pretty=format:%ad \ + --date=iso \ + --date=format:'%Y-%m-%d %H:%M:%S' \ + "${targetBranch}" \ + | tail -1 + ) +else + branchDate=$( + git reflog \ + --grep-reflog="Created from" \ + --pretty=format:%ad \ + --date=iso \ + --date=format:'%Y-%m-%d %H:%M:%S' \ + "${targetBranch}" + ) +fi + +echo "${branchDate}" \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/bash-helpers/get-file-delta.sh b/scripts/sync-ga-to-rc/bash-helpers/get-file-delta.sh new file mode 100755 index 0000000000..2a85bd493e --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/get-file-delta.sh @@ -0,0 +1,51 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Get file delta +# +# Look through every file in the target folder and check if the latest commit is +# after the cutoff. If so, echo the details so the script can add it to the +# result array +# +# Expected usage: get-file-delta.sh +# Example: get-file-delta.sh vault vault/1.20.x '2025-10-01 12:34:21' + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. "${currDir}/definitions.sh" + +# Set variables from command line argument +productKey="${1}" # product slug +verFolder="${2}" # folder for GA docs +cutoff="${3}" # cutoff date for commit comparison + +# Bail if any of the command line parameters were omitted +if [[ -z "${productKey}" ]] ; then exit ; fi +if [[ -z "${verFolder}" ]] ; then exit ; fi +if [[ -z "${cutoff}" ]] ; then exit ; fi + +# Set the absolute path to the local folder +docFolder="${docRoot/''/"${productKey}"}/"${verFolder}"" + +cd "${repoRoot}" + +# Loop through each file in the version folder +IFS=$'\n' +for file in $(find "${docFolder}" -type f); do + + lastCommit=$( + git log -1 --format=%ai "${file}" | + cut -d " " -f1,2 + ) + # If the last commit happened after the cutoff, add it to the results + if [[ "${cutoff}" < "${lastCommit}" ]]; then + shortName=${file/"${docFolder}"/""} + jsonString=${jsonTemplate/''/"${file}"} + jsonString=${jsonString/''/"${shortName}"} + jsonString=${jsonString/''/"${lastCommit}"} + echo ${jsonString} + fi +done \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/bash-helpers/git-prep.sh b/scripts/sync-ga-to-rc/bash-helpers/git-prep.sh new file mode 100755 index 0000000000..b0f7290145 --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/git-prep.sh @@ -0,0 +1,45 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Git prep +# +# Sync the GA or RC branches and create a new branch for the update PR. +# +# Expected usage: git-prep.sh +# For example: git-prep.sh vault main vault/1.21.x + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. 
"${currDir}/definitions.sh" + +# Set variables from command line argument +productKey="${1}" # product name for new branch name +gaBranch="${2}" # git branch name for GA docs +rcBranch="${3}" # git branch name for RC docs + +# Bail if any of the command line parameters were omitted +if [[ -z ${productKey} ]] ; then return ; fi +if [[ -z ${gaBranch} ]] ; then return ; fi +if [[ -z ${rcBranch} ]] ; then return ; fi + +cd "${repoRoot}" + +# Sync to git +if [[ "${gaBranch}" == "${rcBranch}" ]] ; then + # Sync to the latest updates in the GA branch if the branches are the same + git checkout ${gaBranch} > /dev/null 2>&1 + git pull > /dev/null 2>&1 +else + # Sync to the latest updates in the RC branch if the branches are different + git checkout ${rcBranch} > /dev/null 2>&1 + git pull > /dev/null 2>&1 +fi + +# Create a new branch for the changes +git checkout -B ${prBranch/""/"${productKey}"} > /dev/null 2>&1 + +# Send the PR branch name back to the script +echo ${prBranch/""/"${productKey}"} diff --git a/scripts/sync-ga-to-rc/bash-helpers/log-prep.sh b/scripts/sync-ga-to-rc/bash-helpers/log-prep.sh new file mode 100755 index 0000000000..bb130776cb --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/log-prep.sh @@ -0,0 +1,44 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Local file/log prep +# +# Reset or create the output directory and associated files +# +# Usage: log-prep.sh +# Example: log-prep.sh '/home/web-unified-repo/scripts/sync-ga-to-rc/output' '/home/web-unified-repo/scripts/sync-ga-to-rc/data/run-records' + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. "${currDir}/definitions.sh" + +# Set variables from command line argument +logDir="${1}" +recordDir="${2}" + +# Bail if any of the command line parameters were omitted +if [[ -z ${logDir} ]] ; then return ; fi +if [[ -z ${recordDir} ]] ; then return ; fi + + +# Create the product records directory, if needed +if [[ ! -d ${recordDir} ]]; then mkdir ${recordDir} ; fi + +fileCount=$(ls ${logDir} | wc -l) + +# Create the log directory if needed, otherwise, delete the old files +if [[ ! -d ${logDir} ]]; then + mkdir ${logDir} +elif [ ${fileCount} -gt 0 ]; then + rm ${logDir}/* +fi + +# Prep the log files +touch ${logDir}/ga-delta.txt +touch ${logDir}/ga-only.txt +touch ${logDir}/rc-delta.txt +touch ${logDir}/safe-list.txt +touch ${logDir}/manual-review.txt \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/bash-helpers/only-in-ga.sh b/scripts/sync-ga-to-rc/bash-helpers/only-in-ga.sh new file mode 100755 index 0000000000..aa1adc18ed --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/only-in-ga.sh @@ -0,0 +1,46 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# +# Only in GA +# +# Find files that exist in the GA directory but not the RC directory +# +# Expected usage: only-in-ga.sh +# Example: only-in-ga.sh vault 'v1.21.x' 'v1.21.x (rc)' + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. 
"${currDir}/definitions.sh" + +# Set variables from command line argument +productKey="${1}" # product slug +gaFolder="${2}" # folder for GA docs +rcFolder="${3}" # folder for RC docs + +# Bail if any of the command line parameters were omitted +if [[ -z "${productKey}" ]] ; then exit ; fi +if [[ -z "${gaFolder}" ]] ; then exit ; fi +if [[ -z "${rcFolder}" ]] ; then exit ; fi + +# Build the absolute path for the GA and RC folders +gaPath="${docRoot/''/"${productKey}"}/"${gaFolder}"" +rcPath="${docRoot/''/"${productKey}"}/"${rcFolder}"" + +cd "${repoRoot}" + +for file in $( + diff -rq "${gaPath}" "${rcPath}" | + grep "^Only in ${gaPath}" | + awk '{print $3 $4}' +); do + fileName="${file/":"/"/"}" + shortName=${fileName/"${gaPath}"/""} + naCommit="0000-00-00 00:00:00" + jsonString=${jsonTemplate/''/"${fileName}"} + jsonString=${jsonString/''/"${shortName}"} + jsonString=${jsonString/''/"${naCommit}"} + echo ${jsonString} +done diff --git a/scripts/sync-ga-to-rc/bash-helpers/update-rc-docs.sh b/scripts/sync-ga-to-rc/bash-helpers/update-rc-docs.sh new file mode 100755 index 0000000000..c6f089d6bf --- /dev/null +++ b/scripts/sync-ga-to-rc/bash-helpers/update-rc-docs.sh @@ -0,0 +1,42 @@ +# +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +# +# ------------------------------------------------------------------------------ +# Update RC docs +# +# For every relative path in the input list, replace the RC version with the GA +# version +# +# Expected usage: update-rc-docs.sh +# Example: update-rc-docs.sh vault '1.20.x' '1.21.x (rc)' '../output/safe-list.txt' + +# Pull in the common variable definitions +currDir="$(dirname "$0")" +. "${currDir}/definitions.sh" + +# Set variables from command line argument +productKey="${1}" # root folder for product docs (product key) +gaFolder="${2}" # GA doc folder name +rcFolder="${3}" # RC doc folder name +safeList="${4}" # file of GA paths we can overwrite in RC + +# Bail if any of the command line parameters were omitted +if [[ -z "${productKey}" ]] ; then exit ; fi +if [[ -z "${gaFolder}" ]] ; then exit ; fi +if [[ -z "${rcFolder}" ]] ; then exit ; fi +if [[ -z "${safeList}" ]] ; then exit ; fi + +cd "${repoRoot}" + +while read line; do + + gaPath=$(echo "$line" | awk -F " " '{print $3}') + rcPath=${gaPath/${gaFolder}/${rcFolder}} + + # Skip any file that may have ended up in the list from a different product + if [[ "${gaPath}" != *"/content/${productKey}/"* ]]; then continue ; fi + if [[ "${rcPath}" != *"/content/${productKey}/"* ]]; then continue ; fi + cp -r "${gaPath}" "${rcPath}" + +done < "${safeList}" diff --git a/scripts/sync-ga-to-rc/data/exclude.json b/scripts/sync-ga-to-rc/data/exclude.json new file mode 100644 index 0000000000..42fad24670 --- /dev/null +++ b/scripts/sync-ga-to-rc/data/exclude.json @@ -0,0 +1,16 @@ +[ + { + "terraform": [ + "/tf/some/doc/1.mdx", + "/tf/some/doc/2.mdx", + "/tf/some/doc/.mdx" + ] + }, + { + "vault": [ + "/content/docs/updates/important-changes.mdx", + "/content/docs/updates/release-notes.mdx", + "/content/docs/updates/change-tracker.mdx" + ] + } +] \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/data/markdown/help.txt b/scripts/sync-ga-to-rc/data/markdown/help.txt new file mode 100755 index 0000000000..bc5ae91609 --- /dev/null +++ b/scripts/sync-ga-to-rc/data/markdown/help.txt @@ -0,0 +1,53 @@ +Sync recent GA changes to an unreleased docset. + +Usage: node sync-ga-to-rc.mjs -slug -ga -rc + [-tag ] [-branch ] [-date ) +The product slug used for the root content folder. 
For example, "vault" or +"well-architected-framework". + +-ga (string : ) +Version (and folder) of the current docset. For example, "v1.20.x" or "v0.17.x". + +-rc (string : ) +Version of the unreleased docset. + +-tag (string : "rc") +String used to tag non-GA docsets, typically "rc" or "beta". + +-branch (string : "main") +Name of the GA branch. + +-date (string : "") +Local override date in "YYYY-MM-DD HH:MM:SS" format for the commit date +comparison cutoff. By default, the script uses the last run date, if it exists. +If the product record does not have the last run date, the script defaults to +the creation date of the RC branch. + +-update (bool: false) +Tells the script to apply safe changes locally. + +-pr (bool: false) +Tells the script to apply safe changes locally and generate a PR if possible. + +-merged (bool: false) +Tells the script the GA and RC docs exist in the same branch (-ga) + +## Examples + +Basic call: + + node sync-ga-to-rc.mjs -slug vault -ga 1.20.x -rc 1.21.x -tag rc + +Use an override date: + + node sync-ga-to-rc.mjs \ + -slug vault \ + -ga 1.20.x \ + -rc 1.21.x \ + -tag rc \ + -date '2025-07-31 17:10:27' diff --git a/scripts/sync-ga-to-rc/data/markdown/warning.txt b/scripts/sync-ga-to-rc/data/markdown/warning.txt new file mode 100755 index 0000000000..c43b29fb00 --- /dev/null +++ b/scripts/sync-ga-to-rc/data/markdown/warning.txt @@ -0,0 +1,15 @@ + ⌜‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾⌝ + !!! ATTENTION !!! + --------------------------------------------------------------------------- + + Even if the script does not report any potential conflicts, make sure you + review the local file changes. + + The script does its best, but you should not fully trust the results. The + script only compares the last commit date and SHAs, not the full git + history. + + Note: IA changes may result in false positives where the script copies + over files from the GA docset that were correctly deleted from the RC + repo. + ⌞_________________________________________________________________________⌟ \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/data/run-records/last-run-vault.txt b/scripts/sync-ga-to-rc/data/run-records/last-run-vault.txt new file mode 100755 index 0000000000..a2f51592e2 --- /dev/null +++ b/scripts/sync-ga-to-rc/data/run-records/last-run-vault.txt @@ -0,0 +1 @@ +2025-10-08 10:34:00 \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/functions/init.mjs b/scripts/sync-ga-to-rc/functions/init.mjs new file mode 100644 index 0000000000..e55f300470 --- /dev/null +++ b/scripts/sync-ga-to-rc/functions/init.mjs @@ -0,0 +1,98 @@ +/** + * Copyright (c) HashiCorp, Inc. + * SPDX-License-Identifier: BUSL-1.1 + */ + +import { readFileSync } from 'node:fs' + +/** + * @method getExclusions + * + * The getExclusions function loads a JSON file of comparison exclusions and + * returns an array of file keys that the script should exclude from the GA to + * RC sync. For example, the "Important changes" doc in Vault will always be + * different between versions and does not need to be synced. + * + * @param {String} excludeFile Local, absolute path to the exclusion + * @param {String} product Product slug + */ +export function getExclusions(excludeFile, product) { + var excludeList = [] + var jsonData = '' + + try { + jsonData = JSON.parse(readFileSync(excludeFile, 'utf8')) + } catch (err) { + console.log('!!! 
Error :: Opening exclude file failed') + console.log(' - Details: ' + err) + } + + // If the file is empty, return + if (jsonData == null) { + return excludeList + } + + // Filter out the exclusions specific to the product slug + Object.values(jsonData).forEach((jsonObj) => { + if (Object.keys(jsonObj).includes(product)) { + excludeList = jsonObj[product] + } + }) + return excludeList +} + +/** + * @method getArgs + * + * The getArgs function parses the arguments from argv and returns an array of + * expected flag values based on the information passed by the user + * + */ +export function getArgs() { + var argName, argValue, arg + + // Create the list of expected flags and assign defaults where appropriate + var flags = { + '-slug': null, + '-ga': null, + '-rc': null, + '-tag': '', + '-branch': 'main', + '-date': null, + '-pr': false, + '-update': false, + '-merged': false, + '-help': false, + } + + // Grab the command line arguments provided + Object.entries(process.argv).forEach(([key, str]) => { + // Replace '--' with '-' in the flag name in case folks used '--flag' + // notation instead of '-flag' notation + arg = str.replace('--', '-') + + // Split the argument on '=' if folks used '-flag=value' notation instead of + // the expected '-flag value' notation + if (arg.includes('=')) { + argName = arg.split('=')[0] + argValue = arg.split('=')[1] + } else if (['-pr', '-help', '-update', '-merged'].includes(arg)) { + // If we see the help flag, immediately print the help text and exit + if (arg == '-help') { + flags[arg] = true + return flags + } else { + flags[arg] = true + } + } else { + argName = arg + argValue = process.argv[parseInt(key) + 1] + } + // Save the argument value + if (Object.keys(flags).includes(argName)) { + flags[argName] = argValue + } + }) + + return flags +} diff --git a/scripts/sync-ga-to-rc/functions/run-bash-cmd.mjs b/scripts/sync-ga-to-rc/functions/run-bash-cmd.mjs new file mode 100644 index 0000000000..21a03a49f3 --- /dev/null +++ b/scripts/sync-ga-to-rc/functions/run-bash-cmd.mjs @@ -0,0 +1,62 @@ +/** + * Copyright (c) HashiCorp, Inc. + * SPDX-License-Identifier: BUSL-1.1 + */ + +import { spawn } from 'node:child_process' + +/** + * @method runBashCmdAsync + * + * The runBashCmdAsync function takes a bash command, spawns a new bash process + * to run the command, and pushes the results from stdout to an array. 
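+ *
+ * A usage sketch (the command string below is illustrative; any bash command
+ * or helper-script invocation works):
+ *
+ * @example
+ * // `lines` is an array of raw stdout chunks, e.g. [ 'v1.20.x\nv1.21.x (rc)\n' ]
+ * const lines = await runBashCmdAsync('ls -1 content/vault')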
+ * + * @param {Object} cmdString bash command or path to a bash script + */ + +export async function runBashCmdAsync(cmdString) { + var bashOutput = [] + const bash = spawn(`bash`) + bash.stdin.setDefaultEncoding('utf-8') + + // Wait for the process to spawn + await new Promise((resolve) => { + bash.once(`spawn`, resolve) + }) + + // Push info from stdout to an array + bash.stdout.on(`data`, (data) => { + bashOutput.push(data.toString()) + }) + + // Print any information from stderr to the console + bash.stderr.on(`data`, (data) => { + console.log(data.toString()) + }) + + // Wait for the command string to execute + await new Promise((resolve) => { + bash.stdin.write(`${cmdString}\n`, () => { + resolve() + }) + }) + + bash.stdin.end() + + // Wait for the stdout and stderr streams to end, and for the bash process to + // close + await Promise.all([ + new Promise((resolve) => { + bash.stdout.on('end', resolve) + }), + new Promise((resolve) => { + bash.stderr.on('end', resolve) + }), + new Promise((resolve) => { + bash.once(`close`, resolve) + }), + ]) + + // Return the stdout results as an array + return bashOutput +} diff --git a/scripts/sync-ga-to-rc/functions/tools.mjs b/scripts/sync-ga-to-rc/functions/tools.mjs new file mode 100644 index 0000000000..7732d0e98c --- /dev/null +++ b/scripts/sync-ga-to-rc/functions/tools.mjs @@ -0,0 +1,141 @@ +/** + * Copyright (c) HashiCorp, Inc. + * SPDX-License-Identifier: BUSL-1.1 + */ + +import { createReadStream, readFileSync } from 'node:fs' +import { createHash } from 'node:crypto' + +/** + * @method processJson + * + * The processJson function takes an array of JSON objects and converts it to an + * assocative array of arrays so we can eventually compare keys across arrays. + * + * Expected schema: + * [ + * '{"file": "filename_1", "shortname": "shortname_1", "commit": "last_commit_date_1"}', + * '{"file": "filename_2", "shortname": "shortname_2", "commit": "last_commit_date_2"}', + * ... + * '{"file": "filename_N", "shortname": "shortname_N", "commit": "last_commit_date_N"}', + * ] + * + * @param {Object} rawResults Array of JSON objects + */ +export function processJson(rawResults) { + var results = [] + var jsonData + + rawResults.forEach((outputLine) => { + jsonData = JSON.parse(outputLine) + results[jsonData.shortname] = [jsonData.commit, jsonData.file] + }) + + return results +} + +/** + * @method flattenArray + * + * The flattenArray function takes an array of JSON objects, flattens it, and + * removes any blank lines to ensure that each array entry corresponds to exactly + * one JSON object. The helper scripts may write data faster than runBashCmdAsync + * pushes to the result array, so we flatten any results before trying to process + * anything. + * + * @param {Array} rawResults Array of JSON objects, may have multiple objects + * per entry + */ +export function flattenArray(rawResults) { + var results = [] + + if (rawResults == null) { + return results + } + + if (!Array.isArray(rawResults)) { + return results.push(rawResults) + } + + rawResults.forEach((outputLine) => { + if (Array.isArray(outputLine)) { + results.push(...flattenArray(outputLine)) + } else if (outputLine.includes('\n')) { + results.push(...flattenArray(outputLine.split('\n'))) + } else if (outputLine.length > 0) { + results.push(outputLine) + } + }) + + return results +} + +/** + * @method sameSHA + * + * The sameSHA function takes two file paths and an algorithm key and compares + * the SHA of the two files to determine if they are the same. 
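+ *
+ * A usage sketch (the file paths are illustrative; algoKey falls back to
+ * 'sha256' when omitted):
+ *
+ * @example
+ * // true when both files hash to the same digest
+ * const unchanged = await sameSHA('/tmp/ga/doc.mdx', '/tmp/rc/doc.mdx')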
+ * + * @param {String} file1 Local, absolute path to a file + * @param {String} file2 Local, absolute path to a file + * @param {String} [algoKey='sha256'] Crytographic algorithm for SHA creation; + */ +export async function sameSHA(file1, file2, algoKey) { + var hash1, hash2, hexHash1, hexHash2 + var stream1, stream2 + var algo + + if (algoKey == null) { + algo = 'sha256' + } else { + algo = algoKey + } + + await new Promise((resolve, reject) => { + hash1 = createHash(algo) + stream1 = createReadStream(file1) + stream1.on('error', reject) + stream1.on('data', (chunk) => { + return hash1.update(chunk) + }) + stream1.on('end', () => { + return resolve((hexHash1 = hash1.digest('hex'))) + }) + }) + + await new Promise((resolve, reject) => { + hash2 = createHash(algo) + stream2 = createReadStream(file2) + stream2.on('error', reject) + stream2.on('data', (chunk) => { + return hash2.update(chunk) + }) + stream2.on('end', () => { + return resolve((hexHash2 = hash2.digest('hex'))) + }) + }) + + return hexHash1 == hexHash2 +} + +/** + * @method printHelp + * + * The printHelp function takes an optional file path and prints the content to + * the screen. Calling helpFile without a file path prints the basica usage + * string. + * + * @param {String} helpFile Local, absolute path to a file + */ +export function printHelp(helpFile) { + const basicHelp = + 'Usage: node sync-ga-to-rc.mjs -slug -ga -rc ' + + try { + var helpText = readFileSync(helpFile, 'utf8') + console.log(helpText) + } catch (err) { + console.log('\n' + basicHelp + '\n') + throw err + } +} diff --git a/scripts/sync-ga-to-rc/functions/update-logs.mjs b/scripts/sync-ga-to-rc/functions/update-logs.mjs new file mode 100644 index 0000000000..ef027d8112 --- /dev/null +++ b/scripts/sync-ga-to-rc/functions/update-logs.mjs @@ -0,0 +1,93 @@ +/** + * Copyright (c) HashiCorp, Inc. + * SPDX-License-Identifier: BUSL-1.1 + */ + +import { appendFileSync } from 'node:fs' + +/** + * @method writeToFile + * + * The writeToFile function takes a string or assocative array of JSON objects + * and writes the data to a file with one entry per line. + * + * Expected schema for JSON array: + * [ + * "key_1": ["commit_date", "absolute_path_to_file"], + * "key_2": ["commit_date", "absolute_path_to_file"], + * ... + * "key_N": ["commit_date", "absolute_path_to_file"], + * ] + * + * @param {String} filePath Absolute file path to target file + * @param {Object} data Associative array/map of arrays + */ +export function writeToFile(filePath, data) { + if (filePath == null) { + return + } + if (data == null) { + return + } + + const dateIndex = 0 + const fileIndex = 1 + + var listEntry = '' + + if (Array.isArray(data)) { + Object.values(data).forEach((entry) => { + listEntry = '[' + entry[dateIndex] + '] ' + entry[fileIndex] + appendFileSync(filePath, listEntry + '\n') + }) + } else { + appendFileSync(filePath, data + '\n') + } +} + +/** + * @method writeConflictList + * + * The writeConflictList function takes an assocative array of strings with path + * and commit info of potential conflicts for manual review and writes the data + * to a file. + * + * Expected schema: + * [ + * "key_1": "multi_line string_1", + * "key_2": "multi_line string_2", + * ... 
+ * "key_N": "multi_line string_N", + * ] + * + * Expected file entry string format: + * + * {short_file_path}: + * GA: [{ga_commit_date}] {full_ga_path} + * RC: [{rc_commit_date}] {full_rc_path} + * + * For example: + * + * /content/partials/tips/change-tracker.mdx: + * GA: [2025-08-15 16:38:58] /local/repo/path/web-unified-docs/content/vault/v1.20.x/content/docs/sync/gcpsm.mdx + * RC: [2025-08-23 10:52:06] /local/repo/path/web-unified-docs/content/vault/v1.21.x (rc)/content/docs/sync/gcpsm.mdx + * + * @param {String} filePath Absolute file path to target file + * @param {Object} data Associative array/map of string + */ +export function writeConflictList(filePath, data) { + if (filePath == null) { + return + } + if (data == null) { + return + } + + if (Array.isArray(data)) { + Object.values(data).forEach((entry) => { + appendFileSync(filePath, entry + '\n') + }) + } else { + appendFileSync(filePath, data + '\n') + } +} diff --git a/scripts/sync-ga-to-rc/restore-commands b/scripts/sync-ga-to-rc/restore-commands new file mode 100644 index 0000000000..e2a7828130 --- /dev/null +++ b/scripts/sync-ga-to-rc/restore-commands @@ -0,0 +1,3 @@ +git restore '../../content/vault/v1.21.x (rc)/**/*' +git restore '../../content/vault/v1.21.x (rc)/redirects.jsonc' +git clean -fd '../../content/vault/v1.21.x (rc)/**/*' \ No newline at end of file diff --git a/scripts/sync-ga-to-rc/sync-ga-to-rc.mjs b/scripts/sync-ga-to-rc/sync-ga-to-rc.mjs new file mode 100644 index 0000000000..394d23c20d --- /dev/null +++ b/scripts/sync-ga-to-rc/sync-ga-to-rc.mjs @@ -0,0 +1,388 @@ +/** + * Copyright (c) HashiCorp, Inc. + * SPDX-License-Identifier: BUSL-1.1 + * + * Sync GA change to RC docset + * + * The GA -> RC sync script helps with maintenance of long-lived release branches + * by comparing updates since a provided cutoff in the current (GA) docset + * against unpdates in an unreleased (RC) docset. The default cutoff date is the + * last run date for the associated product when available and the creation date + * of the RC release branch otherwise. The script standardizes timestamps to + * ISO for simplicity but takes the optional override date as a local time. + * + * You can also use the script to sync existing docsets, but that is a seconary + * use case. 
+ * + * @param {String} product Slug used for the root product content folder + * @param {String} gaVersion Folder of the current docset, typically the GA version number + * @param {String} rcVersion Folder of the unreleased docset, typically the non-GA version number + * @param {String} docTag String used to tag non-GA docsets, typically "rc" or "beta" + * @param {String} gaBranch Name of the GA branch, typically "main" + * @param {String} overrideDate Optional local cutoff date and time in "YYYY-MM-DD HH:MM:SS" format + * + */ + +import { readFileSync, writeFileSync } from 'node:fs' +import { fileURLToPath } from 'node:url' +import { dirname } from 'node:path' +import { runBashCmdAsync } from './functions/run-bash-cmd.mjs' +import { writeToFile, writeConflictList } from './functions/update-logs.mjs' +import { + flattenArray, + processJson, + sameSHA, + printHelp, +} from './functions/tools.mjs' +import { getArgs, getExclusions } from './functions/init.mjs' + +// Figure out where the script lives so we can use absolute paths for things +const __filename = fileURLToPath(import.meta.url) +const __dirname = dirname(__filename) + +// Define the relevant directories and data files +const outputDir = __dirname + '/' + 'output' +const helpersDir = __dirname + '/' + 'bash-helpers' +const dataDir = __dirname + '/' + 'data' +const excludeFile = `${dataDir}/exclude.json` +const logDir = `${outputDir}` +const recordDir = `${dataDir}/run-records` +const warningFile = `${dataDir}/markdown/warning.txt` +const helpFile = `${dataDir}/markdown/help.txt` + +// Get the configuration flags +const flags = getArgs() + +// If -help is true, print the help file and exit +if (flags['-help']) { + printHelp(helpFile) + process.exit() +} + +// Set the remaining flags +const product = flags['-slug'] +const gaVersion = flags['-ga'] +const rcVersion = flags['-rc'] +const docTag = flags['-tag'] +const gaBranch = flags['-branch'] +const overrideDate = flags['-date'] +const rcMerged = flags['-merged'] +const makePR = flags['-pr'] && !flags['-merged'] +const updateFiles = flags['-update'] || flags['-pr'] + +// Make the script chatty so folks can track progress since the git-related +// steps may take a while +console.log('--- Syncing GA changes to RC folder under RC branch: start') + +// Grab the run date so we can update the product record later +const currentDate = new Date() +const runDate = currentDate.toISOString().substring(0, 19).replace('T', ' ') + +// Convert the override date to ISO +const isoOverrideDate = + overrideDate == null + ? overrideDate + : new Date(overrideDate).toISOString().substring(0, 19).replace('T', ' ') + +// Build constants for the RC branch and the versioned folder names + +// If the RC docset is merged, use the GA branch as the RC branch +const rcBranch = rcMerged ? gaBranch : product + '/' + rcVersion + +// Ignore the doc tag if the flag value is empty +const rcTag = docTag == '' ? 
docTag : ' (' + docTag + ')' +const gaFolder = 'v' + gaVersion +const rcFolder = 'v' + rcVersion + rcTag + +// Define the output files and bash script helpers +const gaDeltaFile = `${logDir}/ga-delta.txt` +const gaOnlyFile = `${logDir}/ga-only.txt` +const rcDeltaFile = `${logDir}/rc-delta.txt` +const safeListFile = `${logDir}/safe-list.txt` +const manualReviewFile = `${logDir}/manual-review.txt` +const productRecord = `${recordDir}/last-run-${product}.txt` +const logPrep = `${helpersDir}/log-prep.sh '${logDir}' '${recordDir}'` +const gitPrep = `${helpersDir}/git-prep.sh '${product}' '${gaBranch}' '${rcBranch}'` +const getCutoff = `${helpersDir}/get-cutoff.sh '${rcBranch}'` +const getGADelta = `${helpersDir}/get-file-delta.sh '${product}' '${gaFolder}' ''` +const getRCDelta = `${helpersDir}/get-file-delta.sh '${product}' '${rcFolder}' ''` +const getGAOnly = `${helpersDir}/only-in-ga.sh '${product}' '${gaFolder}' '${rcFolder}'` +const updateRCDocs = `${helpersDir}/update-rc-docs.sh '${product}' '${gaFolder}' '${rcFolder}' '${safeListFile}'` +const createPR = `${helpersDir}/create-pr.sh '${product}' '${rcFolder}' '${rcBranch}' ''` + +// Initialize some variables +var bashOutput = '' // Reusable variable used to catch the output from bash helpers +var lastRunDate = '' // Last run date in the product record file +var rcCutoffDate = '' // Cutoff date for comparing file updates, either the RC branch creation or the provided override date +var gaOnly = [] // Set of docs that only exist in the GA folder +var gaDelta = [] // GA docs with a last commit date after the cutoff +var rcDelta = [] // RC docs with a last commit date after the cutoff +var noDelta, noNewFiles +const dateIndex = 0 +const fileIndex = 1 + +/*** INITIALIZATION ***********************************************************/ + +// Prep: Initialize the output files +console.log('\n Prepping output directories/files') +await runBashCmdAsync(logPrep) + +// Prep: Sync the GA and RC branches and create the PR branch +const prBranch = await runBashCmdAsync(gitPrep) + +// Let folks know what information the script is working with +console.log( + '\n Syncing git data for GA and RC branches and creating PR branch', +) +if (updateFiles && makePR) { + console.log(' - Updating local files and creating PR') +} else if (updateFiles) { + console.log(' - Updating local files') +} else { + console.log(' - Data gathering only') +} + +console.log(' - Product: ' + product) +console.log(' - GA branch: ' + gaBranch) +console.log(' - GA version: ' + gaVersion) +console.log(' - RC branch: ' + rcBranch) +console.log(' - RC version: ' + rcVersion) +console.log(' - Work branch: ' + prBranch) + +// Prep: Get the exclusion list +console.log('\n Loading exclusions for ' + product) +const excludeList = getExclusions(excludeFile, product) +if (excludeList.length > 0) { + console.log(' Excluding:') + excludeList.forEach((key) => { + console.log(' - ' + key) + }) +} else { + console.log(' No exclusions found') +} + +/*** GET CUTOFF ***************************************************************/ +console.log('\n Determining cutoff date') + +/* + Check for a last run date, calculate the branch creation date and set the + cutoff. 
We always calculate the creation date so folks can compare the date + used during comparison with the age of the branch as a check that the date + makes sense +*/ + +// Grab the last run date for the product, if it exists +try { + console.log(' Reading ' + productRecord) + lastRunDate = readFileSync(productRecord, 'utf8') +} catch (err) { + console.log(' Error reading last run date: ' + err) + lastRunDate = null +} + +// Pull the creation date of the release branch; if the last run date is older +// than the branch date, use the branch date as the default. Otherwise, use +// the last script run date from the product record +console.log(' Fetching creation date for ' + rcBranch) +bashOutput = flattenArray(await runBashCmdAsync(getCutoff)) +if (bashOutput.length == 0) { + console.log('!!! ERROR: Could not fetch the branch creation date') + process.exit() +} + +if (isoOverrideDate != null) { + console.log(' - Override date (local) = ' + overrideDate) + console.log(' - Override date (ISO) = ' + isoOverrideDate) +} +console.log(' - Branch creation date = ' + bashOutput[0]) +console.log(' - Last run date = ' + lastRunDate) + +// If the last run date is unset or before branch creation, prefer the branch +// creation date +if (lastRunDate == null || lastRunDate < bashOutput[0]) { + lastRunDate = bashOutput[0] +} + +// Let folks know which date we selected as the cutoff +rcCutoffDate = isoOverrideDate == null ? lastRunDate : isoOverrideDate +console.log(' - Using cutoff date = ' + rcCutoffDate) + +/*** GET FILE SETS: GAΔ *******************************************************/ +process.stdout.write('\n Building GAΔ ') + +// Call helpers/get-file-delta.sh with the cutoff date and GA info to build GAΔ +const gaDeltaRaw = flattenArray( + await runBashCmdAsync(getGADelta.replace('', rcCutoffDate)), +) +gaDelta = processJson(gaDeltaRaw) +noDelta = Object.keys(gaDelta).length == 0 +console.log( + '[' + + Object.keys(gaDelta).length.toString().padStart(2, 0) + + '] ' + + gaDeltaFile, +) +writeToFile( + gaDeltaFile, + noDelta ? 'No GA changes since ' + rcCutoffDate : gaDelta, +) + +/*** GET FILE SETS: RCΔ *******************************************************/ +process.stdout.write(' Building RCΔ ') + +// Call helpers/get-file-delta.sh with the cutoff date and RC info to build RCΔ +const rcDeltaRaw = flattenArray( + await runBashCmdAsync(getRCDelta.replace('', rcCutoffDate)), +) +rcDelta = processJson(rcDeltaRaw) +noDelta = Object.keys(rcDelta).length == 0 +console.log( + '[' + + Object.keys(rcDelta).length.toString().padStart(2, 0) + + '] ' + + rcDeltaFile, +) +writeToFile( + rcDeltaFile, + noDelta ? 'No RC changes since ' + rcCutoffDate : rcDelta, +) + +/*** GET FILE SETS: GA-only ***************************************************/ +process.stdout.write(' Building GA-only ') + +// Call helpers/only-in-ga.sh to build GA-only +const gaOnlyRaw = flattenArray(await runBashCmdAsync(getGAOnly)) +gaOnly = processJson(gaOnlyRaw) +noNewFiles = Object.keys(gaOnly).length == 0 +console.log( + '[' + + Object.keys(gaOnly).length.toString().padStart(2, 0) + + '] ' + + gaOnlyFile, +) +writeToFile( + gaOnlyFile, + noNewFiles ? 
'No new GA files since ' + rcCutoffDate : gaOnly, +) + +/*** GET FILE SETS: Safe and conflict lists ***********************************/ +console.log(' Comparing GAΔ and RCΔ') + +/* + * Create the set of files we can safely slam and the list of files we need to + * manually review: + * push to RC = file ∈ { !RCΔ ∧ GAΔ } or { GA-only } + * manual review = file ∈ { RCΔ ∧ GAΔ } + */ + +var pushtoRC = [] +var manualReview = [] +var noUpdates, noConflicts, canSkip + +for (const key in gaOnly) { + if (excludeList.includes(key)) { + // If the key is on the exclusion list, ignore it + console.log(' - Excluding ' + key) + } else { + // Add the file to the safe list + pushtoRC[key] = gaDelta[key] + } +} + +for (const key in gaDelta) { + if (excludeList.includes(key)) { + // If the key is on the exclusion list, ignore it + console.log(' - Excluding ' + key) + } else if (!Object.keys(rcDelta).includes(key)) { + // If the key only exists in GAΔ, add it to the safe list + pushtoRC[key] = gaDelta[key] + } else { + // If the key exists in GAΔ and RCΔ, check the SHA. + canSkip = await sameSHA(gaDelta[key][fileIndex], rcDelta[key][fileIndex]) + + // If the SHAs are the same, it means we already synced the changes or + // someone synced them manually + if (canSkip) { + continue + } + + // If the SHAs are different, add the file details for GA and RC to the + // conflict list with the lates commit date of each file for manual review + manualReview[key] = + key + + ':\n GA: ' + + '[' + + gaDelta[key][dateIndex] + + '] ' + + gaDelta[key][fileIndex] + + '\n RC: ' + + '[' + + rcDelta[key][dateIndex] + + '] ' + + rcDelta[key][fileIndex] + } +} + +noUpdates = Object.keys(pushtoRC).length == 0 +noConflicts = Object.keys(manualReview).length == 0 + +console.log( + ' - Files to update in RC: [' + + Object.keys(pushtoRC).length.toString().padStart(2, 0) + + '] ' + + safeListFile, +) +writeToFile(safeListFile, noUpdates ? 'No safe files found.' : pushtoRC) + +console.log( + ' - Files for manual review: [' + + Object.keys(manualReview).length.toString().padStart(2, 0) + + '] ' + + manualReviewFile, +) +writeConflictList( + manualReviewFile, + noConflicts ? 'No conflicts found.' : manualReview, +) + +// Only update things locally if the user provided the update or pr flag +if (updateFiles) { + console.log(' Updating RC files') + bashOutput = await runBashCmdAsync(updateRCDocs) + + if (Object.keys(manualReview).length > 0) { + console.log( + ' To make additional changes, review potential conflicts in: ' + + manualReviewFile, + ) + } + + // The automatic PR generation is currently buggy so the helper just exits + // immediately for now. + if (makePR) { + console.log(' Creating draft PR --- !!! CURRENTLY DISABLED !!!') + bashOutput = await runBashCmdAsync( + createPR.replace('', prBranch), + ) + bashOutput.forEach((line) => { + console.log(line) + }) + + try { + var warningText = readFileSync(warningFile, 'utf8') + console.log('\n' + warningText) + } catch (err) { + console.error('!!! ERROR!' + err) + console.error(' ' + err) + } + } + + // Update the product record with the new run date and print the reminder to + // review changes for false positives + writeFileSync(productRecord, runDate) +} + +console.log('') +console.log('--- Syncing GA changes to RC folder under RC branch: end') + +process.exit()