diff --git a/scripts/run.sh b/scripts/run.sh
index 5b84d85..bf269d4 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -15,6 +15,7 @@
 #   POD_IP_TYPE         = Address type of POD_IP (one of IPV4, IPv6)
 
 env | sort | grep "POD\|HOST\|NAME"
+echo "running">/scripts/setup.txt
 RECOVERY_DONE_FILE="/tmp/recovery.done"
 if [[ "$PITR_RESTORE" == "true" ]]; then
     while true; do
@@ -181,23 +182,31 @@ EOL
 # wait for mysql daemon be running (alive)
 function wait_for_mysqld_running() {
     local mysql="$mysql_header --host=$localhost"
+    local max_restarts=60
+    local restarts=0
 
-    for i in {900..0}; do
+    while true; do
+        if ! kill -0 "$pid" 2>/dev/null; then
+            if (( restarts >= max_restarts )); then
+                log "ERROR" "mysqld (pid=$pid) died and exceeded $max_restarts restart attempts. Aborting."
+                exit 1
+            fi
+            restarts=$((restarts + 1))
+            log "ERROR" "mysqld (pid=$pid) is no longer running. Restart attempt $restarts/$max_restarts..."
+            start_mysqld_in_background
+            sleep 10
+            continue
+        fi
+        # mysqld is alive: probe it with a trivial query; a failed probe is logged below.
         out=$(${mysql} -N -e "select 1;" 2>/dev/null)
-        log "INFO" "Attempt $i: Pinging '$report_host' has returned: '$out'...................................."
         if [[ "$out" == "1" ]]; then
             break
         fi
-
+        log "INFO" "Pinging '$report_host' has returned: '$out' (pid=$pid alive, restarts=$restarts)"
         echo -n .
         sleep 1
     done
 
-    if [[ "$i" == "0" ]]; then
-        echo ""
-        log "ERROR" "Server ${report_host} failed to start in 900 seconds............."
-        exit 1
-    fi
     log "INFO" "mysql daemon is ready to use......."
 
     # Set read-only immediately after MySQL starts to prevent any external
@@ -597,6 +606,8 @@ install_group_replication_plugin
 install_clone_plugin
 
 while true; do
+    echo "running">/scripts/setup.txt
+    log "INFO" "creating setup.txt file"
     kill -0 $pid
     exit="$?"
     if [[ "$exit" == "0" ]]; then
@@ -632,6 +643,8 @@ while true; do
         join_by_clone
     fi
     joining_for_first_time=0
+    log "INFO" "removing setup.txt file"
+    rm -rf /scripts/setup.txt
     log "INFO" "waiting for mysql process id  = $pid"
     wait $pid
 done
diff --git a/scripts/run_innodb.sh b/scripts/run_innodb.sh
index f422731..ee4bf2f 100755
--- a/scripts/run_innodb.sh
+++ b/scripts/run_innodb.sh
@@ -1,5 +1,25 @@
 #!/usr/bin/env bash
 #set -x
+# run_innodb.sh — MySQL InnoDB Cluster init script
+# Compatibility: MySQL 8.0.x
+
+env | sort | grep "POD\|HOST\|NAME"
+echo "running">/scripts/setup.txt
+RECOVERY_DONE_FILE="/tmp/recovery.done"
+if [[ "$PITR_RESTORE" == "true" ]]; then
+    while true; do
+      sleep 2
+      echo "Point In Time Recovery In Progress. Waiting for $RECOVERY_DONE_FILE file"
+      if [[ -e "$RECOVERY_DONE_FILE" ]]; then
+        echo "$RECOVERY_DONE_FILE found."
+        break
+      fi
+    done
+fi
+
+if [[ -e "$RECOVERY_DONE_FILE" ]]; then
+  rm -- "$RECOVERY_DONE_FILE"
+fi
 
 function timestamp() {
     date +"%Y/%m/%d %T"
@@ -13,7 +33,7 @@ function log() {
 
 #stores all the arguments that are passed from statefulSet
 args=$@
-report_host="$HOSTNAME.$GOV_SVC.$POD_NAMESPACE.svc"
+report_host="$HOSTNAME.$GOV_SVC.$POD_NAMESPACE"
 log "INFO" "report_host = $report_host"
 # wait for the peer-list file created by coordinator
 while [ ! -f "/scripts/peer-list" ]; do
@@ -46,6 +66,7 @@ cat >>/etc/mysql/default.d/my.cnf <<EOL
 default_authentication_plugin=mysql_native_password
 #loose-group_replication_ip_whitelist = "${whitelist}"
 loose-group_replication_ip_allowlist = "${whitelist}"
+log_error_suppression_list = 'MY-013360'
 
 # recommended config
 innodb_buffer_pool_size = "$INNODB_BUFFER_POOL_SIZE"
@@ -68,6 +89,11 @@ function retry {
             return $exit
         fi
         count=$(($count + 1))
+        # Allow coordinator to stop retries
+        retryfile="/scripts/retry-stop"
+        if [ -e "$retryfile" ]; then
+            return 0
+        fi
     done
     return 0
 }
@@ -77,22 +103,72 @@ function wait_for_host_online() {
     log "INFO" "checking for host $2 to come online"
 
     local mysqlshell="mysql -u$1 -h$2 -p$3" # "mysql -uroot -ppass -hmysql-server-0.mysql-server.default.svc"
-    retry 900 ${mysqlshell} -e "select 1;" | awk '{print$1}'
-    out=$(${mysqlshell} -e "select 1;" | head -n1 | awk '{print$1}')
-    if [[ "$out" == "1" ]]; then
-        log "INFO" "host $2 is online"
-    else
-        log "INFO" "server failed to comes online within 900 seconds"
-    fi
+    local max_restarts=60
+    local restarts=0
+
+    while true; do
+        # If mysqld has exited, restart it (at most max_restarts times) before probing again.
+        if ! kill -0 "$pid" 2>/dev/null; then
+            if (( restarts >= max_restarts )); then
+                log "ERROR" "mysqld (pid=$pid) died and exceeded $max_restarts restart attempts. Aborting."
+                exit 1
+            fi
+            restarts=$((restarts + 1))
+            log "ERROR" "mysqld (pid=$pid) is no longer running. Restart attempt $restarts/$max_restarts..."
+            start_mysqld_in_background
+            sleep 10
+            continue
+        fi
+        out=$(${mysqlshell} -e "select 1;" | head -n1 | awk '{print$1}')
+        if [[ "$out" == "1" ]]; then
+            break
+        fi
+        log "INFO" "Pinging '$2' has returned: '$out' (pid=$pid alive, restarts=$restarts)"
+        echo -n .
+        sleep 1
+    done
 
+    log "INFO" "mysql daemon is ready to use......."
+
+    # Set read-only immediately after MySQL starts to prevent any external
+    # process from writing local GTIDs before the node joins the cluster.
+    local mysql_ro="mysql -u${MYSQL_ROOT_USERNAME} -hlocalhost -p${MYSQL_ROOT_PASSWORD} --port=3306"
+    ${mysql_ro} -N -e "SET GLOBAL read_only=ON; SET GLOBAL super_read_only=ON;" 2>/dev/null
+    log "INFO" "Set super_read_only=ON to prevent errant GTIDs"
+}
+
+# mysql client shorthand — always use root for local operations
+mysql_local="mysql -u${MYSQL_ROOT_USERNAME} -hlocalhost -p${MYSQL_ROOT_PASSWORD} --port=3306"
+replication_user=repl
+
+# Kill any stale mysqlsh AdminAPI session holding the cluster-wide EXCLUSIVE lock
+# on $1 (usually the primary). A session holding AdminAPI_lock while in Sleep
+# state means a previous mysqlsh call died without releasing — rescan/addInstance/
+# rejoinInstance will hang with MYSQLSH 51500. Legitimate in-flight AdminAPI ops
+# are always in Query state, never Sleep. Kill Sleep>5s holders to auto-recover.
+function clear_stale_cluster_lock() {
+    local target_host=$1
+    local mysql_root="mysql -u${MYSQL_ROOT_USERNAME} -h${target_host} -p${MYSQL_ROOT_PASSWORD} --port=3306 -N"
+    local stuck_ids stuck_id
+    stuck_ids=$(${mysql_root} -e "
+        SELECT t.PROCESSLIST_ID
+        FROM performance_schema.metadata_locks m
+        JOIN performance_schema.threads t ON m.OWNER_THREAD_ID = t.THREAD_ID
+        WHERE m.OBJECT_SCHEMA='AdminAPI_cluster'
+          AND m.OBJECT_NAME='AdminAPI_lock'
+          AND m.LOCK_TYPE='EXCLUSIVE'
+          AND t.PROCESSLIST_COMMAND='Sleep'
+          AND t.PROCESSLIST_TIME > 5;" 2>/dev/null | awk 'NF')
+    if [[ -n "$stuck_ids" ]]; then
+        for stuck_id in $stuck_ids; do
+            log "WARNING" "Killing stale AdminAPI_lock holder on ${target_host} (conn=${stuck_id}, Sleep>5s)"
+            ${mysql_root} -e "KILL ${stuck_id};" 2>/dev/null
+        done
+        sleep 2
+    fi
 }
 
 function create_replication_user() {
-    # MySql server's need a replication user to communicate with each other
-    # 01. official doc (section from 17.2.1.3 to 17.2.1.5): https://dev.mysql.com/doc/refman/5.7/en/group-replication-user-credentials.html
-    # 02. https://dev.mysql.com/doc/refman/8.0/en/group-replication-secure-user.html
-    # 03. repl user permissions: https://www.sqlshack.com/deploy-mysql-innodb-clusters-for-high-availability/
-    # 04. digitalocean doc: https://www.digitalocean.com/community/tutorials/how-to-configure-mysql-group-replication-on-ubuntu-16-04
     log "INFO" "Checking whether replication user exist or not..."
     local mysql="mysql -u ${MYSQL_ROOT_USERNAME} -hlocalhost -p${MYSQL_ROOT_PASSWORD} --port=3306"
 
@@ -102,21 +178,36 @@ function create_replication_user() {
     # if the user doesn't exist, crete new one.
     if [[ "$out" -eq "0" ]]; then
         log "INFO" "Replication user not found. Creating new replication user..."
-        retry 120 ${mysql} -N -e "SET SQL_LOG_BIN=0;"
-        retry 120 ${mysql} -N -e "CREATE USER 'repl'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}' REQUIRE SSL;"
-        retry 120 ${mysql} -N -e "GRANT CREATE USER, FILE, PROCESS, RELOAD, REPLICATION CLIENT, REPLICATION SLAVE, SELECT, SHUTDOWN, SUPER ON *.* TO 'repl'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "GRANT DELETE, INSERT, UPDATE ON mysql.* TO 'repl'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata.* TO 'repl'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata_bkp.* TO 'repl'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata_previous.* TO 'repl'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "GRANT CLONE_ADMIN, BACKUP_ADMIN, CONNECTION_ADMIN, EXECUTE, GROUP_REPLICATION_ADMIN, PERSIST_RO_VARIABLES_ADMIN, REPLICATION_APPLIER, REPLICATION_SLAVE_ADMIN, ROLE_ADMIN, SYSTEM_VARIABLES_ADMIN ON *.* TO 'repl'@'%' WITH GRANT OPTION;"
-        #mysql-server docker image doesn't has the user root that can connect from any host
-        retry 10 ${mysql} -N -e "CREATE USER 'root'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';"
-        retry 120 ${mysql} -N -e "GRANT ALL ON *.* TO 'root'@'%' WITH GRANT OPTION;"
-        retry 120 ${mysql} -N -e "FLUSH PRIVILEGES;"
-        retry 120 ${mysql} -N -e "SET SQL_LOG_BIN=1;"
+        retry 120 ${mysql} -N -e "
+            SET SQL_LOG_BIN=0;
+            SET GLOBAL super_read_only=OFF;
+            SET GLOBAL read_only=OFF;
+            CREATE USER IF NOT EXISTS 'repl'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}' REQUIRE SSL;
+            GRANT CREATE USER, FILE, PROCESS, RELOAD, REPLICATION CLIENT, REPLICATION SLAVE, SELECT, SHUTDOWN, SUPER ON *.* TO 'repl'@'%' WITH GRANT OPTION;
+            GRANT DELETE, INSERT, UPDATE ON mysql.* TO 'repl'@'%' WITH GRANT OPTION;
+            GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata.* TO 'repl'@'%' WITH GRANT OPTION;
+            GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata_bkp.* TO 'repl'@'%' WITH GRANT OPTION;
+            GRANT ALTER, ALTER ROUTINE, CREATE, CREATE ROUTINE, CREATE TEMPORARY TABLES, CREATE VIEW, DELETE, DROP, EVENT, EXECUTE, INDEX, INSERT, LOCK TABLES, REFERENCES, SHOW VIEW, TRIGGER, UPDATE ON mysql_innodb_cluster_metadata_previous.* TO 'repl'@'%' WITH GRANT OPTION;
+            GRANT CLONE_ADMIN, BACKUP_ADMIN, CONNECTION_ADMIN, EXECUTE, GROUP_REPLICATION_ADMIN, PERSIST_RO_VARIABLES_ADMIN, REPLICATION_APPLIER, REPLICATION_SLAVE_ADMIN, ROLE_ADMIN, SYSTEM_VARIABLES_ADMIN ON *.* TO 'repl'@'%' WITH GRANT OPTION;
+            CREATE USER IF NOT EXISTS 'root'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';
+            GRANT ALL ON *.* TO 'root'@'%' WITH GRANT OPTION;
+            FLUSH PRIVILEGES;
+            SET SQL_LOG_BIN=1;
+        "
+    else
+        log "INFO" "Replication user exists. Updating password if changed..."
+        retry 120 ${mysql} -N -e "
+            SET SQL_LOG_BIN=0;
+            SET GLOBAL super_read_only=OFF;
+            SET GLOBAL read_only=OFF;
+            ALTER USER 'repl'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';
+            ALTER USER IF EXISTS 'root'@'%' IDENTIFIED BY '${MYSQL_ROOT_PASSWORD}';
+            FLUSH PRIVILEGES;
+            SET SQL_LOG_BIN=1;
+        "
     fi
-    #    retry 120 ${mysql} -N -e "CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='$MYSQL_ROOT_PASSWORD' FOR CHANNEL 'group_replication_recovery';"
+    # Re-enable read_only after user creation
+    ${mysql} -N -e "SET GLOBAL read_only=ON; SET GLOBAL super_read_only=ON;" 2>/dev/null
     touch /scripts/ready.txt
 }
 
@@ -125,7 +216,7 @@ already_configured=0
 
 function configure_instance() {
     log "INFO" "configuring instance $report_host."
-    local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD}"
+    local mysqlshell="mysqlsh -u${MYSQL_ROOT_USERNAME} -p${MYSQL_ROOT_PASSWORD}"
 
     retry 120 ${mysqlshell} --sql -e "select @@gtid_mode;"
     gtid=($($mysqlshell --sql -e "select @@gtid_mode;"))
@@ -135,22 +226,17 @@ function configure_instance() {
         return
     fi
 
-    retry 30 ${mysqlshell} -e "dba.configureInstance('${replication_user}@${report_host}',{password:'${MYSQL_ROOT_PASSWORD}',interactive:false,restart:false});"
-    #instance need to restart after configuration
-    # Prevent creation of new process until this one is finished
-    #https://serverfault.com/questions/477448/mysql-keeps-crashing-innodb-unable-to-lock-ibdata1-error-11
-    #The most common cause of this problem is trying to start MySQL when it is already running.
+    yes | ${mysqlshell} -e "dba.configureInstance('${MYSQL_ROOT_USERNAME}:${MYSQL_ROOT_PASSWORD}@${report_host}:3306',{mycnfPath:'/etc/mysql/my.cnf',restart:false});"
 
-    #for non-root users, set the restart flag to false, stop the mysqld process, set restart_required=1 to start the process
     mysqladmin -u ${MYSQL_ROOT_USERNAME} -hlocalhost -p${MYSQL_ROOT_PASSWORD} --port=3306 shutdown
     wait $pid
     restart_required=1
 }
 
 function create_cluster() {
-    local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${report_host}"
+    local mysqlshell="mysqlsh -u${MYSQL_ROOT_USERNAME} -p${MYSQL_ROOT_PASSWORD} -h${report_host}"
     clusterName=$(echo -n $BASE_NAME | sed 's/-/_/g')
-    retry 5 $mysqlshell -e "cluster=dba.createCluster('$clusterName',{consistency:'BEFORE_ON_PRIMARY_FAILOVER',manualStartOnBoot:'true'});"
+    retry 5 $mysqlshell -e "cluster=dba.createCluster('$clusterName',{communicationStack:'MYSQL',manualStartOnBoot:true});"
 }
 
 export primary=""
@@ -175,7 +261,8 @@ already_in_cluster=0
 
 function is_already_in_cluster() {
     local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${primary}"
-    ${mysqlshell} -e "cluster = dba.getCluster();  cluster.rescan({addInstances:['${report_host}:3306'],interactive:false})"
+    clear_stale_cluster_lock "${primary}"
+    ${mysqlshell} -e "cluster = dba.getCluster();  cluster.rescan()"
     out=($(${mysqlshell} --sql -e "SELECT member_host FROM performance_schema.replication_group_members where member_state='ONLINE';"))
 
     for host in ${out[@]}; do
@@ -190,13 +277,16 @@ function is_already_in_cluster() {
 function join_in_cluster() {
     log "INFO " "$report_host joining in cluster"
     local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${primary}"
+    clear_stale_cluster_lock "${primary}"
     retry 10 ${mysqlshell} -e "cluster = dba.getCluster();cluster.addInstance('${replication_user}@${report_host}',{recoveryMethod:'incremental'});"
 }
 
 function join_by_clone() {
     log "INFO " "$report_host joining in cluster"
     local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${primary}"
+    clear_stale_cluster_lock "${primary}"
     retry 10 ${mysqlshell} -e "cluster = dba.getCluster();cluster.removeInstance('$report_host',{force:'true'});"
+    clear_stale_cluster_lock "${primary}"
     retry 10 ${mysqlshell} -e "cluster = dba.getCluster(); cluster.addInstance('${replication_user}@${report_host}',{recoveryMethod:'clone'});"
 
     #this is required for clone method
@@ -219,11 +309,13 @@ check_instance_joined_in_cluster() {
 
 function make_sure_instance_join_in_cluster() {
     local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${primary}"
-    retry 10 ${mysqlshell} -e "cluster = dba.getCluster();  cluster.rescan({addInstances:['${report_host}:3306'],interactive:false})"
+    clear_stale_cluster_lock "${primary}"
+    retry 10 ${mysqlshell} -e "cluster = dba.getCluster();  cluster.rescan()"
 }
 
 function rejoin_in_cluster() {
     local mysqlshell="mysqlsh -u${replication_user} -p${MYSQL_ROOT_PASSWORD} -h${primary}"
+    clear_stale_cluster_lock "${primary}"
     ${mysqlshell} -e "cluster=dba.getCluster(); cluster.rejoinInstance('${replication_user}@${report_host}')"
     out=($(${mysqlshell} --sql -e "SELECT member_host FROM performance_schema.replication_group_members;"))
 
@@ -238,6 +330,7 @@ function rejoin_in_cluster() {
     fi
     check_instance_joined_in_cluster
     if [[ "$joined_in_cluster" == "0" ]]; then
+        clear_stale_cluster_lock "${primary}"
         retry 1 ${mysqlshell} -e "cluster = dba.getCluster();cluster.removeInstance('$report_host',{force:'true'});"
         join_in_cluster
     fi
@@ -246,24 +339,36 @@ function rejoin_in_cluster() {
 
 export pid
 function reboot_from_completeOutage() {
-    local mysqlshell="mysqlsh -u${replication_user} -h${report_host} -p${MYSQL_ROOT_PASSWORD}"
+    local mysqlshell="mysqlsh -u${MYSQL_ROOT_USERNAME} -h${report_host} -p${MYSQL_ROOT_PASSWORD}"
     #https://dev.mysql.com/doc/dev/mysqlsh-api-javascript/8.0/classmysqlsh_1_1dba_1_1_dba.html#ac68556e9a8e909423baa47dc3b42aadb
     #mysql wait for user interaction to remove the unavailable seed from the cluster..
     clusterName=$(echo -n $BASE_NAME | sed 's/-/_/g')
+
+    # Stop GR on any peer stuck in ERROR state before reboot.
+    # dba.rebootClusterFromCompleteOutage() refuses to proceed if any peer has GR
+    # in ERROR state ("belongs to a GR group that is not managed as an InnoDB Cluster").
+    for host in "${peers[@]}"; do
+        peer_state=$(mysql -u${MYSQL_ROOT_USERNAME} -h${host} -p${MYSQL_ROOT_PASSWORD} --port=3306 -N -e \
+            "SELECT MEMBER_STATE FROM performance_schema.replication_group_members LIMIT 1;" 2>/dev/null)
+        if [[ "$peer_state" == "ERROR" ]]; then
+            log "INFO" "Stopping GR on $host (stuck in ERROR state) before cluster reboot..."
+            mysql -u${MYSQL_ROOT_USERNAME} -h${host} -p${MYSQL_ROOT_PASSWORD} --port=3306 -N -e "STOP GROUP_REPLICATION;" 2>/dev/null
+        fi
+    done
+
     yes | $mysqlshell -e "dba.rebootClusterFromCompleteOutage('$clusterName',{force:'true'})"
+    clear_stale_cluster_lock "${report_host}"
     yes | $mysqlshell -e "cluster = dba.getCluster();  cluster.rescan()"
     wait $pid
 }
 
 function start_mysqld_in_background() {
     log "INFO" "Starting mysql server with 'docker-entrypoint.sh mysqld $args'..."
-    /entrypoint.sh mysqld --user=root --report-host=$report_host --bind-address=* $args &
+    docker-entrypoint.sh mysqld --user=root --report-host=$report_host --bind-address=* $args &
     pid=$!
     log "INFO" "The process id of mysqld is '$pid'"
 }
 
-replication_user=repl
-
 start_mysqld_in_background
 wait_for_host_online "root" "localhost" "$MYSQL_ROOT_PASSWORD"
 create_replication_user
@@ -271,7 +376,7 @@ configure_instance
 
 if [[ "$restart_required" == "1" ]]; then
     start_mysqld_in_background
-    wait_for_host_online "repl" "$report_host" "$MYSQL_ROOT_PASSWORD"
+    wait_for_host_online "${MYSQL_ROOT_USERNAME}" "$report_host" "$MYSQL_ROOT_PASSWORD"
 fi
 
 mysqld_alive=0
@@ -286,55 +391,73 @@ function check_mysqld_alive() {
 }
 
 while true; do
+    echo "running">/scripts/setup.txt
+    log "INFO" "creating setup.txt file"
     check_mysqld_alive
     if [[ "$mysqld_alive" == "1" ]]; then
         echo "mysqld process is running"
     else
         echo "need start mysqld and wait_for_mysqld_running"
         start_mysqld_in_background
-        wait_for_host_online "repl" "$report_host" "$MYSQL_ROOT_PASSWORD"
+        wait_for_host_online "${MYSQL_ROOT_USERNAME}" "$report_host" "$MYSQL_ROOT_PASSWORD"
     fi
 
-    # wait for the script copied by coordinator
+    # wait for the signal file from coordinator
+    # Also check if this node is already ONLINE in GR — this happens when
+    # another pod's coordinator called rebootClusterFromCompleteOutage() which
+    # rejoins all members remotely via mysqlsh AdminAPI, bypassing this script.
     while [ ! -f "/scripts/signal.txt" ]; do
+        member_state=$(mysql -u${MYSQL_ROOT_USERNAME} -hlocalhost -p${MYSQL_ROOT_PASSWORD} -N -e \
+            "SELECT MEMBER_STATE FROM performance_schema.replication_group_members WHERE MEMBER_HOST='${report_host}' LIMIT 1;" 2>/dev/null)
+        if [[ "$member_state" == "ONLINE" ]]; then
+            log "INFO" "Already ONLINE in GR group (joined by another node's reboot) — skipping signal wait"
+            break
+        fi
         log "WARNING" "signal is not present yet!"
         sleep 1
     done
 
-    desired_func=$(cat /scripts/signal.txt)
-    rm -rf /scripts/signal.txt
-    log "INFO" "going to execute $desired_func"
+    # If we broke out because GR is already ONLINE (no signal file), skip to wait.
+    if [ ! -f "/scripts/signal.txt" ]; then
+        log "INFO" "No signal to execute — node already joined via external reboot"
+    else
+        desired_func=$(cat /scripts/signal.txt)
+        rm -rf /scripts/signal.txt
+        log "INFO" "going to execute $desired_func"
 
-    if [[ $desired_func == "create_cluster" ]]; then
-        create_cluster
-    fi
+        if [[ $desired_func == "create_cluster" ]]; then
+            create_cluster
+        fi
 
-    if [[ $desired_func == "join_in_cluster" ]]; then
-        select_primary
-        join_in_cluster
-        check_instance_joined_in_cluster
-        if [[ "$joined_in_cluster" == "0" ]]; then
-            make_sure_instance_join_in_cluster
+        if [[ $desired_func == "join_in_cluster" ]]; then
+            select_primary
+            join_in_cluster
+            check_instance_joined_in_cluster
+            if [[ "$joined_in_cluster" == "0" ]]; then
+                make_sure_instance_join_in_cluster
+            fi
         fi
-    fi
 
-    if [[ $desired_func == "rejoin_in_cluster" ]]; then
-        select_primary
-        rejoin_in_cluster
-    fi
-    if [[ $desired_func == "join_by_clone" ]]; then
-        select_primary
-        join_by_clone
-        start_mysqld_in_background
-        wait_for_host_online "repl" "$report_host" "$MYSQL_ROOT_PASSWORD"
-        join_in_cluster
-    fi
+        if [[ $desired_func == "rejoin_in_cluster" ]]; then
+            select_primary
+            rejoin_in_cluster
+        fi
+        if [[ $desired_func == "join_by_clone" ]]; then
+            select_primary
+            join_by_clone
+            start_mysqld_in_background
+            wait_for_host_online "${MYSQL_ROOT_USERNAME}" "$report_host" "$MYSQL_ROOT_PASSWORD"
+            join_in_cluster
+        fi
 
-    if [[ $desired_func == "reboot_from_complete_outage" ]]; then
-        reboot_from_completeOutage
+        if [[ $desired_func == "reboot_from_complete_outage" ]]; then
+            reboot_from_completeOutage
+        fi
     fi
-    log "INFO" "waiting for mysql process id  = $pid"
-    wait $pid
-    rm -rf /scripts/signal.txt
 
+    log "INFO" "removing setup.txt file"
+    rm -rf /scripts/signal.txt
+    rm -rf /scripts/setup.txt
+    log "INFO" "waiting for mysql process id = $pid"
+    wait $pid
 done