feat: support medium sync policy in CCR tasks #624

Draft
wants to merge 1 commit into base: dev
27 changes: 27 additions & 0 deletions pkg/ccr/base/spec.go
@@ -1555,6 +1555,33 @@ func (s *Spec) ModifyTableProperty(destTableName string, modifyProperty *record.
return s.Exec(sql)
}

func (s *Spec) ModifyPartitionProperty(destTableName string, batchModifyPartitionsInfo *record.BatchModifyPartitionsInfo) error {
if batchModifyPartitionsInfo == nil || len(batchModifyPartitionsInfo.Infos) == 0 {
log.Warnf("empty partition infos, skip modify partition property")
return nil
}

dbName := utils.FormatKeywordName(s.Database)
destTableName = utils.FormatKeywordName(destTableName)

for _, partitionInfo := range batchModifyPartitionsInfo.Infos {
if partitionInfo.DataProperty == nil || partitionInfo.DataProperty.StorageMedium == "" {
log.Warnf("partition %d has no storage medium, skip modify partition property", partitionInfo.PartitionId)
continue
}

sql := fmt.Sprintf("ALTER TABLE %s.%s MODIFY PARTITION %s SET (\"storage_medium\" = \"%s\")",
dbName, destTableName, utils.FormatKeywordName(partitionInfo.PartitionName), partitionInfo.DataProperty.StorageMedium)

log.Infof("modify partition property sql: %s", sql)
if err := s.Exec(sql); err != nil {
log.Warnf("modify partition %s property failed: %v", partitionInfo.PartitionName, err)
}
}

return nil
}

// Determine whether the error is network related, e.g. connection refused or connection reset, as exposed by the net packages.
func isNetworkRelated(err error) bool {
msg := err.Error()
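
For reference, a minimal sketch (not part of the diff; the database, table, partition, and medium values are hypothetical) of the statement shape that ModifyPartitionProperty builds for each partition:

package main

import "fmt"

func main() {
    // Hypothetical values; in the syncer they come from the Spec and the
    // BatchModifyPartitionsInfo record.
    db, table, partition, medium := "ccr_db", "orders", "p20240101", "SSD"
    sql := fmt.Sprintf("ALTER TABLE %s.%s MODIFY PARTITION %s SET (\"storage_medium\" = \"%s\")",
        db, table, partition, medium)
    fmt.Println(sql)
    // Output: ALTER TABLE ccr_db.orders MODIFY PARTITION p20240101 SET ("storage_medium" = "SSD")
}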
1 change: 1 addition & 0 deletions pkg/ccr/base/specer.go
@@ -66,6 +66,7 @@ type Specer interface {
AddPartition(destTableName string, addPartition *record.AddPartition) error
DropPartition(destTableName string, dropPartition *record.DropPartition) error
RenamePartition(destTableName, oldPartition, newPartition string) error
ModifyPartitionProperty(destTableName string, batchModifyPartitionsInfo *record.BatchModifyPartitionsInfo) error

LightningIndexChange(tableAlias string, changes *record.ModifyTableAddOrDropInvertedIndices) error
BuildIndex(tableAlias string, buildIndex *record.IndexChangeJob) error
179 changes: 175 additions & 4 deletions pkg/ccr/handle/create_table.go
@@ -1,6 +1,8 @@
package handle

import (
"fmt"
"regexp"
"strings"

"github.com/selectdb/ccr_syncer/pkg/ccr"
@@ -18,6 +20,176 @@ type CreateTableHandle struct {
IdempotentJobHandle[*record.CreateTable]
}

// Check if error message indicates storage medium or capacity related issues
func isStorageMediumError(errMsg string) bool {
log.Infof("STORAGE_MEDIUM_DEBUG: Analyzing error message: %s", errMsg)

patterns := []string{
"capExceedLimit",
"Failed to find enough backend",
"not enough backend",
"storage medium",
"storage_medium",
"avail capacity",
"disk space",
"not enough space",
"replication num",
"replication tag",
}

for _, pattern := range patterns {
if strings.Contains(strings.ToLower(errMsg), strings.ToLower(pattern)) {
log.Infof("STORAGE_MEDIUM_DEBUG: Found storage/capacity related pattern '%s' in error message", pattern)
return true
}
}

log.Infof("STORAGE_MEDIUM_DEBUG: No storage/capacity related patterns found in error message")
return false
}
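
// Reviewer note (not part of this diff): the check above is a case-insensitive
// substring match, so any FE error containing one of the listed patterns, such as
// "capExceedLimit" or "Failed to find enough backend", is treated as a
// storage/capacity failure and triggers the medium retry below.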

// Extract storage_medium from CREATE TABLE SQL
func extractStorageMediumFromCreateTableSql(createSql string) string {
pattern := `"storage_medium"\s*=\s*"([^"]*)"`
re := regexp.MustCompile(pattern)
matches := re.FindStringSubmatch(createSql)
if len(matches) >= 2 {
medium := strings.ToLower(matches[1])
log.Infof("STORAGE_MEDIUM_DEBUG: Extracted storage medium: %s", medium)
return medium
}
log.Infof("STORAGE_MEDIUM_DEBUG: No storage medium found in SQL")
return ""
}

// Switch storage medium between SSD and HDD
func switchStorageMedium(medium string) string {
switch strings.ToLower(medium) {
case "ssd":
return "hdd"
case "hdd":
return "ssd"
default:
// Default to hdd if not standard medium
return "hdd"
}
}

// Set specific storage_medium in CREATE TABLE SQL
func setStorageMediumInCreateTableSql(createSql string, medium string) string {
// Remove existing storage_medium first
createSql = ccr.FilterStorageMediumFromCreateTableSql(createSql)

// Check if PROPERTIES clause exists
propertiesPattern := `PROPERTIES\s*\(`
if matched, _ := regexp.MatchString(propertiesPattern, createSql); matched {
// Add storage_medium at the beginning of PROPERTIES
pattern := `(PROPERTIES\s*\(\s*)`
replacement := fmt.Sprintf(`${1}"storage_medium" = "%s", `, medium)
createSql = regexp.MustCompile(pattern).ReplaceAllString(createSql, replacement)
} else {
// Add entire PROPERTIES clause
pattern := `(\s*)$`
replacement := fmt.Sprintf(` PROPERTIES ("storage_medium" = "%s")`, medium)
createSql = regexp.MustCompile(pattern).ReplaceAllString(createSql, replacement)
}

return createSql
}
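
// Illustrative transformation (reviewer note, not part of this diff; the sample
// SQL is hypothetical):
//   input:  CREATE TABLE t (k INT) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES ("replication_num" = "1")
//   output: CREATE TABLE t (k INT) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES ("storage_medium" = "hdd", "replication_num" = "1")
// When the statement has no PROPERTIES clause at all, a new
// PROPERTIES ("storage_medium" = "hdd") clause is appended at the end.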

// Process CREATE TABLE SQL according to medium sync policy
func processCreateTableSqlByMediumPolicy(j *ccr.Job, createTable *record.CreateTable) error {
// Apply the Job's medium sync policy to the CREATE TABLE statement.
// The medium sync policy feature is assumed to be enabled when this handler runs;
// this is a simplified version that handles the main cases.
mediumPolicy := j.MediumSyncPolicy

switch mediumPolicy {
case ccr.MediumSyncPolicySameWithUpstream:
// Keep upstream storage_medium unchanged
log.Infof("using same_with_upstream policy, keeping original storage_medium")
return nil

case ccr.MediumSyncPolicyHDD:
// Force set to HDD
log.Infof("using hdd policy, setting storage_medium to hdd")
createTable.Sql = setStorageMediumInCreateTableSql(createTable.Sql, "hdd")
return nil

default:
log.Warnf("unknown medium sync policy: %s, falling back to filter storage_medium", mediumPolicy)
if ccr.FeatureFilterStorageMedium {
createTable.Sql = ccr.FilterStorageMediumFromCreateTableSql(createTable.Sql)
}
return nil
}
}
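
// Summary of the policy handling above (reviewer note, not part of this diff):
//   MediumSyncPolicySameWithUpstream -> keep the upstream storage_medium as-is
//   MediumSyncPolicyHDD              -> rewrite the SQL to "storage_medium" = "hdd"
//   anything else                    -> fall back to stripping storage_medium when
//                                       FeatureFilterStorageMedium is enabled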

// Create table with medium retry mechanism
func createTableWithMediumRetry(j *ccr.Job, createTable *record.CreateTable, srcDb string) error {
originalSql := createTable.Sql
log.Infof("STORAGE_MEDIUM_DEBUG: Starting create table with medium retry for table: %s", createTable.TableName)

// Process SQL according to medium policy
if err := processCreateTableSqlByMediumPolicy(j, createTable); err != nil {
return err
}

// First attempt
err := j.IDest.CreateTableOrView(createTable, srcDb)
if err == nil {
log.Infof("STORAGE_MEDIUM_DEBUG: Create table succeeded on first attempt")
return nil
}

log.Warnf("STORAGE_MEDIUM_DEBUG: First attempt failed: %s", err.Error())

// Check if it's storage related error and should retry
if !isStorageMediumError(err.Error()) {
log.Infof("STORAGE_MEDIUM_DEBUG: Not a storage related error, no retry")
return err
}

// Extract current medium and switch to the other one
currentMedium := extractStorageMediumFromCreateTableSql(createTable.Sql)
if currentMedium == "" {
currentMedium = "ssd" // default
}

switchedMedium := switchStorageMedium(currentMedium)
log.Infof("STORAGE_MEDIUM_DEBUG: Switching from %s to %s", currentMedium, switchedMedium)

createTable.Sql = setStorageMediumInCreateTableSql(originalSql, switchedMedium)

// Second attempt with switched medium
err = j.IDest.CreateTableOrView(createTable, srcDb)
if err == nil {
log.Infof("STORAGE_MEDIUM_DEBUG: Create table succeeded after switching to %s", switchedMedium)
return nil
}

log.Warnf("STORAGE_MEDIUM_DEBUG: Second attempt with %s also failed: %s", switchedMedium, err.Error())

// Final attempt: remove storage_medium if still storage related error
if isStorageMediumError(err.Error()) {
log.Infof("STORAGE_MEDIUM_DEBUG: Removing storage_medium for final attempt")
createTable.Sql = ccr.FilterStorageMediumFromCreateTableSql(originalSql)

err = j.IDest.CreateTableOrView(createTable, srcDb)
if err == nil {
log.Infof("STORAGE_MEDIUM_DEBUG: Create table succeeded after removing storage_medium")
return nil
}

log.Warnf("STORAGE_MEDIUM_DEBUG: Final attempt without storage_medium also failed: %s", err.Error())
}

log.Errorf("STORAGE_MEDIUM_DEBUG: All attempts failed, returning final error")
return err
}
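
// Retry ladder implemented above (reviewer summary, not additional code):
//   1. rewrite the SQL per the medium sync policy and attempt CreateTableOrView;
//   2. on a storage/capacity related error, flip ssd<->hdd and retry once;
//   3. if that also fails with a storage related error, strip storage_medium from
//      the original SQL and make a final attempt.
// Errors that do not match isStorageMediumError are returned after the first attempt.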

func (h *CreateTableHandle) Handle(j *ccr.Job, commitSeq int64, createTable *record.CreateTable) error {
if j.SyncType != ccr.DBSync {
return xerror.Errorf(xerror.Normal, "invalid sync type: %v", j.SyncType)
@@ -68,12 +240,11 @@ func (h *CreateTableHandle) Handle(j *ccr.Job, commitSeq int64, createTable *rec
}
}

if ccr.FeatureFilterStorageMedium {
createTable.Sql = ccr.FilterStorageMediumFromCreateTableSql(createTable.Sql)
}
// The old storage_medium filtering is now handled inside createTableWithMediumRetry.
createTable.Sql = ccr.FilterDynamicPartitionStoragePolicyFromCreateTableSql(createTable.Sql)

if err := j.IDest.CreateTableOrView(createTable, j.Src.Database); err != nil {
// Use new create table function with medium retry mechanism
if err := createTableWithMediumRetry(j, createTable, j.Src.Database); err != nil {
errMsg := err.Error()
if strings.Contains(errMsg, "Can not found function") {
log.Warnf("skip creating table/view because the UDF function is not supported yet: %s", errMsg)
61 changes: 61 additions & 0 deletions pkg/ccr/handle/modify_partitions.go
@@ -0,0 +1,61 @@
package handle

import (
"github.com/selectdb/ccr_syncer/pkg/ccr"
"github.com/selectdb/ccr_syncer/pkg/ccr/record"
festruct "github.com/selectdb/ccr_syncer/pkg/rpc/kitex_gen/frontendservice"
"github.com/selectdb/ccr_syncer/pkg/xerror"
log "github.com/sirupsen/logrus"
)

func init() {
ccr.RegisterJobHandle[*record.BatchModifyPartitionsInfo](festruct.TBinlogType_MODIFY_PARTITIONS, &ModifyPartitionsHandle{})
}

type ModifyPartitionsHandle struct {
// The modify partitions binlog is idempotent
IdempotentJobHandle[*record.BatchModifyPartitionsInfo]
}

func (h *ModifyPartitionsHandle) Handle(j *ccr.Job, commitSeq int64, batchModifyPartitionsInfo *record.BatchModifyPartitionsInfo) error {
// TODO: customize behavior per medium_sync_policy
if !ccr.FeatureMediumSyncPolicy || j.MediumSyncPolicy == "hdd" {
log.Warnf("skip modify partitions: FeatureMediumSyncPolicy is off or medium_sync_policy is hdd")
return nil
}

// Safety check: ensure we have partition infos to process
if batchModifyPartitionsInfo == nil || len(batchModifyPartitionsInfo.Infos) == 0 {
return xerror.Errorf(xerror.Normal, "batch modify partitions info is empty or nil")
}

// Get table ID from the first partition info (all partitions should belong to the same table)
tableId := batchModifyPartitionsInfo.GetTableId()
if tableId <= 0 {
return xerror.Errorf(xerror.Normal, "invalid table ID: %d", tableId)
}

// Check if it's a materialized view table
if isAsyncMv, err := j.IsMaterializedViewTable(tableId); err != nil {
return err
} else if isAsyncMv {
log.Warnf("skip modify partitions for materialized view table %d", tableId)
return nil
}

// Get destination table name
destTableName, err := j.GetDestNameBySrcId(tableId)
if err != nil {
return err
}

// Get the source cluster meta and enrich the binlog with partition names
srcMeta := j.GetSrcMeta()
if err := batchModifyPartitionsInfo.EnrichWithPartitionNames(srcMeta); err != nil {
log.Errorf("failed to enrich partition names from source meta: %v", err)
return err
}

// Call spec layer method directly
return j.Dest.ModifyPartitionProperty(destTableName, batchModifyPartitionsInfo)
}
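
A minimal sketch of the gating logic at the top of Handle (outside the diff; the flag and the policy strings are illustrative, and only the "hdd" comparison mirrors the code above):

package main

import "fmt"

// shouldSyncPartitionMedium mirrors the early-return check in
// ModifyPartitionsHandle.Handle: partition storage_medium changes are only
// replayed when the feature flag is on and the policy is not a fixed "hdd".
func shouldSyncPartitionMedium(featureEnabled bool, mediumSyncPolicy string) bool {
    return featureEnabled && mediumSyncPolicy != "hdd"
}

func main() {
    fmt.Println(shouldSyncPartitionMedium(true, "same_with_upstream")) // true
    fmt.Println(shouldSyncPartitionMedium(true, "hdd"))                // false
    fmt.Println(shouldSyncPartitionMedium(false, "ssd"))               // false
}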