Skip to content

Commit

Permalink
Refactor migration assessment codepath: flatten reported issues (#2153)
Browse files Browse the repository at this point in the history
* Added new arg - impact to reportCase() for assigning impact to the issues detected using regexp in analyze schema

* Use Issue Type for issues lookup in schema analysis report wherever possible

* Defining impact for issues_ddl and issues_dml and propagating that to AssessmentIssue

* moved pg/omnibus schema-migration test to the correct place
* Renamed TypeName and TypeDescription field of Issues structs to Name and Description
  • Loading branch information
sanyamsinghal authored Jan 8, 2025
1 parent 2cccc27 commit d490424
Show file tree
Hide file tree
Showing 13 changed files with 550 additions and 290 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/misc-migtests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,6 @@ jobs:
echo "127.0.0.1 yb-master-n1" | sudo tee -a /etc/hosts
psql "postgresql://yugabyte@yb-tserver-n1:5433/yugabyte" -c "SELECT version();"
echo "TEST: PG sample schemas (omnibus)"
migtests/scripts/run-schema-migration.sh pg/omnibus
echo "Setup the gcp credentials"
migtests/scripts/gcs/create_gcs_credentials_file
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/pg-17-migtests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ jobs:
if: ${{ !cancelled() && matrix.test_group == 'offline' }}
run: migtests/scripts/run-schema-migration.sh pg/osm

- name: "TEST: PG sample schemas (omnibus)"
if: ${{ !cancelled() && matrix.test_group == 'offline' }}
run: migtests/scripts/run-schema-migration.sh pg/omnibus

- name: "TEST: PG sample schemas (adventureworks)"
if: ${{ !cancelled() && matrix.test_group == 'offline' }}
run: migtests/scripts/run-schema-migration.sh pg/adventureworks
Expand Down
21 changes: 17 additions & 4 deletions migtests/scripts/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -877,18 +877,31 @@ normalize_json() {
# Normalize JSON with jq; use --sort-keys to avoid the need to keep the same sequence of keys in expected vs actual json
jq --sort-keys 'walk(
if type == "object" then
.ObjectNames? |= (if type == "string" then split(", ") | sort | join(", ") else . end) |
.ObjectNames? |= (
if type == "string" then
split(", ") | sort | join(", ")
else
.
end
) |
.VoyagerVersion? = "IGNORED" |
.TargetDBVersion? = "IGNORED" |
.DbVersion? = "IGNORED" |
.FilePath? = "IGNORED" |
.OptimalSelectConnectionsPerNode? = "IGNORED" |
.OptimalInsertConnectionsPerNode? = "IGNORED" |
.RowCount? = "IGNORED" |
.SqlStatement? |= (if type == "string" then gsub("\\n"; " ") else . end)
# Replace newline characters in SqlStatement with spaces
.SqlStatement? |= (
if type == "string" then
gsub("\\n"; " ")
else
.
end
)
elif type == "array" then
sort_by(tostring)
else
sort_by(tostring)
else
.
end
)' "$input_file" > "$temp_file"
Expand Down
129 changes: 66 additions & 63 deletions yb-voyager/cmd/analyzeSchema.go

Large diffs are not rendered by default.

224 changes: 172 additions & 52 deletions yb-voyager/cmd/assessMigrationCommand.go

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions yb-voyager/cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -1195,13 +1195,15 @@ type AssessmentReport struct {
MigrationCaveats []UnsupportedFeature `json:"MigrationCaveats"`
}

// Fields apart from Category, CategoryDescription, TypeName and Impact will be populated only if/when available
type AssessmentIssue struct {
Category string
Category string // expected values: feature, query_constrcuts, migration_caveats, plpgsql_objects, datatytpe
CategoryDescription string
TypeName string
TypeDescription string
Impact string
ObjectType string
Type string // Ex: GIN_INDEXES, SECURITY_INVOKER_VIEWS, STORED_GENERATED_COLUMNS
Name string // Ex: "Stored generated columns are not supported."
Description string
Impact string // Level-1, Level-2, Level-3 (default: Level-1 ??)
ObjectType string // For datatype category, ObjectType will be datatype (for eg "geometry")
ObjectName string
SqlStatement string
DocsLink string
Expand Down Expand Up @@ -1313,6 +1315,10 @@ func ParseJSONToAssessmentReport(reportPath string) (*AssessmentReport, error) {
return &report, nil
}

func (ar *AssessmentReport) AppendIssues(issues ...AssessmentIssue) {
ar.Issues = append(ar.Issues, issues...)
}

func (ar *AssessmentReport) GetShardedTablesRecommendation() ([]string, error) {
if ar.Sizing == nil {
return nil, fmt.Errorf("sizing report is null, can't fetch sharded tables")
Expand Down
75 changes: 47 additions & 28 deletions yb-voyager/cmd/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ limitations under the License.
*/
package cmd

import (
"github.com/yugabyte/yb-voyager/yb-voyager/src/constants"
"github.com/yugabyte/yb-voyager/yb-voyager/src/utils"
)

const (
KB = 1024
MB = 1024 * 1024
Expand Down Expand Up @@ -102,10 +107,12 @@ const (
REFERENCE_PARTITION = "REFERENCE PARTITION"
SYSTEM_PARTITION = "SYSTEM PARTITION"

UNSUPPORTED_FEATURES = "unsupported_features"
UNSUPPORTED_DATATYPES = "unsupported_datatypes"
UNSUPPORTED_PLPGSQL_OBEJCTS = "unsupported_plpgsql_objects"
REPORT_UNSUPPORTED_QUERY_CONSTRUCTS = "REPORT_UNSUPPORTED_QUERY_CONSTRUCTS"
UNSUPPORTED_FEATURES_CATEGORY = "unsupported_features"
UNSUPPORTED_DATATYPES_CATEGORY = "unsupported_datatypes"
UNSUPPORTED_QUERY_CONSTRUCTS_CATEGORY = "unsupported_query_constructs"
UNSUPPORTED_PLPGSQL_OBJECTS_CATEGORY = "unsupported_plpgsql_objects"
MIGRATION_CAVEATS_CATEGORY = "migration_caveats"
REPORT_UNSUPPORTED_QUERY_CONSTRUCTS = "REPORT_UNSUPPORTED_QUERY_CONSTRUCTS"

HTML = "html"
JSON = "json"
Expand Down Expand Up @@ -173,21 +180,14 @@ const (
List of all the features we are reporting as part of Unsupported features and Migration caveats
*/
const (
// AssessmentIssue types used in YugabyteD payload
FEATURE = "feature"
DATATYPE = "datatype"
QUERY_CONSTRUCT = "query_construct" // confused: in json for some values we are using space separated and for some snake_case
MIGRATION_CAVEATS = "migration_caveats"
PLPGSQL_OBJECT = "plpgsql_object"

// Description
FEATURE_ISSUE_TYPE_DESCRIPTION = "Features of the source database that are not supported on the target YugabyteDB."
DATATYPE_ISSUE_TYPE_DESCRIPTION = "Data types of the source database that are not supported on the target YugabyteDB."
MIGRATION_CAVEATS_TYPE_DESCRIPTION = "Migration Caveats highlights the current limitations with the migration workflow."
UNSUPPORTED_QUERY_CONSTRUTS_DESCRIPTION = "Source database queries not supported in YugabyteDB, identified by scanning system tables."
UNSUPPPORTED_PLPGSQL_OBJECT_DESCRIPTION = "Source schema objects having unsupported statements on the target YugabyteDB in PL/pgSQL code block"
SCHEMA_SUMMARY_DESCRIPTION = "Objects that will be created on the target YugabyteDB."
SCHEMA_SUMMARY_DESCRIPTION_ORACLE = SCHEMA_SUMMARY_DESCRIPTION + " Some of the index and sequence names might be different from those in the source database."
FEATURE_CATEGORY_DESCRIPTION = "Features of the source database that are not supported on the target YugabyteDB."
DATATYPE_CATEGORY_DESCRIPTION = "Data types of the source database that are not supported on the target YugabyteDB."
MIGRATION_CAVEATS_CATEGORY_DESCRIPTION = "Migration Caveats highlights the current limitations with the migration workflow."
UNSUPPORTED_QUERY_CONSTRUCTS_CATEGORY_DESCRIPTION = "Source database queries not supported in YugabyteDB, identified by scanning system tables."
UNSUPPPORTED_PLPGSQL_OBJECT_CATEGORY_DESCRIPTION = "Source schema objects having unsupported statements on the target YugabyteDB in PL/pgSQL code block"
SCHEMA_SUMMARY_DESCRIPTION = "Objects that will be created on the target YugabyteDB."
SCHEMA_SUMMARY_DESCRIPTION_ORACLE = SCHEMA_SUMMARY_DESCRIPTION + " Some of the index and sequence names might be different from those in the source database."

//Unsupported Features

Expand Down Expand Up @@ -222,16 +222,16 @@ const (
// Migration caveats

//POSTGRESQL
ALTER_PARTITION_ADD_PK_CAVEAT_FEATURE = "Alter partitioned tables to add Primary Key"
FOREIGN_TABLE_CAVEAT_FEATURE = "Foreign tables"
POLICIES_CAVEAT_FEATURE = "Policies"
UNSUPPORTED_DATATYPES_LIVE_CAVEAT_FEATURE = "Unsupported Data Types for Live Migration"
UNSUPPORTED_DATATYPES_LIVE_WITH_FF_FB_CAVEAT_FEATURE = "Unsupported Data Types for Live Migration with Fall-forward/Fallback"
UNSUPPORTED_DATATYPES_FOR_LIVE_MIGRATION_ISSUE = "There are some data types in the schema that are not supported by live migration of data. These columns will be excluded when exporting and importing data in live migration workflows."
UNSUPPORTED_DATATYPES_FOR_LIVE_MIGRATION_WITH_FF_FB_ISSUE = "There are some data types in the schema that are not supported by live migration with fall-forward/fall-back. These columns will be excluded when exporting and importing data in live migration workflows."
DESCRIPTION_ADD_PK_TO_PARTITION_TABLE = `After export schema, the ALTER table should be merged with CREATE table for partitioned tables as alter of partitioned tables to add primary key is not supported.`
DESCRIPTION_FOREIGN_TABLES = `During the export schema phase, SERVER and USER MAPPING objects are not exported. These should be manually created to make the foreign tables work.`
DESCRIPTION_POLICY_ROLE_ISSUE = `There are some policies that are created for certain users/roles. During the export schema phase, USERs and GRANTs are not exported. Therefore, they will have to be manually created before running import schema.`
ALTER_PARTITION_ADD_PK_CAVEAT_FEATURE = "Alter partitioned tables to add Primary Key"
FOREIGN_TABLE_CAVEAT_FEATURE = "Foreign tables"
POLICIES_CAVEAT_FEATURE = "Policies"
UNSUPPORTED_DATATYPES_LIVE_CAVEAT_FEATURE = "Unsupported Data Types for Live Migration"
UNSUPPORTED_DATATYPES_LIVE_WITH_FF_FB_CAVEAT_FEATURE = "Unsupported Data Types for Live Migration with Fall-forward/Fallback"
UNSUPPORTED_DATATYPES_FOR_LIVE_MIGRATION_DESCRIPTION = "There are some data types in the schema that are not supported by live migration of data. These columns will be excluded when exporting and importing data in live migration workflows."
UNSUPPORTED_DATATYPES_FOR_LIVE_MIGRATION_WITH_FF_FB_DESCRIPTION = "There are some data types in the schema that are not supported by live migration with fall-forward/fall-back. These columns will be excluded when exporting and importing data in live migration workflows."
DESCRIPTION_ADD_PK_TO_PARTITION_TABLE = `After export schema, the ALTER table should be merged with CREATE table for partitioned tables as alter of partitioned tables to add primary key is not supported.`
DESCRIPTION_FOREIGN_TABLES = `During the export schema phase, SERVER and USER MAPPING objects are not exported. These should be manually created to make the foreign tables work.`
DESCRIPTION_POLICY_ROLE_DESCRIPTION = `There are some policies that are created for certain users/roles. During the export schema phase, USERs and GRANTs are not exported. Therefore, they will have to be manually created before running import schema.`
)

var supportedSourceDBTypes = []string{ORACLE, MYSQL, POSTGRESQL, YUGABYTEDB}
Expand All @@ -244,3 +244,22 @@ var validSSLModes = map[string][]string{
}

var EVENT_BATCH_MAX_RETRY_COUNT = 50

// returns the description for a given assessment issue category
func GetCategoryDescription(category string) string {
switch category {
case UNSUPPORTED_FEATURES_CATEGORY, constants.FEATURE:
return FEATURE_CATEGORY_DESCRIPTION
case UNSUPPORTED_DATATYPES_CATEGORY, constants.DATATYPE:
return DATATYPE_CATEGORY_DESCRIPTION
case UNSUPPORTED_QUERY_CONSTRUCTS_CATEGORY, constants.QUERY_CONSTRUCT:
return UNSUPPORTED_QUERY_CONSTRUCTS_CATEGORY_DESCRIPTION
case UNSUPPORTED_PLPGSQL_OBJECTS_CATEGORY, constants.PLPGSQL_OBJECT:
return UNSUPPPORTED_PLPGSQL_OBJECT_CATEGORY_DESCRIPTION
case MIGRATION_CAVEATS_CATEGORY: // or constants.MIGRATION_CAVEATS (identical)
return MIGRATION_CAVEATS_CATEGORY_DESCRIPTION
default:
utils.ErrExit("ERROR: unsupported assessment issue category %q", category)
}
return ""
}
16 changes: 15 additions & 1 deletion yb-voyager/src/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,24 @@ const (
// Database Object types
TABLE = "table"
FUNCTION = "function"
COLUMN = "column"

// Source DB Types
YUGABYTEDB = "yugabytedb"
POSTGRESQL = "postgresql"
ORACLE = "oracle"
MYSQL = "mysql"
)

// AssessmentIssue Categoes - used by YugabyteD payload and Migration Complexity Explainability
// TODO: soon to be renamed as SCHEMA, SCHEMA_PLPGSQL, DML_QUERY, MIGRATION_CAVEAT, "DATATYPE"
FEATURE = "feature"
DATATYPE = "datatype"
QUERY_CONSTRUCT = "query_construct"
MIGRATION_CAVEATS = "migration_caveats"
PLPGSQL_OBJECT = "plpgsql_object"

// constants for the Impact Buckets
IMPACT_LEVEL_1 = "LEVEL_1" // Represents minimal impact like only the schema ddl
IMPACT_LEVEL_2 = "LEVEL_2" // Represents moderate impact like dml queries which might impact a lot of implementation/assumption in app layer
IMPACT_LEVEL_3 = "LEVEL_3" // Represent significant impact like TABLE INHERITANCE, which doesn't have any simple workaround but can impact multiple objects/apps
)
38 changes: 36 additions & 2 deletions yb-voyager/src/issue/issue.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ import (

type Issue struct {
Type string // (advisory_locks, index_not_supported, etc)
TypeName string // for display
TypeDescription string
Name string // for display
Description string
Impact string
Suggestion string
GH string
DocsLink string
Expand All @@ -40,3 +41,36 @@ func (i Issue) IsFixedIn(v *ybversion.YBVersion) (bool, error) {
}
return v.GreaterThanOrEqual(minVersionFixedInSeries), nil
}

/*
Dynamic Impact Determination (TODO)
- We can define the impact calculator function based on issue type wherever/whenever needed
- Map will have functions only for issue type with dynamic impact determination
For example:
type ImpactCalcFunc func(issue QueryIssue, stats *PgStats) string
var impactCalculators = map[string]ImpactCalcFunc{
INHERITED_TABLE: inheritedTableImpactCalc,
// etc...
}
// Example dynamic function
func inheritedTableImpactCalc(i QueryIssue, stats *PgStats) string {
usage := stats.GetUsage(i.ObjectName) // e.g. how many reads/writes
if usage.WritesPerDay > 1000 {
return "LEVEL_2"
}
return "LEVEL_3"
}
func (i Issue) GetImpact(stats *PgStats) string {
if calc, ok := impactCalculators[i.Type]; ok {
return calc(i, stats)
}
return lo.Ternary(i.Impact != "", i.Impact, constants.IMPACT_LEVEL_1)
}
*/
Loading

0 comments on commit d490424

Please sign in to comment.