diff --git a/pkg/iscp/index_sqlwriter.go b/pkg/iscp/index_sqlwriter.go index 5242a633cdac7..321323ff0d70b 100644 --- a/pkg/iscp/index_sqlwriter.go +++ b/pkg/iscp/index_sqlwriter.go @@ -339,8 +339,9 @@ func NewGenericHnswSqlWriter[T types.RealNumbers](algo string, jobID JobID, info // get the first indexdef as they are the same idxdef := indexdef[0] + writer_capacity := 8192 - w := &HnswSqlWriter[T]{tabledef: tabledef, indexdef: indexdef, jobID: jobID, info: info, cdc: vectorindex.NewVectorIndexCdc[T]()} + w := &HnswSqlWriter[T]{tabledef: tabledef, indexdef: indexdef, jobID: jobID, info: info, cdc: vectorindex.NewVectorIndexCdc[T](writer_capacity)} paramstr := idxdef.IndexAlgoParams var meta, storage string diff --git a/pkg/sql/compile/alter.go b/pkg/sql/compile/alter.go index 785d2815556c1..aae408d177419 100644 --- a/pkg/sql/compile/alter.go +++ b/pkg/sql/compile/alter.go @@ -49,6 +49,7 @@ func convertDBEOB(ctx context.Context, e error, name string) error { func (s *Scope) AlterTableCopy(c *Compile) error { qry := s.Plan.GetDdl().GetAlterTable() dbName := qry.Database + if dbName == "" { dbName = c.db } @@ -149,25 +150,34 @@ func (s *Scope) AlterTableCopy(c *Compile) error { if qry.Options.SkipPkDedup || len(qry.Options.SkipUniqueIdxDedup) > 0 { opt = opt.WithAlterCopyOpt(qry.Options) } - // 4. copy the original table data to the temporary replica table - err = c.runSqlWithOptions(qry.InsertTmpDataSql, opt) + + //4. 
obtain relation for new tables + newRel, err := dbSource.Relation(c.proc.Ctx, qry.CopyTableDef.Name, nil) if err != nil { - c.proc.Error(c.proc.Ctx, "insert data to copy table for alter table", + c.proc.Error(c.proc.Ctx, "obtain new relation for copy table for alter table", zap.String("databaseName", dbName), zap.String("origin tableName", qry.GetTableDef().Name), - zap.String("copy tableName", qry.CopyTableDef.Name), - zap.String("InsertTmpDataSql", qry.InsertTmpDataSql), + zap.String("copy table name", qry.CopyTableDef.Name), zap.Error(err)) return err } - //5. obtain relation for new tables - newRel, err := dbSource.Relation(c.proc.Ctx, qry.CopyTableDef.Name, nil) + //5. ISCP: temp table already created pitr and iscp job with temp table name + // and we don't want iscp to run with temp table so drop pitr and iscp job with the temp table here + newTmpTableDef := newRel.CopyTableDef(c.proc.Ctx) + err = DropAllIndexCdcTasks(c, newTmpTableDef, dbName, qry.CopyTableDef.Name) if err != nil { - c.proc.Error(c.proc.Ctx, "obtain new relation for copy table for alter table", + return err + } + + // 6. copy the original table data to the temporary replica table + err = c.runSqlWithOptions(qry.InsertTmpDataSql, opt) + if err != nil { + c.proc.Error(c.proc.Ctx, "insert data to copy table for alter table", zap.String("databaseName", dbName), zap.String("origin tableName", qry.GetTableDef().Name), - zap.String("copy table name", qry.CopyTableDef.Name), + zap.String("copy tableName", qry.CopyTableDef.Name), + zap.String("InsertTmpDataSql", qry.InsertTmpDataSql), zap.Error(err)) return err } @@ -179,7 +189,8 @@ func (s *Scope) AlterTableCopy(c *Compile) error { return err } - // 7. drop original table + // 7. drop original table. + // ISCP: That will also drop ISCP related jobs and pitr of the original table. 
dropSql := fmt.Sprintf("drop table `%s`.`%s`", dbName, tblName) if err := c.runSqlWithOptions( dropSql, @@ -193,26 +204,6 @@ func (s *Scope) AlterTableCopy(c *Compile) error { return err } - // 7.1 delete all index objects of the table in mo_catalog.mo_indexes - if qry.Database != catalog.MO_CATALOG && qry.TableDef.Name != catalog.MO_INDEXES { - if qry.GetTableDef().Pkey != nil || len(qry.GetTableDef().Indexes) > 0 { - deleteSql := fmt.Sprintf( - deleteMoIndexesWithTableIdFormat, - qry.GetTableDef().TblId, - ) - err = c.runSql(deleteSql) - if err != nil { - c.proc.Error(c.proc.Ctx, "delete all index meta data of origin table in `mo_indexes` for alter table", - zap.String("databaseName", dbName), - zap.String("origin tableName", qry.GetTableDef().Name), - zap.String("delete all index sql", deleteSql), - zap.Error(err)) - - return err - } - } - } - newId := newRel.GetTableID(c.proc.Ctx) //------------------------------------------------------------------------- // 8. rename temporary replica table into the original table(Table Id remains unchanged) @@ -237,11 +228,11 @@ func (s *Scope) AlterTableCopy(c *Compile) error { return err } + newTableDef := newRel.CopyTableDef(c.proc.Ctx) //-------------------------------------------------------------------------------------------------------------- { // 9. invoke reindex for the new table, if it contains ivf index. multiTableIndexes := make(map[string]*MultiTableIndex) - newTableDef := newRel.CopyTableDef(c.proc.Ctx) extra := newRel.GetExtraInfo() id := newRel.GetTableID(c.proc.Ctx) @@ -311,6 +302,12 @@ func (s *Scope) AlterTableCopy(c *Compile) error { } } + // 10. 
register ISCP job again + err = CreateAllIndexCdcTasks(c, newTableDef.Indexes, dbName, tblName) + if err != nil { + return err + } + // get and update the change mapping information of table colIds if err = updateNewTableColId(c, newRel, qry.ChangeTblColIdMap); err != nil { c.proc.Error(c.proc.Ctx, "get and update the change mapping information of table colIds for alter table", diff --git a/pkg/sql/compile/ddl.go b/pkg/sql/compile/ddl.go index 0d875baecef32..f2e460c13f237 100644 --- a/pkg/sql/compile/ddl.go +++ b/pkg/sql/compile/ddl.go @@ -27,6 +27,7 @@ import ( moruntime "github.com/matrixorigin/matrixone/pkg/common/runtime" "github.com/matrixorigin/matrixone/pkg/config" + "github.com/matrixorigin/matrixone/pkg/iscp" "github.com/matrixorigin/matrixone/pkg/pb/task" "github.com/matrixorigin/matrixone/pkg/cdc" @@ -239,6 +240,13 @@ func (s *Scope) DropDatabase(c *Compile) error { return err } } + + // 5.unregister iscp jobs + err = iscp.UnregisterJobsByDBName(c.proc.Ctx, c.proc.GetService(), c.proc.GetTxnOperator(), dbName) + if err != nil { + return err + } + return err } @@ -589,6 +597,13 @@ func (s *Scope) AlterTableInplace(c *Compile) error { newIndexes = append(newIndexes, extra.IndexTables[idx]) } } + + // drop index cdc task + err = DropIndexCdcTask(c, oTableDef, dbName, tblName, constraintName) + if err != nil { + return err + } + // Avoid modifying slice directly during iteration oTableDef.Indexes = notDroppedIndex extra.IndexTables = newIndexes @@ -746,9 +761,8 @@ func (s *Scope) AlterTableInplace(c *Compile) error { if err != nil { return err } - case catalog.MoIndexHnswAlgo.ToString(): - // PASS: keep option unchange for incremental update + // PASS default: return moerr.NewInternalError(c.proc.Ctx, "invalid index algo type for alter reindex") } @@ -890,6 +904,25 @@ func (s *Scope) AlterTableInplace(c *Compile) error { return err } + // post alter table rename -- AlterKind_RenameTable to update iscp job + for _, req := range reqs { + if req.Kind == 
api.AlterKind_RenameTable { + op, ok := req.Operation.(*api.AlterTableReq_RenameTable) + if ok { + err = iscp.RenameSrcTable(c.proc.Ctx, + c.proc.GetService(), + c.proc.GetTxnOperator(), + req.DbId, + req.TableId, + op.RenameTable.OldName, + op.RenameTable.NewName) + if err != nil { + return err + } + } + } + } + // remove refChildTbls for drop foreign key clause //remove the child table id -- tblId from the parent table -- fkTblId for _, fkTblId := range removeRefChildTbls { @@ -1468,6 +1501,21 @@ func (s *Scope) CreateTable(c *Compile) error { ) return err } + + // create iscp jobs for index async update + ct, err := GetConstraintDef(c.proc.Ctx, newRelation) + if err != nil { + return err + } + for _, constraint := range ct.Cts { + if idxdef, ok := constraint.(*engine.IndexDef); ok && len(idxdef.Indexes) > 0 { + err = CreateAllIndexCdcTasks(c, idxdef.Indexes, dbName, tblName) + if err != nil { + return err + } + } + } + } if c.keepAutoIncrement == 0 { @@ -2078,6 +2126,21 @@ func (s *Scope) handleVectorIvfFlatIndex( return err } + async, err := catalog.IsIndexAsync(indexDefs[catalog.SystemSI_IVFFLAT_TblType_Metadata].IndexAlgoParams) + if err != nil { + return err + } + + // create ISCP job when Async is true + if async { + logutil.Infof("Ivfflat index Async is true") + sinker_type := getSinkerTypeFromAlgo(catalog.MoIndexIvfFlatAlgo.ToString()) + err = CreateIndexCdcTask(c, qryDatabase, originalTableDef.Name, + indexDefs[catalog.SystemSI_IVFFLAT_TblType_Metadata].IndexName, sinker_type) + if err != nil { + return err + } + } return nil } @@ -2102,6 +2165,9 @@ func (s *Scope) DropIndex(c *Compile) error { return err } + // old tabledef + oldTableDef := r.GetTableDef(c.proc.Ctx) + //1. build and update constraint def oldCt, err := GetConstraintDef(c.proc.Ctx, r) if err != nil { @@ -2130,14 +2196,20 @@ func (s *Scope) DropIndex(c *Compile) error { return err } + //3. 
delete iscp job for vector, fulltext index + err = DropIndexCdcTask(c, oldTableDef, qry.Database, qry.Table, qry.IndexName) + if err != nil { + return err + } } - //3. delete index object from mo_catalog.mo_indexes + //4. delete index object from mo_catalog.mo_indexes deleteSql := fmt.Sprintf(deleteMoIndexesWithTableIdAndIndexNameFormat, r.GetTableID(c.proc.Ctx), qry.IndexName) err = c.runSql(deleteSql) if err != nil { return err } + return nil } @@ -2651,6 +2723,12 @@ func (s *Scope) DropTable(c *Compile) error { } } + // delete cdc task of the vector and fulltext index here + err = DropAllIndexCdcTasks(c, rel.GetTableDef(c.proc.Ctx), qry.Database, qry.Table) + if err != nil { + return err + } + // delete all index objects record of the table in mo_catalog.mo_indexes if !qry.IsView && qry.Database != catalog.MO_CATALOG && qry.Table != catalog.MO_INDEXES { if qry.GetTableDef().Pkey != nil || len(qry.GetTableDef().Indexes) > 0 { diff --git a/pkg/sql/compile/ddl_index_algo.go b/pkg/sql/compile/ddl_index_algo.go index be4eec553a0e0..8d744e7ad8504 100644 --- a/pkg/sql/compile/ddl_index_algo.go +++ b/pkg/sql/compile/ddl_index_algo.go @@ -24,6 +24,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/catalog" "github.com/matrixorigin/matrixone/pkg/common/moerr" "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/logutil" "github.com/matrixorigin/matrixone/pkg/pb/api" "github.com/matrixorigin/matrixone/pkg/pb/plan" "github.com/matrixorigin/matrixone/pkg/util/executor" @@ -140,6 +141,7 @@ func (s *Scope) handleFullTextIndexTable( return moerr.NewInternalErrorNoCtx("FullText index is not enabled") } + // create hidden tables if indexInfo != nil { if len(indexInfo.GetIndexTables()) != 1 { return moerr.NewInternalErrorNoCtx("index table count not equal to 1") @@ -152,13 +154,33 @@ func (s *Scope) handleFullTextIndexTable( } } - insertSQLs := genInsertIndexTableSqlForFullTextIndex(originalTableDef, indexDef, qryDatabase) + 
insertSQLs, err := genInsertIndexTableSqlForFullTextIndex(originalTableDef, indexDef, qryDatabase) + if err != nil { + return err + } + for _, insertSQL := range insertSQLs { err = c.runSql(insertSQL) if err != nil { return err } } + + async, err := catalog.IsIndexAsync(indexDef.IndexAlgoParams) + if err != nil { + return err + } + // create ISCP job for Async fulltext index + if async { + logutil.Infof("fulltext index Async is true") + sinker_type := getSinkerTypeFromAlgo(catalog.MOIndexFullTextAlgo.ToString()) + err = CreateIndexCdcTask(c, qryDatabase, originalTableDef.Name, + indexDef.IndexName, sinker_type) + if err != nil { + return err + } + } + return nil } @@ -580,5 +602,12 @@ func (s *Scope) handleVectorHnswIndex( } } + // 4. register ISCP job for async update + sinker_type := getSinkerTypeFromAlgo(catalog.MoIndexHnswAlgo.ToString()) + err = CreateIndexCdcTask(c, qryDatabase, originalTableDef.Name, indexDefs[catalog.Hnsw_TblType_Metadata].IndexName, sinker_type) + if err != nil { + return err + } + return nil } diff --git a/pkg/sql/compile/iscp_util.go b/pkg/sql/compile/iscp_util.go new file mode 100644 index 0000000000000..c244f328fc2d3 --- /dev/null +++ b/pkg/sql/compile/iscp_util.go @@ -0,0 +1,208 @@ +// Copyright 2023 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package compile + +import ( + "context" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/iscp" + "github.com/matrixorigin/matrixone/pkg/logutil" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/txn/client" +) + +var ( + iscpRegisterJobFunc = iscp.RegisterJob + iscpUnregisterJobFunc = iscp.UnregisterJob +) + +/* CDC APIs */ +func RegisterJob(ctx context.Context, cnUUID string, txn client.TxnOperator, spec *iscp.JobSpec, job *iscp.JobID) (bool, error) { + //dummyurl := "mysql://root:111@127.0.0.1:6001" + // sql = fmt.Sprintf("CREATE CDC `%s` '%s' 'indexsync' '%s' '%s.%s' {'Level'='table'};", cdcname, dummyurl, dummyurl, qryDatabase, srctbl) + return iscpRegisterJobFunc(ctx, cnUUID, txn, spec, job, true) +} + +func UnregisterJob(ctx context.Context, cnUUID string, txn client.TxnOperator, job *iscp.JobID) (bool, error) { + return iscpUnregisterJobFunc(ctx, cnUUID, txn, job) +} + +/* start here */ +func CreateCdcTask(c *Compile, spec *iscp.JobSpec, job *iscp.JobID) (bool, error) { + logutil.Infof("Create Index Task %v", spec) + + return RegisterJob(c.proc.Ctx, c.proc.GetService(), c.proc.GetTxnOperator(), spec, job) +} + +func DeleteCdcTask(c *Compile, job *iscp.JobID) (bool, error) { + logutil.Infof("Delete Index Task %v", job) + return UnregisterJob(c.proc.Ctx, c.proc.GetService(), c.proc.GetTxnOperator(), job) +} + +func checkValidIndexCdcByIndexdef(idx *plan.IndexDef) (bool, error) { + var err error + + if idx.TableExist && + (catalog.IsHnswIndexAlgo(idx.IndexAlgo) || + catalog.IsIvfIndexAlgo(idx.IndexAlgo) || + catalog.IsFullTextIndexAlgo(idx.IndexAlgo)) { + async := false + if catalog.IsHnswIndexAlgo(idx.IndexAlgo) { + // HNSW always async + async = true + } else { + async, err = catalog.IsIndexAsync(idx.IndexAlgoParams) + if err != nil { + return false, err + } + } + + return async, nil + } + return false, nil +} + +func checkValidIndexCdc(tableDef *plan.TableDef, indexname 
string) (bool, error) { + for _, idx := range tableDef.Indexes { + + if idx.IndexName == indexname { + valid, err := checkValidIndexCdcByIndexdef(idx) + if err != nil { + return false, err + } + if valid { + return true, nil + } + } + } + return false, nil +} + +// NOTE: CreateIndexCdcTask will create CDC task without any checking. Original TableDef may be empty +func CreateIndexCdcTask(c *Compile, dbname string, tablename string, indexname string, sinker_type int8) error { + var err error + + spec := &iscp.JobSpec{ + ConsumerInfo: iscp.ConsumerInfo{ConsumerType: sinker_type, + DBName: dbname, + TableName: tablename, + IndexName: indexname}, + } + job := &iscp.JobID{DBName: dbname, TableName: tablename, JobName: genCdcTaskJobID(indexname)} + + // create index cdc task + ok, err := CreateCdcTask(c, spec, job) + if err != nil { + return err + } + + if !ok { + // cdc task already exist. ignore it. IVFFLAT alter reindex will call CreateIndexCdcTask multiple times. + logutil.Infof("index cdc task (%s, %s, %s) already exists", dbname, tablename, indexname) + return nil + } + return nil +} + +func genCdcTaskJobID(indexname string) string { + return "index_" + indexname +} + +func DropIndexCdcTask(c *Compile, tableDef *plan.TableDef, dbname string, tablename string, indexname string) error { + var err error + + valid, err := checkValidIndexCdc(tableDef, indexname) + if err != nil { + return err + } + + if !valid { + // index name is not valid cdc task. 
ignore it + return nil + } + + // delete index cdc task + _, err = DeleteCdcTask(c, &iscp.JobID{DBName: dbname, TableName: tablename, JobName: genCdcTaskJobID(indexname)}) + if err != nil { + return err + } + + return nil +} + +// drop all cdc tasks according to tableDef +func DropAllIndexCdcTasks(c *Compile, tabledef *plan.TableDef, dbname string, tablename string) error { + idxmap := make(map[string]bool) + for _, idx := range tabledef.Indexes { + + _, ok := idxmap[idx.IndexName] + if ok { + continue + } + + valid, err := checkValidIndexCdcByIndexdef(idx) + if err != nil { + return err + } + + if valid { + idxmap[idx.IndexName] = true + //hasindex = true + _, e := DeleteCdcTask(c, &iscp.JobID{DBName: dbname, TableName: tablename, JobName: genCdcTaskJobID(idx.IndexName)}) + if e != nil { + return e + } + } + } + return nil +} + +func getSinkerTypeFromAlgo(algo string) int8 { + if catalog.IsHnswIndexAlgo(algo) { + return int8(iscp.ConsumerType_IndexSync) + } else if catalog.IsIvfIndexAlgo(algo) { + return int8(iscp.ConsumerType_IndexSync) + } else if catalog.IsFullTextIndexAlgo(algo) { + return int8(iscp.ConsumerType_IndexSync) + } + panic("getSinkerTypeFromAlgo: invalid sinker type") +} + +// NOTE: CreateAllIndexCdcTasks will create CDC task according to existing tableDef +func CreateAllIndexCdcTasks(c *Compile, indexes []*plan.IndexDef, dbname string, tablename string) error { + idxmap := make(map[string]bool) + for _, idx := range indexes { + _, ok := idxmap[idx.IndexName] + if ok { + continue + } + + valid, err := checkValidIndexCdcByIndexdef(idx) + if err != nil { + return err + } + + if valid { + idxmap[idx.IndexName] = true + sinker_type := getSinkerTypeFromAlgo(idx.IndexAlgo) + e := CreateIndexCdcTask(c, dbname, tablename, idx.IndexName, sinker_type) + if e != nil { + return e + } + } + } + return nil +} diff --git a/pkg/sql/compile/iscp_util_test.go b/pkg/sql/compile/iscp_util_test.go new file mode 100644 index 0000000000000..ede4700406d7e --- /dev/null 
+++ b/pkg/sql/compile/iscp_util_test.go @@ -0,0 +1,301 @@ +// Copyright 2023 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compile + +import ( + "context" + "fmt" + "testing" + + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/iscp" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/matrixorigin/matrixone/pkg/txn/client" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +/* +func mockIscpRegisterJobSuccess(ctx context.Context, cnUUID string, txn client.TxnOperator, spec *iscp.JobSpec, job *iscp.JobID, startFromNow bool) (bool, error) { + return true, nil +} + +func mockIscpUnregisterJobSuccess(ctx context.Context, cnUUID string, txn client.TxnOperator, job *iscp.JobID) (bool, error) { + return true, nil +} +*/ + +func mockIscpRegisterJobError(ctx context.Context, cnUUID string, txn client.TxnOperator, spec *iscp.JobSpec, job *iscp.JobID, startFromNow bool) (bool, error) { + return false, moerr.NewInternalErrorNoCtx("mock register job error") +} + +func mockIscpUnregisterJobError(ctx context.Context, cnUUID string, txn client.TxnOperator, job *iscp.JobID) (bool, error) { + return false, moerr.NewInternalErrorNoCtx("mock unregister job error") +} + +func TestISCPCheckValidIndexCdcByIndexdef(t *testing.T) { + { + + idx := &plan.IndexDef{ + TableExist: true, + IndexAlgo: "ivfflat", + 
IndexAlgoParams: `{"async":"true"}`, + } + found, err := checkValidIndexCdcByIndexdef(idx) + require.Nil(t, err) + require.Equal(t, found, true) + } + + { + + idx := &plan.IndexDef{ + TableExist: true, + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":1}`, + } + _, err := checkValidIndexCdcByIndexdef(idx) + require.NotNil(t, err) + } + + { + + idx := &plan.IndexDef{ + TableExist: true, + IndexAlgo: "ivfflat", + IndexAlgoParams: `{}`, + } + found, err := checkValidIndexCdcByIndexdef(idx) + require.Nil(t, err) + require.Equal(t, found, false) + } +} + +func TestISCPCheckValidIndexCdc(t *testing.T) { + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":"true"}`, + }, + }, + } + + ok, err := checkValidIndexCdc(tbldef, "a") + require.Nil(t, err) + require.Equal(t, ok, true) + + } + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":1}`, + }, + }, + } + + _, err := checkValidIndexCdc(tbldef, "a") + require.NotNil(t, err) + + } +} + +func TestISCPCreateAllIndexCdcTasks(t *testing.T) { + + iscpRegisterJobFunc = mockIscpRegisterJobError + iscpUnregisterJobFunc = mockIscpUnregisterJobError + + defer func() { + iscpRegisterJobFunc = iscp.RegisterJob + iscpUnregisterJobFunc = iscp.UnregisterJob + }() + + c := &Compile{} + c.proc = testutil.NewProcess(t) + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":"true"}`, + }, + }, + } + + err := CreateAllIndexCdcTasks(c, tbldef.Indexes, "dbname", "tname") + require.NotNil(t, err) + fmt.Println(err) + + } + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":1}`, + }, + }, + } + + err := CreateAllIndexCdcTasks(c, 
tbldef.Indexes, "dbname", "tname") + require.NotNil(t, err) + fmt.Println(err) + + } + +} + +func TestISCPDropAllIndexCdcTasks(t *testing.T) { + + iscpRegisterJobFunc = mockIscpRegisterJobError + iscpUnregisterJobFunc = mockIscpUnregisterJobError + + defer func() { + iscpRegisterJobFunc = iscp.RegisterJob + iscpUnregisterJobFunc = iscp.UnregisterJob + }() + + c := &Compile{} + c.proc = testutil.NewProcess(t) + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":"true"}`, + }, + }, + } + + err := DropAllIndexCdcTasks(c, tbldef, "dbname", "tname") + require.NotNil(t, err) + fmt.Println(err) + + } + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":1}`, + }, + }, + } + + err := DropAllIndexCdcTasks(c, tbldef, "dbname", "tname") + require.NotNil(t, err) + fmt.Println(err) + + } + +} + +func TestISCPDropIndexCdcTask(t *testing.T) { + + iscpRegisterJobFunc = mockIscpRegisterJobError + iscpUnregisterJobFunc = mockIscpUnregisterJobError + + defer func() { + iscpRegisterJobFunc = iscp.RegisterJob + iscpUnregisterJobFunc = iscp.UnregisterJob + }() + + c := &Compile{} + c.proc = testutil.NewProcess(t) + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":"true"}`, + }, + }, + } + + err := DropIndexCdcTask(c, tbldef, "dbname", "tname", "a") + require.NotNil(t, err) + fmt.Println(err) + + } + + { + tbldef := &plan.TableDef{ + Indexes: []*plan.IndexDef{ + { + TableExist: true, + IndexName: "a", + IndexAlgo: "ivfflat", + IndexAlgoParams: `{"async":1}`, + }, + }, + } + + err := DropIndexCdcTask(c, tbldef, "dbname", "tname", "a") + require.NotNil(t, err) + fmt.Println(err) + + } + +} + +func TestISCPCreateIndexCdcTask(t *testing.T) { + + iscpRegisterJobFunc = 
mockIscpRegisterJobError + iscpUnregisterJobFunc = mockIscpUnregisterJobError + + defer func() { + iscpRegisterJobFunc = iscp.RegisterJob + iscpUnregisterJobFunc = iscp.UnregisterJob + }() + + c := &Compile{} + c.proc = testutil.NewProcess(t) + + { + err := CreateIndexCdcTask(c, "dbname", "tname", "a", 0) + require.NotNil(t, err) + fmt.Println(err) + + } + +} + +func TestISCPGetSinkerTypeFromAlgo(t *testing.T) { + assert.Panics(t, func() { getSinkerTypeFromAlgo("error") }, "getSinkerTypeFromAlgo panic") +} diff --git a/pkg/sql/compile/util.go b/pkg/sql/compile/util.go index 8fa97ba42d952..ac11b96982751 100644 --- a/pkg/sql/compile/util.go +++ b/pkg/sql/compile/util.go @@ -505,7 +505,7 @@ func GetConstraintDefFromTableDefs(defs []engine.TableDef) *engine.ConstraintDef return cstrDef } -func genInsertIndexTableSqlForFullTextIndex(originalTableDef *plan.TableDef, indexDef *plan.IndexDef, qryDatabase string) []string { +func genInsertIndexTableSqlForFullTextIndex(originalTableDef *plan.TableDef, indexDef *plan.IndexDef, qryDatabase string) ([]string, error) { src_alias := "src" pkColName := src_alias + "." + originalTableDef.Pkey.PkeyColName params := indexDef.IndexAlgoParams @@ -526,7 +526,7 @@ func genInsertIndexTableSqlForFullTextIndex(originalTableDef *plan.TableDef, ind pkColName, concat) - return []string{sql} + return []string{sql}, nil } func genDeleteHnswIndex(proc *process.Process, indexDefs map[string]*plan.IndexDef, qryDatabase string, originalTableDef *plan.TableDef) ([]string, error) { diff --git a/pkg/sql/plan/build_dml_util.go b/pkg/sql/plan/build_dml_util.go index 63186d2c7c8b1..a434b9f68e222 100644 --- a/pkg/sql/plan/build_dml_util.go +++ b/pkg/sql/plan/build_dml_util.go @@ -299,18 +299,18 @@ func checkDeleteOptToTruncate(ctx CompilerContext) (bool, error) { // buildDeletePlans build preinsert plan. /* -[o1]sink_scan -> join[u1] -> sink - [u1]sink_scan -> lock -> delete -> [mergedelete] ... // if it's delete stmt. 
do delete u1 - [u1]sink_scan -> preinsert_uk -> sink ... // if it's update stmt. do update u1 -[o1]sink_scan -> join[u2] -> sink - [u2]sink_scan -> lock -> delete -> [mergedelete] ... // if it's delete stmt. do delete u2 - [u2]sink_scan -> preinsert_uk -> sink ... // if it's update stmt. do update u2 -[o1]sink_scan -> predelete[get partition] -> lock -> delete -> [mergedelete] - -[o1]sink_scan -> join[f1 semi join c1 on c1.fid=f1.id, get f1.id] -> filter(assert(isempty(id))) // if have refChild table with no action -[o1]sink_scan -> join[f1 inner join c2 on f1.id = c2.fid, 取c2.*, null] -> sink ...(like update) // if have refChild table with set null -[o1]sink_scan -> join[f1 inner join c4 on f1.id = c4.fid, get c3.*] -> sink ...(like delete) // delete stmt: if have refChild table with cascade -[o1]sink_scan -> join[f1 inner join c4 on f1.id = c4.fid, get c3.*, update cols] -> sink ...(like update) // update stmt: if have refChild table with cascade + [o1]sink_scan -> join[u1] -> sink + [u1]sink_scan -> lock -> delete -> [mergedelete] ... // if it's delete stmt. do delete u1 + [u1]sink_scan -> preinsert_uk -> sink ... // if it's update stmt. do update u1 + [o1]sink_scan -> join[u2] -> sink + [u2]sink_scan -> lock -> delete -> [mergedelete] ... // if it's delete stmt. do delete u2 + [u2]sink_scan -> preinsert_uk -> sink ... // if it's update stmt. 
do update u2 + [o1]sink_scan -> predelete[get partition] -> lock -> delete -> [mergedelete] + + [o1]sink_scan -> join[f1 semi join c1 on c1.fid=f1.id, get f1.id] -> filter(assert(isempty(id))) // if have refChild table with no action + [o1]sink_scan -> join[f1 inner join c2 on f1.id = c2.fid, 取c2.*, null] -> sink ...(like update) // if have refChild table with set null + [o1]sink_scan -> join[f1 inner join c4 on f1.id = c4.fid, get c3.*] -> sink ...(like delete) // delete stmt: if have refChild table with cascade + [o1]sink_scan -> join[f1 inner join c4 on f1.id = c4.fid, get c3.*, update cols] -> sink ...(like update) // update stmt: if have refChild table with cascade */ func buildDeletePlans(ctx CompilerContext, builder *QueryBuilder, bindCtx *BindContext, delCtx *dmlPlanCtx) error { if sinkOrUnionNodeId, ok := builder.deleteNode[delCtx.tableDef.TblId]; ok { @@ -3202,16 +3202,16 @@ func runSql(ctx CompilerContext, sql string) (executor.Result, error) { } /* -Example on FkReferKey and FkReferDef: + Example on FkReferKey and FkReferDef: - In database `test`: + In database `test`: - create table t1(a int,primary key(a)); + create table t1(a int,primary key(a)); - create table t2(b int, constraint c1 foreign key(b) references t1(a)); + create table t2(b int, constraint c1 foreign key(b) references t1(a)); - So, the structure FkReferDef below denotes such relationships : test.t2(b) -> test.t1(a) - FkReferKey holds : db = test, tbl = t2 + So, the structure FkReferDef below denotes such relationships : test.t2(b) -> test.t1(a) + FkReferKey holds : db = test, tbl = t2 */ @@ -3507,6 +3507,16 @@ func buildPreInsertMultiTableIndexes(ctx CompilerContext, builder *QueryBuilder, switch multiTableIndex.IndexAlgo { case catalog.MoIndexIvfFlatAlgo.ToString(): + // skip async + var async bool + async, err = catalog.IsIndexAsync(multiTableIndex.IndexAlgoParams) + if err != nil { + return err + } + if async { + continue + } + lastNodeId = appendSinkScanNode(builder, bindCtx, 
sourceStep) var idxRefs = make([]*ObjectRef, 3) var idxTableDefs = make([]*TableDef, 3) @@ -3580,6 +3590,16 @@ func buildDeleteMultiTableIndexes(ctx CompilerContext, builder *QueryBuilder, bi for _, multiTableIndex := range multiTableIndexes { switch multiTableIndex.IndexAlgo { case catalog.MoIndexIvfFlatAlgo.ToString(): + // skip async + var async bool + async, err = catalog.IsIndexAsync(multiTableIndex.IndexAlgoParams) + if err != nil { + return err + } + if async { + continue + } + // Used by pre-insert vector index. var idxRefs = make([]*ObjectRef, 3) var idxTableDefs = make([]*TableDef, 3) @@ -4297,6 +4317,15 @@ func buildDeleteIndexPlans(ctx CompilerContext, builder *QueryBuilder, bindCtx * func buildPreInsertFullTextIndex(stmt *tree.Insert, ctx CompilerContext, builder *QueryBuilder, bindCtx *BindContext, objRef *ObjectRef, tableDef *TableDef, updateColLength int, sourceStep int32, ifInsertFromUniqueColMap map[string]bool, indexdef *plan.IndexDef, idx int) error { + // skip async + async, err := catalog.IsIndexAsync(indexdef.IndexAlgoParams) + if err != nil { + return err + } + if async { + return nil + } + isUpdate := (updateColLength > 0) lastNodeId := appendSinkScanNode(builder, bindCtx, sourceStep) @@ -4718,6 +4747,15 @@ func buildDeleteRowsFullTextIndex(ctx CompilerContext, builder *QueryBuilder, bi func buildPreDeleteFullTextIndex(ctx CompilerContext, builder *QueryBuilder, bindCtx *BindContext, delCtx *dmlPlanCtx, indexdef *plan.IndexDef, idx int, typMap map[string]plan.Type, posMap map[string]int) error { + // skip async + async, err := catalog.IsIndexAsync(indexdef.IndexAlgoParams) + if err != nil { + return err + } + if async { + return nil + } + //isUpdate := delCtx.updateColLength > 0 indexObjRef, indexTableDef, err := ctx.ResolveIndexTableByRef(delCtx.objRef, indexdef.IndexTableName, nil) if err != nil { @@ -4749,6 +4787,15 @@ func buildPreDeleteFullTextIndex(ctx CompilerContext, builder *QueryBuilder, bin func buildPostDmlFullTextIndex(ctx 
CompilerContext, builder *QueryBuilder, bindCtx *BindContext, indexObjRef *ObjectRef, indexTableDef *TableDef, tableDef *TableDef, sourceStep int32, indexdef *plan.IndexDef, idx int, isDelete, isInsert, isDeleteWithoutFilters bool) error { + // skip async + async, err := catalog.IsIndexAsync(indexdef.IndexAlgoParams) + if err != nil { + return err + } + if async { + return nil + } + lastNodeId := appendSinkScanNode(builder, bindCtx, sourceStep) orgPkColPos, _ := getPkPos(tableDef, false) diff --git a/pkg/sql/plan/build_dml_util_test.go b/pkg/sql/plan/build_dml_util_test.go index 7008a31d87d60..2fe741e4545c5 100644 --- a/pkg/sql/plan/build_dml_util_test.go +++ b/pkg/sql/plan/build_dml_util_test.go @@ -23,6 +23,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/common/buffer" moruntime "github.com/matrixorigin/matrixone/pkg/common/runtime" + "github.com/matrixorigin/matrixone/pkg/pb/plan" "github.com/matrixorigin/matrixone/pkg/testutil" "github.com/matrixorigin/matrixone/pkg/util/executor" ) @@ -50,3 +51,51 @@ func Test_runSql(t *testing.T) { _, err := runSql(compilerContext, "") require.Error(t, err, "internal error: no account id in context") } + +func Test_buildPostDmlFullTextIndexAsync(t *testing.T) { + { + //invalid json + idxdef := &plan.IndexDef{ + IndexAlgoParams: `{"async":1}`, + } + + err := buildPostDmlFullTextIndex(nil, nil, nil, nil, nil, nil, 0, idxdef, 0, false, false, false) + require.NotNil(t, err) + } + + { + + // async true + idxdef := &plan.IndexDef{ + IndexAlgoParams: `{"async":"true"}`, + } + + err := buildPostDmlFullTextIndex(nil, nil, nil, nil, nil, nil, 0, idxdef, 0, false, false, false) + require.Nil(t, err) + } + +} + +func Test_buildPreDeleteFullTextIndexAsync(t *testing.T) { + { + //invalid json + idxdef := &plan.IndexDef{ + IndexAlgoParams: `{"async":1}`, + } + + err := buildPreDeleteFullTextIndex(nil, nil, nil, nil, idxdef, 0, nil, nil) + require.NotNil(t, err) + } + + { + + // async true + idxdef := &plan.IndexDef{ + IndexAlgoParams: 
`{"async":"true"}`, + } + + err := buildPreDeleteFullTextIndex(nil, nil, nil, nil, idxdef, 0, nil, nil) + require.Nil(t, err) + } + +} diff --git a/pkg/vectorindex/types.go b/pkg/vectorindex/types.go index 5fcd032b1f021..b15ab87209ce6 100644 --- a/pkg/vectorindex/types.go +++ b/pkg/vectorindex/types.go @@ -112,9 +112,9 @@ type VectorIndexCdc[T types.RealNumbers] struct { Data []VectorIndexCdcEntry[T] `json:"cdc"` } -func NewVectorIndexCdc[T types.RealNumbers]() *VectorIndexCdc[T] { +func NewVectorIndexCdc[T types.RealNumbers](capacity int) *VectorIndexCdc[T] { return &VectorIndexCdc[T]{ - Data: make([]VectorIndexCdcEntry[T], 0, 8192), + Data: make([]VectorIndexCdcEntry[T], 0, capacity), } } diff --git a/pkg/vectorindex/types_test.go b/pkg/vectorindex/types_test.go index fad882167d8af..445cd9fd0732d 100644 --- a/pkg/vectorindex/types_test.go +++ b/pkg/vectorindex/types_test.go @@ -26,7 +26,7 @@ func TestCdc(t *testing.T) { key2 := int64(1) v2 := []float32{1, 2, 3} - cdc := NewVectorIndexCdc[float32]() + cdc := NewVectorIndexCdc[float32](8192) // Insert cdc.Insert(key, v) diff --git a/test/distributed/cases/fulltext/fulltext_async.result b/test/distributed/cases/fulltext/fulltext_async.result new file mode 100644 index 0000000000000..bc2e78807b937 --- /dev/null +++ b/test/distributed/cases/fulltext/fulltext_async.result @@ -0,0 +1,23 @@ +set experimental_fulltext_index=1; +set ft_relevancy_algorithm="TF-IDF"; +create table src (id bigint primary key, body varchar, title text, FULLTEXT ftidx (body, title) ASYNC); +insert into src values (0, 'color is red', 't1'), (1, 'car is yellow', 'crazy car'), (2, 'sky is blue', 'no limit'), (3, 'blue is not red', 'colorful'), +(4, '遠東兒童中文是針對6到9歲的小朋友精心設計的中文學習教材,共三冊,目前已出版一、二冊。', '遠東兒童中文'), +(5, '每冊均採用近百張全幅彩圖及照片,生動活潑、自然真實,加深兒童學習印象,洋溢學習樂趣。', '遠東兒童中文'), +(6, '各個單元主題內容涵蓋中華文化及生活應用的介紹。本套教材含課本、教學指引、生字卡、學生作業本與CD,中英對照,精美大字版。本系列有繁體字及簡體字兩種版本印行。', '中文短篇小說'), +(7, '59個簡單的英文和中文短篇小說', '適合初學者'), +(8, NULL, 'NOT INCLUDED'), +(9, 'NOT INCLUDED 
BODY', NULL), +(10, NULL, NULL); +select sleep(30); +sleep(30) +0 +select * from src where match(body, title) against('red'); +id body title +0 color is red t1 +3 blue is not red colorful +show create table src; +Table Create Table +src CREATE TABLE `src` (\n `id` bigint NOT NULL,\n `body` varchar(65535) DEFAULT NULL,\n `title` text DEFAULT NULL,\n PRIMARY KEY (`id`),\n FULLTEXT `ftidx`(`body`,`title`) ASYNC\n) +alter table src rename to src1; +drop table src1; diff --git a/test/distributed/cases/fulltext/fulltext_async.sql b/test/distributed/cases/fulltext/fulltext_async.sql new file mode 100644 index 0000000000000..c982c871679f8 --- /dev/null +++ b/test/distributed/cases/fulltext/fulltext_async.sql @@ -0,0 +1,25 @@ +-- TODO: run all tests with both experimental_fulltext_index = 0 and 1 +-- TODO: GENERATE the test case to cover all combinations of types (varchar, char and text) +set experimental_fulltext_index=1; +set ft_relevancy_algorithm="TF-IDF"; + +create table src (id bigint primary key, body varchar, title text, FULLTEXT ftidx (body, title) ASYNC); + +insert into src values (0, 'color is red', 't1'), (1, 'car is yellow', 'crazy car'), (2, 'sky is blue', 'no limit'), (3, 'blue is not red', 'colorful'), +(4, '遠東兒童中文是針對6到9歲的小朋友精心設計的中文學習教材,共三冊,目前已出版一、二冊。', '遠東兒童中文'), +(5, '每冊均採用近百張全幅彩圖及照片,生動活潑、自然真實,加深兒童學習印象,洋溢學習樂趣。', '遠東兒童中文'), +(6, '各個單元主題內容涵蓋中華文化及生活應用的介紹。本套教材含課本、教學指引、生字卡、學生作業本與CD,中英對照,精美大字版。本系列有繁體字及簡體字兩種版本印行。', '中文短篇小說'), +(7, '59個簡單的英文和中文短篇小說', '適合初學者'), +(8, NULL, 'NOT INCLUDED'), +(9, 'NOT INCLUDED BODY', NULL), +(10, NULL, NULL); + +select sleep(30); + +select * from src where match(body, title) against('red'); + +show create table src; + +alter table src rename to src1; + +drop table src1; diff --git a/test/distributed/cases/vector/vector_hnsw_async.result b/test/distributed/cases/vector/vector_hnsw_async.result new file mode 100644 index 0000000000000..a849151d687f9 --- /dev/null +++ b/test/distributed/cases/vector/vector_hnsw_async.result @@ -0,0 +1,66 
@@ +SET experimental_hnsw_index = 1; +drop database if exists hnsw_cdc; +create database if not exists hnsw_cdc; +use hnsw_cdc; +create table t1(a bigint primary key, b vecf32(3),c int,key c_k(c)); +create index idx01 using hnsw on t1(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +insert into t1 values (0, "[1,2,3]", 1); +UPDATE t1 set b = '[4,5,6]' where a = 0; +insert into t1 values (1, "[2,3,4]", 1); +DELETE FROM t1 WHERE a=1; +select sleep(30); +sleep(30) +0 +select * from t1 order by L2_DISTANCE(b,"[1,2,3]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +select * from t1 order by L2_DISTANCE(b,"[4,5,6]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +select * from t1 order by L2_DISTANCE(b,"[2,3,4]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +drop table t1; +create table t2(a bigint primary key, b vecf32(128)); +create index idx2 using hnsw on t2(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t2 fields terminated by ':' parallel 'true'; +select count(*) from t2; +count(*) +10000 +select sleep(30); +sleep(30) +0 +select * from t2 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; +a b +9999 [14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 
119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7] +select * from t2 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; +a b +0 [0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1] +drop table t2; +create table t3(a bigint primary key, b vecf32(128)); +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; +select count(*) from t3; +count(*) +10000 +create index idx3 using hnsw on t3(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; +select count(*) from t3; +count(*) +20000 +select sleep(45); +sleep(45) +0 +select * from t3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 
119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; +a b +9999 [14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7] +select * from t3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; +a b +0 [0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1] +select * from t3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 
115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; +a b +10000 [59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18] +select * from t3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; +a b +19999 [0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25] +drop table t3; +drop database hnsw_cdc; diff --git a/test/distributed/cases/vector/vector_hnsw_async.sql b/test/distributed/cases/vector/vector_hnsw_async.sql new file mode 100644 index 
0000000000000..79f9cef553d95 --- /dev/null +++ b/test/distributed/cases/vector/vector_hnsw_async.sql @@ -0,0 +1,96 @@ + +SET experimental_hnsw_index = 1; + +drop database if exists hnsw_cdc; +create database if not exists hnsw_cdc; +use hnsw_cdc; + +create table t1(a bigint primary key, b vecf32(3),c int,key c_k(c)); + +-- empty data +create index idx01 using hnsw on t1(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; + +-- select sleep(30); + +insert into t1 values (0, "[1,2,3]", 1); +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"U", "pk":0, "v":[1,2,3]}]}'); + +UPDATE t1 set b = '[4,5,6]' where a = 0; +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"U", "pk":0, "v":[4,5,6]}]}'); + +insert into t1 values (1, "[2,3,4]", 1); +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"I", "pk":1, "v":[2,3,4]}]}'); + +DELETE FROM t1 WHERE a=1; +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"D", "pk":0}]}'); + +select sleep(30); + +-- test with multi-cn is tricky. since model is cached in memory, model may not be updated after CDC sync'd. The only way to test is to do all INSERT/DELETE/UPDATE before SELECT. +-- already update to [4,5,6], result is [4,5,6] +select * from t1 order by L2_DISTANCE(b,"[1,2,3]") ASC LIMIT 10; + +-- should return a=0 +select * from t1 order by L2_DISTANCE(b,"[4,5,6]") ASC LIMIT 10; + +-- a=1 deleted. 
result is [4,5,6] +select * from t1 order by L2_DISTANCE(b,"[2,3,4]") ASC LIMIT 10; + +drop table t1; + +-- t2 +create table t2(a bigint primary key, b vecf32(128)); +create index idx2 using hnsw on t2(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +-- select sleep(30); + +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t2 fields terminated by ':' parallel 'true'; + +select count(*) from t2; + +select sleep(30); + +select * from t2 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; + +select * from t2 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; + + +-- delete whole table won't work for now. 
+-- delete from t2 +-- select sleep(10) + +drop table t2; + +-- end t2 + +-- t3 +create table t3(a bigint primary key, b vecf32(128)); + +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; + +select count(*) from t3; + +create index idx3 using hnsw on t3(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; + +-- select sleep(30); + +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; + +select count(*) from t3; + +select sleep(45); + +select * from t3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; + +select * from t3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; + + +select * from t3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 
43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; + +select * from t3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; + +drop table t3; + +-- end t3 + +drop database hnsw_cdc; + diff --git a/test/distributed/cases/vector/vector_hnsw_f64_async.result b/test/distributed/cases/vector/vector_hnsw_f64_async.result new file mode 100644 index 0000000000000..8f56e173eb72f --- /dev/null +++ b/test/distributed/cases/vector/vector_hnsw_f64_async.result @@ -0,0 +1,66 @@ +SET experimental_hnsw_index = 1; +drop database if exists hnsw_cdc; +create database if not exists hnsw_cdc; +use hnsw_cdc; +create table t1(a bigint primary key, b vecf64(3),c int,key c_k(c)); +create index idx01 using hnsw on t1(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +insert into t1 values (0, "[1,2,3]", 1); +UPDATE t1 set b = '[4,5,6]' where a = 0; +insert into t1 values (1, "[2,3,4]", 1); +DELETE FROM t1 WHERE a=1; +select sleep(30); +sleep(30) +0 +select * from t1 order by L2_DISTANCE(b,"[1,2,3]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +select * from t1 order by L2_DISTANCE(b,"[4,5,6]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +select * from t1 order by L2_DISTANCE(b,"[2,3,4]") ASC LIMIT 10; +a b c +0 [4, 5, 6] 1 +drop table t1; +create table t2(a bigint primary key, b vecf64(128)); +create index idx2 using hnsw on t2(b) op_type 
"vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t2 fields terminated by ':' parallel 'true'; +select count(*) from t2; +count(*) +10000 +select sleep(30); +sleep(30) +0 +select * from t2 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; +a b +9999 [14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7] +select * from t2 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; +a b +0 [0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 
4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1] +drop table t2; +create table t3(a bigint primary key, b vecf64(128)); +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; +select count(*) from t3; +count(*) +10000 +create index idx3 using hnsw on t3(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; +select count(*) from t3; +count(*) +20000 +select sleep(45); +sleep(45) +0 +select * from t3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; +a b +9999 [14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7] +select * from t3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 
78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; +a b +0 [0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1] +select * from t3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; +a b +10000 [59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18] +select * from t3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 
82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; +a b +19999 [0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25] +drop table t3; +drop database hnsw_cdc; diff --git a/test/distributed/cases/vector/vector_hnsw_f64_async.sql b/test/distributed/cases/vector/vector_hnsw_f64_async.sql new file mode 100644 index 0000000000000..6bf3192ff9e3d --- /dev/null +++ b/test/distributed/cases/vector/vector_hnsw_f64_async.sql @@ -0,0 +1,96 @@ + +SET experimental_hnsw_index = 1; + +drop database if exists hnsw_cdc; +create database if not exists hnsw_cdc; +use hnsw_cdc; + +create table t1(a bigint primary key, b vecf64(3),c int,key c_k(c)); + +-- empty data +create index idx01 using hnsw on t1(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; + +-- select sleep(30); + +insert into t1 values (0, "[1,2,3]", 1); +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"U", "pk":0, "v":[1,2,3]}]}'); + +UPDATE t1 set b = '[4,5,6]' where a = 0; +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"U", "pk":0, "v":[4,5,6]}]}'); + +insert into t1 values (1, "[2,3,4]", 1); +-- select 
hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"I", "pk":1, "v":[2,3,4]}]}'); + +DELETE FROM t1 WHERE a=1; +-- select hnsw_cdc_update('hnsw_cdc', 't1', 3, '{"start":"", "end":"", "cdc":[{"t":"D", "pk":0}]}'); + +select sleep(30); + +-- test with multi-cn is tricky. since model is cached in memory, model may not be updated after CDC sync'd. The only way to test is to do all INSERT/DELETE/UPDATE before SELECT. +-- already update to [4,5,6], result is [4,5,6] +select * from t1 order by L2_DISTANCE(b,"[1,2,3]") ASC LIMIT 10; + +-- should return a=0 +select * from t1 order by L2_DISTANCE(b,"[4,5,6]") ASC LIMIT 10; + +-- a=1 deleted. result is [4,5,6] +select * from t1 order by L2_DISTANCE(b,"[2,3,4]") ASC LIMIT 10; + +drop table t1; + +-- t2 +create table t2(a bigint primary key, b vecf64(128)); +create index idx2 using hnsw on t2(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; +-- select sleep(30); + +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t2 fields terminated by ':' parallel 'true'; + +select count(*) from t2; + +select sleep(30); + +select * from t2 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; + +select * from t2 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 
52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; + + +-- delete whole table won't work for now. +-- delete from t2 +-- select sleep(10) + +drop table t2; + +-- end t2 + +-- t3 +create table t3(a bigint primary key, b vecf64(128)); + +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; + +select count(*) from t3; + +create index idx3 using hnsw on t3(b) op_type "vector_l2_ops" M 48 EF_CONSTRUCTION 64 EF_SEARCH 64 ASYNC; + +-- select sleep(30); + +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table t3 fields terminated by ':' parallel 'true'; + +select count(*) from t3; + +select sleep(45); + +select * from t3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; + +select * from t3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; + + +select * 
from t3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; + +select * from t3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; + +drop table t3; + +-- end t3 + +drop database hnsw_cdc; + diff --git a/test/distributed/cases/vector/vector_ivf_async.result b/test/distributed/cases/vector/vector_ivf_async.result new file mode 100644 index 0000000000000..42ec1b3ddfac0 --- /dev/null +++ b/test/distributed/cases/vector/vector_ivf_async.result @@ -0,0 +1,58 @@ +SET experimental_ivf_index = 0; +create table vector_index_00(a int primary key, b vecf32(128),c int,key c_k(c), KEY idx using ivfflat(b) lists = 256 op_type 'vector_l2_ops'); +internal error: experimental_ivf_index is not enabled +SET experimental_ivf_index = 1; +SET probe_limit=1; +create table vector_index_01(a int primary key, b vecf32(128),c int,key c_k(c)); +insert into vector_index_01 values(9774 ,"[1, 0, 1, 6, 6, 17, 47, 39, 2, 0, 1, 25, 27, 10, 56, 130, 18, 5, 2, 6, 15, 2, 19, 130, 42, 28, 1, 1, 2, 1, 0, 5, 0, 2, 4, 4, 31, 34, 44, 35, 9, 3, 8, 11, 33, 12, 61, 130, 130, 17, 0, 1, 6, 2, 9, 130, 111, 36, 0, 0, 
11, 9, 1, 12, 2, 100, 130, 28, 7, 2, 6, 7, 9, 27, 130, 83, 5, 0, 1, 18, 130, 130, 84, 9, 0, 0, 2, 24, 111, 24, 0, 1, 37, 24, 2, 10, 12, 62, 33, 3, 0, 0, 0, 1, 3, 16, 106, 28, 0, 0, 0, 0, 17, 46, 85, 10, 0, 0, 1, 4, 11, 4, 2, 2, 9, 14, 8, 8]",3),(9775,"[0, 1, 1, 3, 0, 3, 46, 20, 1, 4, 17, 9, 1, 17, 108, 15, 0, 3, 37, 17, 6, 15, 116, 16, 6, 1, 4, 7, 7, 7, 9, 6, 0, 8, 10, 4, 26, 129, 27, 9, 0, 0, 5, 2, 11, 129, 129, 12, 103, 4, 0, 0, 2, 31, 129, 129, 94, 4, 0, 0, 0, 3, 13, 42, 0, 15, 38, 2, 70, 129, 1, 0, 5, 10, 40, 12, 74, 129, 6, 1, 129, 39, 6, 1, 2, 22, 9, 33, 122, 13, 0, 0, 0, 0, 5, 23, 4, 11, 9, 12, 45, 38, 1, 0, 0, 4, 36, 38, 57, 32, 0, 0, 82, 22, 9, 5, 13, 11, 3, 94, 35, 3, 0, 0, 0, 1, 16, 97]",5),(9776,"[10, 3, 8, 5, 48, 26, 5, 16, 17, 0, 0, 2, 132, 53, 1, 16, 112, 6, 0, 0, 7, 2, 1, 48, 48, 15, 18, 31, 3, 0, 0, 9, 6, 10, 19, 27, 50, 46, 17, 9, 18, 1, 4, 48, 132, 23, 3, 5, 132, 9, 4, 3, 11, 0, 2, 46, 84, 12, 10, 10, 1, 0, 12, 76, 26, 22, 16, 26, 35, 15, 3, 16, 15, 1, 51, 132, 125, 8, 1, 2, 132, 51, 67, 91, 8, 0, 0, 30, 126, 39, 32, 38, 4, 0, 1, 12, 24, 2, 2, 2, 4, 7, 2, 19, 93, 19, 70, 92, 2, 3, 1, 21, 36, 58, 132, 94, 0, 0, 0, 0, 21, 25, 57, 48, 1, 0, 0, 1]",3); +insert into vector_index_01 values(9777, " [16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]",4),(9778,"[41, 0, 0, 7, 1, 1, 20, 67, 9, 0, 0, 0, 0, 31, 120, 61, 25, 0, 0, 0, 0, 10, 120, 90, 32, 0, 0, 1, 13, 11, 22, 50, 4, 0, 2, 93, 40, 15, 37, 18, 12, 2, 2, 19, 8, 44, 120, 25, 120, 5, 0, 0, 0, 2, 48, 97, 102, 14, 3, 3, 11, 9, 34, 41, 0, 0, 4, 120, 56, 3, 4, 5, 6, 15, 37, 116, 28, 0, 0, 3, 120, 
120, 24, 6, 2, 0, 1, 28, 53, 90, 51, 11, 11, 2, 12, 14, 8, 6, 4, 30, 9, 1, 4, 22, 25, 79, 120, 66, 5, 0, 0, 6, 42, 120, 91, 43, 15, 2, 4, 39, 12, 9, 9, 12, 15, 5, 24, 36]",4); +create index idx01 using ivfflat on vector_index_01(b) lists=5 op_type "vector_l2_ops" ASYNC; +show create table vector_index_01; +Table Create Table +vector_index_01 CREATE TABLE `vector_index_01` (\n `a` int NOT NULL,\n `b` vecf32(128) DEFAULT NULL,\n `c` int DEFAULT NULL,\n PRIMARY KEY (`a`),\n KEY `c_k` (`c`),\n KEY `idx01` USING ivfflat (`b`) lists = 5 op_type 'vector_l2_ops' async \n) +desc vector_index_01; +Field Type Null Key Default Extra Comment +a INT(32) NO PRI null +b VECF32(128) YES MUL null +c INT(32) YES MUL null +select * from vector_index_01 order by L2_DISTANCE(b, "[16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]") ASC LIMIT 2; +a b c +9777 [16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13] 4 +select * from vector_index_01 where a>9774 order by L2_DISTANCE(b, "[4, 6, 1, 42, 119, 4, 1, 0, 9, 96, 58, 2, 14, 0, 0, 0, 0, 57, 119, 24, 15, 2, 0, 0, 0, 0, 48, 26, 26, 7, 0, 0, 66, 21, 0, 4, 107, 92, 42, 7, 119, 119, 45, 2, 21, 18, 0, 9, 0, 39, 67, 43, 101, 66, 0, 0, 0, 
3, 38, 75, 76, 17, 0, 0, 89, 0, 0, 0, 6, 26, 21, 20, 119, 1, 0, 0, 10, 43, 72, 92, 2, 8, 22, 25, 22, 46, 119, 60, 1, 13, 45, 48, 75, 69, 45, 15, 13, 0, 0, 2, 6, 0, 1, 11, 24, 0, 0, 0, 0, 24, 92, 49, 0, 0, 0, 0, 0, 18, 119, 40, 0, 0, 0, 0, 4, 47, 81, 10]") desc limit 2; +a b c +9775 [0, 1, 1, 3, 0, 3, 46, 20, 1, 4, 17, 9, 1, 17, 108, 15, 0, 3, 37, 17, 6, 15, 116, 16, 6, 1, 4, 7, 7, 7, 9, 6, 0, 8, 10, 4, 26, 129, 27, 9, 0, 0, 5, 2, 11, 129, 129, 12, 103, 4, 0, 0, 2, 31, 129, 129, 94, 4, 0, 0, 0, 3, 13, 42, 0, 15, 38, 2, 70, 129, 1, 0, 5, 10, 40, 12, 74, 129, 6, 1, 129, 39, 6, 1, 2, 22, 9, 33, 122, 13, 0, 0, 0, 0, 5, 23, 4, 11, 9, 12, 45, 38, 1, 0, 0, 4, 36, 38, 57, 32, 0, 0, 82, 22, 9, 5, 13, 11, 3, 94, 35, 3, 0, 0, 0, 1, 16, 97] 5 +insert into vector_index_01 values(9779, " [10, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]",4); +select sleep(30); +sleep(30) +0 +select * from vector_index_01 order by L2_DISTANCE(b, "[10, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]") ASC LIMIT 2; +a b c +9779 [10, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 
112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13] 4 +9777 [16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13] 4 +drop table vector_index_01; +create table ivf3(a bigint primary key, b vecf32(128)); +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table ivf3 fields terminated by ':' parallel 'true'; +select count(*) from ivf3; +count(*) +10000 +create index idx3 using ivfflat on ivf3(b) op_type "vector_l2_ops" LISTS=100 ASYNC; +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table ivf3 fields terminated by ':' parallel 'true'; +select count(*) from ivf3; +count(*) +20000 +select sleep(45); +sleep(45) +0 +select * from ivf3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; +a b +9999 [14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 
15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7] +select * from ivf3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; +a b +0 [0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1] +select * from ivf3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; +a b +10000 [59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 
4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18] +select * from ivf3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; +a b +19999 [0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25] +drop table ivf3; diff --git a/test/distributed/cases/vector/vector_ivf_async.sql b/test/distributed/cases/vector/vector_ivf_async.sql new file mode 100644 index 0000000000000..cba0551640bff --- /dev/null +++ b/test/distributed/cases/vector/vector_ivf_async.sql @@ -0,0 +1,59 @@ +-- create table error +SET experimental_ivf_index = 0; +create table vector_index_00(a int primary key, b vecf32(128),c int,key c_k(c), KEY idx using ivfflat(b) lists = 256 op_type 'vector_l2_ops'); + + +SET experimental_ivf_index = 1; +SET probe_limit=1; + +-- create vector index: create->create 
index->insert +create table vector_index_01(a int primary key, b vecf32(128),c int,key c_k(c)); +insert into vector_index_01 values(9774 ,"[1, 0, 1, 6, 6, 17, 47, 39, 2, 0, 1, 25, 27, 10, 56, 130, 18, 5, 2, 6, 15, 2, 19, 130, 42, 28, 1, 1, 2, 1, 0, 5, 0, 2, 4, 4, 31, 34, 44, 35, 9, 3, 8, 11, 33, 12, 61, 130, 130, 17, 0, 1, 6, 2, 9, 130, 111, 36, 0, 0, 11, 9, 1, 12, 2, 100, 130, 28, 7, 2, 6, 7, 9, 27, 130, 83, 5, 0, 1, 18, 130, 130, 84, 9, 0, 0, 2, 24, 111, 24, 0, 1, 37, 24, 2, 10, 12, 62, 33, 3, 0, 0, 0, 1, 3, 16, 106, 28, 0, 0, 0, 0, 17, 46, 85, 10, 0, 0, 1, 4, 11, 4, 2, 2, 9, 14, 8, 8]",3),(9775,"[0, 1, 1, 3, 0, 3, 46, 20, 1, 4, 17, 9, 1, 17, 108, 15, 0, 3, 37, 17, 6, 15, 116, 16, 6, 1, 4, 7, 7, 7, 9, 6, 0, 8, 10, 4, 26, 129, 27, 9, 0, 0, 5, 2, 11, 129, 129, 12, 103, 4, 0, 0, 2, 31, 129, 129, 94, 4, 0, 0, 0, 3, 13, 42, 0, 15, 38, 2, 70, 129, 1, 0, 5, 10, 40, 12, 74, 129, 6, 1, 129, 39, 6, 1, 2, 22, 9, 33, 122, 13, 0, 0, 0, 0, 5, 23, 4, 11, 9, 12, 45, 38, 1, 0, 0, 4, 36, 38, 57, 32, 0, 0, 82, 22, 9, 5, 13, 11, 3, 94, 35, 3, 0, 0, 0, 1, 16, 97]",5),(9776,"[10, 3, 8, 5, 48, 26, 5, 16, 17, 0, 0, 2, 132, 53, 1, 16, 112, 6, 0, 0, 7, 2, 1, 48, 48, 15, 18, 31, 3, 0, 0, 9, 6, 10, 19, 27, 50, 46, 17, 9, 18, 1, 4, 48, 132, 23, 3, 5, 132, 9, 4, 3, 11, 0, 2, 46, 84, 12, 10, 10, 1, 0, 12, 76, 26, 22, 16, 26, 35, 15, 3, 16, 15, 1, 51, 132, 125, 8, 1, 2, 132, 51, 67, 91, 8, 0, 0, 30, 126, 39, 32, 38, 4, 0, 1, 12, 24, 2, 2, 2, 4, 7, 2, 19, 93, 19, 70, 92, 2, 3, 1, 21, 36, 58, 132, 94, 0, 0, 0, 0, 21, 25, 57, 48, 1, 0, 0, 1]",3); +insert into vector_index_01 values(9777, " [16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 
42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]",4),(9778,"[41, 0, 0, 7, 1, 1, 20, 67, 9, 0, 0, 0, 0, 31, 120, 61, 25, 0, 0, 0, 0, 10, 120, 90, 32, 0, 0, 1, 13, 11, 22, 50, 4, 0, 2, 93, 40, 15, 37, 18, 12, 2, 2, 19, 8, 44, 120, 25, 120, 5, 0, 0, 0, 2, 48, 97, 102, 14, 3, 3, 11, 9, 34, 41, 0, 0, 4, 120, 56, 3, 4, 5, 6, 15, 37, 116, 28, 0, 0, 3, 120, 120, 24, 6, 2, 0, 1, 28, 53, 90, 51, 11, 11, 2, 12, 14, 8, 6, 4, 30, 9, 1, 4, 22, 25, 79, 120, 66, 5, 0, 0, 6, 42, 120, 91, 43, 15, 2, 4, 39, 12, 9, 9, 12, 15, 5, 24, 36]",4); +create index idx01 using ivfflat on vector_index_01(b) lists=5 op_type "vector_l2_ops" ASYNC; +show create table vector_index_01; +desc vector_index_01; +select * from vector_index_01 order by L2_DISTANCE(b, "[16, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]") ASC LIMIT 2; +select * from vector_index_01 where a>9774 order by L2_DISTANCE(b, "[4, 6, 1, 42, 119, 4, 1, 0, 9, 96, 58, 2, 14, 0, 0, 0, 0, 57, 119, 24, 15, 2, 0, 0, 0, 0, 48, 26, 26, 7, 0, 0, 66, 21, 0, 4, 107, 92, 42, 7, 119, 119, 45, 2, 21, 18, 0, 9, 0, 39, 67, 43, 101, 66, 0, 0, 0, 3, 38, 75, 76, 17, 0, 0, 89, 0, 0, 0, 6, 26, 21, 20, 119, 1, 0, 0, 10, 43, 72, 92, 2, 8, 22, 25, 22, 46, 119, 60, 1, 13, 45, 48, 75, 69, 45, 15, 13, 0, 0, 2, 6, 0, 1, 11, 24, 0, 0, 0, 0, 24, 92, 49, 0, 0, 0, 0, 0, 18, 119, 40, 0, 0, 0, 0, 4, 47, 81, 10]") desc limit 2; + + +insert into vector_index_01 values(9779, " [10, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 
104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]",4); + +select sleep(30); + +-- 9779 +select * from vector_index_01 order by L2_DISTANCE(b, "[10, 15, 0, 0, 5, 46, 5, 5, 4, 0, 0, 0, 28, 118, 12, 5, 75, 44, 5, 0, 6, 32, 6, 49, 41, 74, 9, 1, 0, 0, 0, 9, 1, 9, 16, 41, 71, 80, 3, 0, 0, 4, 3, 5, 51, 106, 11, 3, 112, 28, 13, 1, 4, 8, 3, 104, 118, 14, 1, 1, 0, 0, 0, 88, 3, 27, 46, 118, 108, 49, 2, 0, 1, 46, 118, 118, 27, 12, 0, 0, 33, 118, 118, 8, 0, 0, 0, 4, 118, 95, 40, 0, 0, 0, 1, 11, 27, 38, 12, 12, 18, 29, 3, 2, 13, 30, 94, 78, 30, 19, 9, 3, 31, 45, 70, 42, 15, 1, 3, 12, 14, 22, 16, 2, 3, 17, 24, 13]") ASC LIMIT 2; + +drop table vector_index_01; + + +-- ivf3 + +create table ivf3(a bigint primary key, b vecf32(128)); + +load data infile {'filepath'='$resources/vector/sift128_base_10k.csv.gz', 'compression'='gzip'} into table ivf3 fields terminated by ':' parallel 'true'; + +select count(*) from ivf3; + +create index idx3 using ivfflat on ivf3(b) op_type "vector_l2_ops" LISTS=100 ASYNC; + +-- select sleep(30); + +load data infile {'filepath'='$resources/vector/sift128_base_10k_2.csv.gz', 'compression'='gzip'} into table ivf3 fields terminated by ':' parallel 'true'; + +select count(*) from ivf3; + +select sleep(45); + +select * from ivf3 order by L2_DISTANCE(b, "[14, 2, 0, 0, 0, 2, 42, 55, 9, 1, 0, 0, 18, 100, 77, 32, 89, 1, 0, 0, 19, 85, 15, 68, 52, 4, 0, 0, 0, 0, 2, 28, 34, 13, 5, 12, 49, 40, 39, 37, 24, 2, 0, 0, 34, 83, 88, 28, 119, 20, 0, 0, 41, 39, 13, 62, 119, 16, 2, 0, 0, 0, 10, 42, 9, 46, 82, 79, 64, 19, 2, 5, 10, 35, 26, 53, 84, 32, 34, 9, 119, 119, 21, 3, 3, 11, 17, 14, 119, 25, 8, 5, 0, 0, 11, 22, 23, 17, 42, 49, 17, 12, 5, 5, 12, 78, 119, 90, 27, 0, 4, 2, 48, 92, 112, 85, 15, 0, 2, 7, 50, 36, 15, 11, 1, 0, 0, 7]") ASC LIMIT 1; + +select 
* from ivf3 order by L2_DISTANCE(b, "[0, 16, 35, 5, 32, 31, 14, 10, 11, 78, 55, 10, 45, 83, 11, 6, 14, 57, 102, 75, 20, 8, 3, 5, 67, 17, 19, 26, 5, 0, 1, 22, 60, 26, 7, 1, 18, 22, 84, 53, 85, 119, 119, 4, 24, 18, 7, 7, 1, 81, 106, 102, 72, 30, 6, 0, 9, 1, 9, 119, 72, 1, 4, 33, 119, 29, 6, 1, 0, 1, 14, 52, 119, 30, 3, 0, 0, 55, 92, 111, 2, 5, 4, 9, 22, 89, 96, 14, 1, 0, 1, 82, 59, 16, 20, 5, 25, 14, 11, 4, 0, 0, 1, 26, 47, 23, 4, 0, 0, 4, 38, 83, 30, 14, 9, 4, 9, 17, 23, 41, 0, 0, 2, 8, 19, 25, 23, 1]") ASC LIMIT 1; + + +select * from ivf3 order by L2_DISTANCE(b, "[59, 0, 0, 1, 1, 1, 5, 100, 41, 0, 0, 4, 57, 34, 31, 115, 4, 0, 0, 12, 30, 33, 43, 85, 21, 0, 0, 14, 25, 9, 10, 60, 99, 11, 0, 0, 0, 0, 10, 55, 68, 1, 0, 3, 115, 65, 42, 115, 32, 3, 0, 4, 13, 21, 104, 115, 81, 15, 15, 23, 9, 2, 21, 75, 43, 20, 1, 0, 10, 2, 2, 20, 52, 35, 32, 61, 79, 8, 7, 41, 50, 106, 96, 20, 8, 2, 11, 39, 115, 48, 53, 11, 3, 0, 2, 43, 35, 11, 0, 1, 13, 7, 0, 1, 115, 58, 54, 29, 1, 2, 0, 3, 32, 115, 99, 34, 1, 0, 0, 0, 35, 15, 52, 44, 9, 0, 0, 18]") ASC LIMIT 1; + +select * from ivf3 order by L2_DISTANCE(b, "[0, 0, 0, 0, 0, 101, 82, 4, 2, 0, 0, 0, 3, 133, 133, 8, 46, 1, 2, 13, 15, 29, 87, 50, 22, 1, 0, 16, 25, 6, 18, 49, 5, 2, 0, 2, 3, 59, 70, 19, 18, 2, 0, 11, 42, 37, 30, 13, 133, 13, 4, 53, 28, 3, 8, 42, 77, 6, 11, 103, 36, 0, 0, 32, 7, 15, 59, 27, 2, 0, 2, 5, 14, 5, 55, 52, 51, 3, 2, 5, 133, 21, 10, 38, 26, 1, 0, 64, 71, 3, 10, 118, 53, 5, 6, 28, 33, 26, 73, 15, 0, 0, 0, 22, 13, 15, 133, 133, 4, 0, 0, 15, 107, 62, 46, 91, 9, 1, 7, 16, 28, 4, 0, 27, 33, 4, 15, 25]") ASC LIMIT 1; + +drop table ivf3; + +