Skip to content

Commit 2752b3b

Browse files
committed
feat: support diff
Change-Id: I36c049ff053fc155f8aecca46847c643ce5e8776
1 parent 2123316 commit 2752b3b

15 files changed

+377
-63
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
.idea
33
fe.audit.log.*
44
dorisdump
5-
dorisdump_output
5+
output
66
.dorisdump
77
*.cast
88
*.tokens

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ dorisdump replay -f /path/to/dump.sql \
4343
--count 100 \ # max replay sql count
4444
--speed 0.5 \ # replay speed
4545
--result-dir replay1
46+
47+
# Print diff of two replay result directories
48+
dorisdump diff replay1 replay2
4649
```
4750

4851
## Build

cmd/diff.go

+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
/*
2+
Copyright © 2024 Thearas [email protected]
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package cmd
17+
18+
import (
19+
"bufio"
20+
"errors"
21+
"fmt"
22+
"math"
23+
"os"
24+
"path/filepath"
25+
"strconv"
26+
"strings"
27+
"time"
28+
29+
"github.com/fatih/color"
30+
"github.com/goccy/go-json"
31+
"github.com/sirupsen/logrus"
32+
"github.com/spf13/cobra"
33+
34+
"github.com/Thearas/dorisdump/src"
35+
)
36+
37+
var (
38+
noColor bool
39+
minDurationDiff time.Duration
40+
)
41+
42+
// diffCmd represents the diff command
43+
var diffCmd = &cobra.Command{
44+
Use: "diff",
45+
Short: "Diff replay result",
46+
Example: "dorisdump diff /path/to/replay1 /path/to/replay2",
47+
RunE: func(cmd *cobra.Command, args []string) error {
48+
if noColor {
49+
if err := os.Setenv("NO_COLOR", "true"); err != nil {
50+
return err
51+
}
52+
}
53+
54+
if len(args) != 2 {
55+
return errors.New("diff requires two file path")
56+
}
57+
58+
replay1, replay2 := args[0], args[1]
59+
lstats, err := os.Stat(replay1)
60+
if err != nil {
61+
return err
62+
}
63+
rstats, err := os.Stat(replay2)
64+
if err != nil {
65+
return err
66+
}
67+
if lstats.IsDir() != rstats.IsDir() {
68+
return errors.New("file path should be both file or both directory")
69+
}
70+
71+
return diff(replay1, replay2, lstats.IsDir())
72+
},
73+
}
74+
75+
func init() {
76+
rootCmd.AddCommand(diffCmd)
77+
78+
flags := diffCmd.Flags()
79+
flags.BoolVar(&noColor, "no-color", false, "Disable color output")
80+
flags.DurationVar(&minDurationDiff, "min-duration-diff", 200*time.Millisecond, "Print diff if time difference is greater than this value")
81+
}
82+
83+
func diff(replay1, replay2 string, isDir bool) error {
84+
if !isDir {
85+
return diffFile(replay1, replay2)
86+
}
87+
88+
return filepath.WalkDir(replay1, func(path1 string, d os.DirEntry, err error) error {
89+
if err != nil {
90+
return err
91+
}
92+
if d.IsDir() {
93+
return nil
94+
}
95+
if !strings.HasSuffix(path1, src.ReplayResultFileExt) {
96+
return nil
97+
}
98+
99+
relativePath := strings.TrimPrefix(path1, replay1)
100+
path2 := filepath.Join(replay2, relativePath)
101+
102+
if err := diffFile(path1, path2); err != nil {
103+
logrus.Errorf("diff %s and %s failed, err: %v\n", path1, path2, err)
104+
}
105+
return nil
106+
})
107+
}
108+
109+
func diffFile(file1, file2 string) error {
110+
f1, err := os.Open(file1)
111+
if err != nil {
112+
return err
113+
}
114+
defer f1.Close()
115+
scan1 := bufio.NewScanner(f1)
116+
f2, err := os.Open(file2)
117+
if err != nil {
118+
return err
119+
}
120+
defer f2.Close()
121+
scan2 := bufio.NewScanner(f2)
122+
123+
logrus.Debugf("diffing %s and %s\n", file1, file2)
124+
125+
id2diff := make(map[string]string)
126+
for scan1.Scan() {
127+
b1 := scan1.Bytes()
128+
129+
var b2 []byte
130+
if scan2.Scan() {
131+
b2 = scan2.Bytes()
132+
}
133+
134+
if len(b1) == 0 && len(b2) == 0 {
135+
continue
136+
}
137+
138+
var d diff2
139+
err = json.Unmarshal(b1, &d.r1)
140+
if err != nil {
141+
logrus.Errorf("unmarshal %s failed, err: %v\n", string(b2), err)
142+
}
143+
err = json.Unmarshal(b2, &d.r2)
144+
if err != nil {
145+
logrus.Errorf("unmarshal %s failed, err: %v\n", string(b2), err)
146+
}
147+
148+
if d.r1.QueryId != d.r2.QueryId {
149+
id2diff[d.r1.QueryId] = fmt.Sprintf("query id not match, %s != %s", d.r1.QueryId, d.r2.QueryId)
150+
break
151+
}
152+
if diffmsg := d.result(); diffmsg != "" {
153+
id2diff[d.r1.QueryId] = diffmsg
154+
}
155+
}
156+
157+
// print diff result
158+
for id, diffmsg := range id2diff {
159+
fmt.Printf("QueryId: %s, %s\n", id, diffmsg)
160+
}
161+
162+
return nil
163+
}
164+
165+
type diff2 struct {
166+
r1, r2 src.ReplayResult
167+
diff string
168+
}
169+
170+
func (d *diff2) result() string {
171+
if d.r1.Err != d.r2.Err {
172+
return fmt.Sprintf(`err not match:
173+
%s
174+
------
175+
%s`, color.GreenString(d.r1.Err), color.RedString(d.r2.Err))
176+
}
177+
if d.r1.ReturnRows != d.r2.ReturnRows {
178+
return fmt.Sprintf("rows count not match: %s != %s", color.GreenString(strconv.Itoa(d.r1.ReturnRows)), color.RedString(strconv.Itoa(d.r2.ReturnRows)))
179+
}
180+
if d.r1.ReturnRowsHash != d.r2.ReturnRowsHash {
181+
return color.RedString("rows hash not match (count: %d)", d.r1.ReturnRows)
182+
}
183+
if math.Abs(float64(d.r1.DurationMs-d.r2.DurationMs)) > float64(minDurationDiff.Milliseconds()) {
184+
return fmt.Sprintf("duration not match: %s vs %s", color.GreenString("%dms", d.r1.DurationMs), color.RedString("%dms", d.r2.DurationMs))
185+
}
186+
return ""
187+
}

cmd/dump.go

+7
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ type Dump struct {
5757
QueryUniqueNormalize bool
5858
QueryMinDuration_ time.Duration
5959
QueryMinDurationMs int
60+
QueryStates []string
6061

6162
Clean bool
6263
}
@@ -139,6 +140,7 @@ func init() {
139140
pFlags.StringVar(&DumpConfig.QueryOutputMode, "query-output-mode", "default", "Dump query output mode, one of [default, unique]")
140141
pFlags.BoolVar(&DumpConfig.QueryUniqueNormalize, "query-unique-normalize", false, "Regard 'select 1 from b where a = 1' as 'select ? from b where a = ?' for unique, only take effect when --query-output-mode=unique")
141142
pFlags.DurationVar(&DumpConfig.QueryMinDuration_, "query-min-duration", 0, "Dump queries which execution duration is greater than or equal to")
143+
pFlags.StringSliceVar(&DumpConfig.QueryStates, "query-states", []string{}, "Dump queries with states, like 'ok', 'eof' and 'err'")
142144
pFlags.StringSliceVar(&DumpConfig.AuditLogPaths, "audit-logs", nil, "Audit log paths, either local path or ssh://xxx")
143145
pFlags.BoolVar(&DumpConfig.AuditLogUnescape, "audit-log-unescape", true, "Unescape '\\n', '\\t' and '\\r' in audit log")
144146
pFlags.StringVar(&DumpConfig.AuditLogEncoding, "audit-log-encoding", "auto", "Audit log encoding, like utf8, gbk, ...")
@@ -184,6 +186,10 @@ func completeDumpConfig() error {
184186
}
185187
}
186188

189+
DumpConfig.QueryStates = lo.Map(DumpConfig.QueryStates, func(s string, _ int) string {
190+
return strings.ToUpper(s)
191+
})
192+
187193
DumpConfig.SSHPrivateKey = src.ExpandHome(DumpConfig.SSHPrivateKey)
188194
if DumpConfig.SSHAddress == "" {
189195
DumpConfig.SSHAddress = fmt.Sprintf("ssh://root@%s:22", GlobalConfig.DBHost)
@@ -367,6 +373,7 @@ func dumpQueries(ctx context.Context) ([][]string, error) {
367373
auditLogFiles,
368374
DumpConfig.AuditLogEncoding,
369375
DumpConfig.QueryMinDurationMs,
376+
DumpConfig.QueryStates,
370377
GlobalConfig.Parallel,
371378
DumpConfig.QueryOutputMode == "unique",
372379
DumpConfig.QueryUniqueNormalize,

cmd/replay.go

+23-5
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,13 @@ type Replay struct {
3939
From_, To_ string
4040
Count int
4141
Speed float32
42+
MaxHashRows int
4243

4344
DBs map[string]struct{}
4445
Users map[string]struct{}
4546
From, To int64
47+
48+
Clean bool
4649
}
4750

4851
// replayCmd represents the replay command
@@ -58,6 +61,12 @@ var replayCmd = &cobra.Command{
5861
if err := completeReplayConfig(); err != nil {
5962
return nil
6063
}
64+
if ReplayConfig.Clean {
65+
if err := cleanFile(ReplayConfig.ReplayResultDir, true); err != nil {
66+
return err
67+
}
68+
}
69+
6170
return replay(cmd.Context())
6271
},
6372
}
@@ -67,12 +76,16 @@ func init() {
6776

6877
pFlags := replayCmd.PersistentFlags()
6978
pFlags.StringVarP(&ReplayConfig.ReplayFile, "file", "f", "", "Replay queries from dump file")
70-
pFlags.StringVar(&ReplayConfig.ReplayResultDir, "result-dir", "", "Replay result directory, default is <output-dir>/replay")
79+
pFlags.StringVar(&ReplayConfig.ReplayResultDir, "result-dir", "", "Replay result directory, default is '<output-dir>/replay'")
7180
pFlags.StringSliceVar(&ReplayConfig.Users_, "users", []string{}, "Replay queries from these users")
7281
pFlags.StringVar(&ReplayConfig.From_, "from", "", "Replay queries from this time, like '2006-01-02 15:04:05'")
7382
pFlags.StringVar(&ReplayConfig.To_, "to", "", "Replay queries to this time, like '2006-01-02 16:04:05'")
7483
pFlags.IntVar(&ReplayConfig.Count, "count", -1, "Max SQL count to replay, < 0 means unlimited")
7584
pFlags.Float32Var(&ReplayConfig.Speed, "speed", 1.0, "Replay speed, like 0.5, 2, 4, ...")
85+
pFlags.IntVar(&ReplayConfig.MaxHashRows, "max-hash-rows", 0, "Number of query return rows to hash, useful when diff replay result")
86+
87+
flags := replayCmd.Flags()
88+
flags.BoolVar(&ReplayConfig.Clean, "clean", false, "Clean previous replay result")
7689
}
7790

7891
func completeReplayConfig() (err error) {
@@ -86,15 +99,18 @@ func completeReplayConfig() (err error) {
8699
var t time.Time
87100
if ReplayConfig.From_ != "" {
88101
t, err = time.Parse("2006-01-02 15:04:05", ReplayConfig.From_)
102+
if err != nil {
103+
return err
104+
}
89105
ReplayConfig.From = t.UnixMilli()
90106
}
91107
if ReplayConfig.To_ != "" {
92108
t, err = time.Parse("2006-01-02 15:04:05", ReplayConfig.To_)
109+
if err != nil {
110+
return err
111+
}
93112
ReplayConfig.To = t.UnixMilli()
94113
}
95-
if err != nil {
96-
return err
97-
}
98114

99115
if ReplayConfig.Speed <= 0 {
100116
return fmt.Errorf("replay speed must be > 0")
@@ -121,6 +137,7 @@ func replay(ctx context.Context) error {
121137
ReplayConfig.Count,
122138
)
123139

140+
// TODO: better to use connection -> sqls, but no connection id in audit log yet
124141
client2sqls, minTs, err := src.DecodeReplaySqls(
125142
bufio.NewScanner(f),
126143
ReplayConfig.DBs,
@@ -147,6 +164,7 @@ func replay(ctx context.Context) error {
147164
return src.ReplaySqls(
148165
ctx,
149166
GlobalConfig.DBHost, GlobalConfig.DBPort, GlobalConfig.DBUser, GlobalConfig.DBPassword,
150-
ReplayConfig.ReplayResultDir, client2sqls, ReplayConfig.Speed, minTs, GlobalConfig.Parallel,
167+
ReplayConfig.ReplayResultDir, client2sqls, ReplayConfig.Speed, ReplayConfig.MaxHashRows,
168+
minTs, GlobalConfig.Parallel,
151169
)
152170
}

cmd/root.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ or environment variables with prefix 'DORIS_', e.g.
6565
DORIS_PORT=9030
6666
`,
6767
Example: "dorisdump dump --help",
68-
SuggestFor: []string{"dump"},
69-
ValidArgs: []string{"completion", "help", "clean", "dump", "anonymize", "replay"},
68+
SuggestFor: []string{"dump", "replay"},
69+
ValidArgs: []string{"completion", "help", "clean", "dump", "anonymize", "replay", "diff"},
7070
TraverseChildren: true,
7171
SuggestionsMinimumDistance: 2,
7272
PersistentPreRunE: func(cmd *cobra.Command, _ []string) error {
@@ -91,7 +91,7 @@ func init() {
9191
pFlags.StringVar(&GlobalConfig.ConfigFile, "config", "", "Config file (default is $HOME/.dorisdump.yaml)")
9292
pFlags.StringVarP(&GlobalConfig.LogLevel, "log-level", "L", "info", "Log level, one of: trace, debug, info, warn")
9393
pFlags.StringVar(&GlobalConfig.DataDir, "data-dir", "./.dorisdump/", "Directory for storing data")
94-
pFlags.StringVarP(&GlobalConfig.OutputDir, "output", "O", "./dorisdump_output/", "Directory for storing dump sql and replay result")
94+
pFlags.StringVarP(&GlobalConfig.OutputDir, "output", "O", "./output/", "Directory for storing dump sql and replay result")
9595
pFlags.BoolVar(&GlobalConfig.DryRun, "dry-run", false, "Dry run")
9696
pFlags.IntVar(&GlobalConfig.Parallel, "parallel", 10, "Parallel dump worker")
9797

fixture/replay.sql

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/*dorisdump{"ts":"2024-08-06 23:44:11,041","client":"192.168.48.119:51970","user":"root","db":"__internal_schema","queryId":"8cb2e4f433e74463-a0ededde7b648b35"}*/ select "hello world 1" as hello;
2+
/*dorisdump{"ts":"2024-08-06 23:44:11,043","client":"192.168.48.118:51970","user":"root","db":"__internal_schema","queryId":"8eaf2c126a249c7-8d48a95bd8501cc9"}*/ select 1.0 as num;
3+
/*dorisdump{"ts":"2024-08-06 23:44:12,044","client":"192.168.48.119:51970","user":"root","db":"__internal_schema","queryId":"60b885f02d014194-b225555e4ed26d7e"}*/ select 'hello world 2' as hello;
4+
/*dorisdump{"ts":"2024-08-06 23:44:11,045","client":"192.168.48.118:51970","user":"root","db":"__internal_schema","queryId":"ffb1d743a9eb4394-9b48a38bcc0b8b19"}*/ select 2.0 as num;
5+
/*dorisdump{"ts":"2024-08-06 23:44:13,046","client":"192.168.48.119:51970","user":"root","db":"__internal_schema","queryId":"41dc7120df0040c0-a677b93ad1a28d27"}*/ select "hello world 3" as hello;
6+
/*dorisdump{"ts":"2024-08-06 23:44:12,047","client":"192.168.48.118:51970","user":"root","db":"__internal_schema","queryId":"18799575029447f9-a6a3fc65c8eda3f1"}*/ SHOW VARIABLES LIKE
7+
'%time_zone%';
8+
/*dorisdump{"ts":"2024-08-06 23:44:13,048","client":"192.168.48.118:51970","user":"root","db":"__internal_schema","queryId":"18799575029447f9-a6a3fc65c8eda3f2"}*/ SHOW VARIABLES LIKE
9+
'%time_zone%';

go.mod

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/antlr4-go/antlr/v4 v4.13.1
77
github.com/bramvdbogaerde/go-scp v1.5.0
88
github.com/dlclark/regexp2 v1.11.5-0.20240806004527-5bbbed8ea10b
9+
github.com/fatih/color v1.17.0
910
github.com/flier/gohs v1.2.2
1011
github.com/go-sql-driver/mysql v1.7.1
1112
github.com/goccy/go-json v0.10.3
@@ -37,6 +38,8 @@ require (
3738
github.com/inconshreveable/mousetrap v1.1.0 // indirect
3839
github.com/klauspost/cpuid/v2 v2.2.8 // indirect
3940
github.com/magiconair/properties v1.8.7 // indirect
41+
github.com/mattn/go-colorable v0.1.13 // indirect
42+
github.com/mattn/go-isatty v0.0.20 // indirect
4043
github.com/mitchellh/mapstructure v1.5.0 // indirect
4144
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
4245
github.com/pingcap/errors v0.11.5-0.20240311024730-e056997136bb // indirect
@@ -52,7 +55,7 @@ require (
5255
go.uber.org/atomic v1.11.0 // indirect
5356
go.uber.org/multierr v1.11.0 // indirect
5457
go.uber.org/zap v1.27.0 // indirect
55-
golang.org/x/sys v0.24.0 // indirect
58+
golang.org/x/sys v0.25.0 // indirect
5659
gopkg.in/ini.v1 v1.67.0 // indirect
5760
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
5861
)

0 commit comments

Comments
 (0)