-
-
Notifications
You must be signed in to change notification settings - Fork 54
/
match.go
403 lines (358 loc) · 12.3 KB
/
match.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
package doublestar
import (
"path/filepath"
"unicode/utf8"
)
// Match reports whether name matches the shell pattern.
// The pattern syntax is:
//
// pattern:
// { term }
// term:
// '*' matches any sequence of non-path-separators
// '/**/' matches zero or more directories
// '?' matches any single non-path-separator character
// '[' [ '^' '!' ] { character-range } ']'
// character class (must be non-empty)
// starting with `^` or `!` negates the class
// '{' { term } [ ',' { term } ... ] '}'
// alternatives
// c matches character c (c != '*', '?', '\\', '[')
// '\\' c matches character c
//
// character-range:
// c matches character c (c != '\\', '-', ']')
// '\\' c matches character c
// lo '-' hi matches character c for lo <= c <= hi
//
// Match returns true if `name` matches the file name `pattern`. `name` and
// `pattern` are split on forward slash (`/`) characters and may be relative or
// absolute.
//
// Match requires pattern to match all of name, not just a substring.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
//
// A doublestar (`**`) should appear surrounded by path separators such as
// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar
// option: a pattern such as `path/to/**.txt` would return the same results as
// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
//
// Note: this is meant as a drop-in replacement for path.Match() which
// always uses '/' as the path separator. If you want to support systems
// which use a different path separator (such as Windows), what you want
// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
// pattern and name and then use this function.
//
// Note: users should _not_ count on the returned error,
// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
//
func Match(pattern, name string) (bool, error) {
return matchWithSeparator(pattern, name, '/', true)
}
// MatchUnvalidated can provide a small performance improvement if you don't
// care about whether or not the pattern is valid (perhaps because you already
// ran `ValidatePattern`). Note that there's really only one case where this
// performance improvement is realized: when pattern matching reaches the end
// of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
// "a")`.
func MatchUnvalidated(pattern, name string) bool {
matched, _ := matchWithSeparator(pattern, name, '/', false)
return matched
}
// PathMatch returns true if `name` matches the file name `pattern`. The
// difference between Match and PathMatch is that PathMatch will automatically
// use your system's path separator to split `name` and `pattern`. On systems
// where the path separator is `'\'`, escaping will be disabled.
//
// Note: this is meant as a drop-in replacement for filepath.Match(). It
// assumes that both `pattern` and `name` are using the system's path
// separator. If you can't be sure of that, use filepath.ToSlash() on both
// `pattern` and `name`, and then use the Match() function instead.
//
func PathMatch(pattern, name string) (bool, error) {
return matchWithSeparator(pattern, name, filepath.Separator, true)
}
// PathMatchUnvalidated can provide a small performance improvement if you
// don't care about whether or not the pattern is valid (perhaps because you
// already ran `ValidatePattern`). Note that there's really only one case where
// this performance improvement is realized: when pattern matching reaches the
// end of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
// "a")`.
func PathMatchUnvalidated(pattern, name string) bool {
matched, _ := matchWithSeparator(pattern, name, filepath.Separator, false)
return matched
}
func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
}
func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
patLen := len(pattern)
nameLen := len(name)
startOfSegment := true
MATCH:
for nameIdx < nameLen {
if patIdx < patLen {
switch pattern[patIdx] {
case '*':
if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
// doublestar - must begin with a path separator, otherwise we'll
// treat it like a single star like bash
patIdx++
if startOfSegment {
if patIdx >= patLen {
// pattern ends in `/**`: return true
return true, nil
}
// doublestar must also end with a path separator, otherwise we're
// just going to treat the doublestar as a single star like bash
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
if patRune == separator {
patIdx += patRuneLen
doublestarPatternBacktrack = patIdx
doublestarNameBacktrack = nameIdx
starPatternBacktrack = -1
starNameBacktrack = -1
continue
}
}
}
startOfSegment = false
starPatternBacktrack = patIdx
starNameBacktrack = nameIdx
continue
case '?':
startOfSegment = false
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
if nameRune == separator {
// `?` cannot match the separator
break
}
patIdx++
nameIdx += nameRuneLen
continue
case '[':
startOfSegment = false
if patIdx++; patIdx >= patLen {
// class didn't end
return false, ErrBadPattern
}
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
matched := false
negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
if negate {
patIdx++
}
if patIdx >= patLen || pattern[patIdx] == ']' {
// class didn't end or empty character class
return false, ErrBadPattern
}
last := utf8.MaxRune
for patIdx < patLen && pattern[patIdx] != ']' {
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
// match a range
if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
if pattern[patIdx] == '\\' {
// next character is escaped
patIdx++
}
patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
if last <= nameRune && nameRune <= patRune {
matched = true
break
}
// didn't match range - reset `last`
last = utf8.MaxRune
continue
}
// not a range - check if the next rune is escaped
if patRune == '\\' {
patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
}
// check if the rune matches
if patRune == nameRune {
matched = true
break
}
// no matches yet
last = patRune
}
if matched == negate {
// failed to match - if we reached the end of the pattern, that means
// we never found a closing `]`
if patIdx >= patLen {
return false, ErrBadPattern
}
break
}
closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
if closingIdx == -1 {
// no closing `]`
return false, ErrBadPattern
}
patIdx += closingIdx + 1
nameIdx += nameRuneLen
continue
case '{':
startOfSegment = false
beforeIdx := patIdx
patIdx++
closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
if closingIdx == -1 {
// no closing `}`
return false, ErrBadPattern
}
closingIdx += patIdx
for {
commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
if commaIdx == -1 {
break
}
commaIdx += patIdx
result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
if result || err != nil {
return result, err
}
patIdx = commaIdx + 1
}
return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
case '\\':
if separator != '\\' {
// next rune is "escaped" in the pattern - literal match
if patIdx++; patIdx >= patLen {
// pattern ended
return false, ErrBadPattern
}
}
fallthrough
default:
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
if patRune != nameRune {
if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
// if this rune was meant to be escaped, we need to move patIdx
// back to the backslash before backtracking or validating below
patIdx--
}
break
}
patIdx += patRuneLen
nameIdx += nameRuneLen
startOfSegment = patRune == separator
continue
}
}
if starPatternBacktrack >= 0 {
// `*` backtrack, but only if the `name` rune isn't the separator
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
if nameRune != separator {
starNameBacktrack += nameRuneLen
patIdx = starPatternBacktrack
nameIdx = starNameBacktrack
startOfSegment = false
continue
}
}
if doublestarPatternBacktrack >= 0 {
// `**` backtrack, advance `name` past next separator
nameIdx = doublestarNameBacktrack
for nameIdx < nameLen {
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
nameIdx += nameRuneLen
if nameRune == separator {
doublestarNameBacktrack = nameIdx
patIdx = doublestarPatternBacktrack
startOfSegment = true
continue MATCH
}
}
}
if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
return false, ErrBadPattern
}
return false, nil
}
if nameIdx < nameLen {
// we reached the end of `pattern` before the end of `name`
return false, nil
}
// we've reached the end of `name`; we've successfully matched if we've also
// reached the end of `pattern`, or if the rest of `pattern` can match a
// zero-length string
return isZeroLengthPattern(pattern[patIdx:], separator)
}
func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
// `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
// *should* match `path/to/a` because `a` might be a directory
if pattern == "" ||
pattern == "*" ||
pattern == "**" ||
pattern == string(separator)+"**" ||
pattern == "**"+string(separator) ||
pattern == string(separator)+"**"+string(separator) {
return true, nil
}
if pattern[0] == '{' {
closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
if closingIdx == -1 {
// no closing '}'
return false, ErrBadPattern
}
closingIdx += 1
patIdx := 1
for {
commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
if commaIdx == -1 {
break
}
commaIdx += patIdx
ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
if ret || err != nil {
return
}
patIdx = commaIdx + 1
}
return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
}
// no luck - validate the rest of the pattern
if !doValidatePattern(pattern, separator) {
return false, ErrBadPattern
}
return false, nil
}
// Finds the index of the first unescaped byte `c`, or negative 1.
func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
l := len(s)
for i := 0; i < l; i++ {
if allowEscaping && s[i] == '\\' {
// skip next byte
i++
} else if s[i] == c {
return i
}
}
return -1
}
// Assuming the byte before the beginning of `s` is an opening `{`, this
// function will find the index of the matching `}`. That is, it'll skip over
// any nested `{}` and account for escaping
func indexMatchedClosingAlt(s string, allowEscaping bool) int {
alts := 1
l := len(s)
for i := 0; i < l; i++ {
if allowEscaping && s[i] == '\\' {
// skip next byte
i++
} else if s[i] == '{' {
alts++
} else if s[i] == '}' {
if alts--; alts == 0 {
return i
}
}
}
return -1
}