Skip to content

Commit 9b8478d

Browse files
authored
Merge pull request #105 from vmarkovtsev/master
Add filtering changes by language
2 parents 75e4d76 + e70948c commit 9b8478d

File tree

4 files changed

+155
-65
lines changed

4 files changed

+155
-65
lines changed

internal/plumbing/tree_diff.go

+84-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package plumbing
22

33
import (
4+
"fmt"
5+
"gopkg.in/src-d/enry.v1"
46
"io"
57
"log"
68
"strings"
@@ -18,8 +20,11 @@ import (
1820
type TreeDiff struct {
1921
core.NoopMerger
2022
SkipDirs []string
23+
Languages map[string]bool
24+
2125
previousTree *object.Tree
2226
previousCommit plumbing.Hash
27+
repository *git.Repository
2328
}
2429

2530
const (
@@ -31,6 +36,13 @@ const (
3136
// ConfigTreeDiffBlacklistedDirs is the name of the configuration option
3237
// (TreeDiff.Configure()) which allows to set blacklisted directories.
3338
ConfigTreeDiffBlacklistedDirs = "TreeDiff.BlacklistedDirs"
39+
// ConfigTreeDiffLanguages is the name of the configuration option (TreeDiff.Configure())
40+
// which sets the list of programming languages to analyze. Language names are at
41+
// https://doc.bblf.sh/languages.html Names are joined with a comma ",".
42+
// "all" is the special name which disables this filter.
43+
ConfigTreeDiffLanguages = "TreeDiff.Languages"
44+
// allLanguages denotes passing all files in.
45+
allLanguages = "all"
3446
)
3547

3648
var defaultBlacklistedDirs = []string{"vendor/", "vendors/", "node_modules/"}
@@ -67,7 +79,15 @@ func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption
6779
Description: "List of blacklisted directories. Separated by comma \",\".",
6880
Flag: "blacklisted-dirs",
6981
Type: core.StringsConfigurationOption,
70-
Default: defaultBlacklistedDirs},
82+
Default: defaultBlacklistedDirs}, {
83+
Name: ConfigTreeDiffLanguages,
84+
Description: fmt.Sprintf(
85+
"List of programming languages to analyze. Separated by comma \",\". " +
86+
"Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name " +
87+
"which disables this filter and lets all the files through.", allLanguages),
88+
Flag: "languages",
89+
Type: core.StringsConfigurationOption,
90+
Default: []string{allLanguages}},
7191
}
7292
return options[:]
7393
}
@@ -77,12 +97,26 @@ func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
7797
if val, exist := facts[ConfigTreeDiffEnableBlacklist]; exist && val.(bool) {
7898
treediff.SkipDirs = facts[ConfigTreeDiffBlacklistedDirs].([]string)
7999
}
100+
if val, exists := facts[ConfigTreeDiffLanguages].(string); exists {
101+
treediff.Languages = map[string]bool{}
102+
for _, lang := range strings.Split(val, ",") {
103+
treediff.Languages[strings.TrimSpace(lang)] = true
104+
}
105+
} else if treediff.Languages == nil {
106+
treediff.Languages = map[string]bool{}
107+
treediff.Languages[allLanguages] = true
108+
}
80109
}
81110

82111
// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
83112
// calls. The repository which is going to be analysed is supplied as an argument.
84113
func (treediff *TreeDiff) Initialize(repository *git.Repository) {
85114
treediff.previousTree = nil
115+
treediff.repository = repository
116+
if treediff.Languages == nil {
117+
treediff.Languages = map[string]bool{}
118+
treediff.Languages[allLanguages] = true
119+
}
86120
}
87121

88122
// Consume runs this PipelineItem on the next commit data.
@@ -124,6 +158,13 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
124158
}
125159
return err
126160
}
161+
pass, err := treediff.checkLanguage(file.Name, file.Hash)
162+
if err != nil {
163+
return err
164+
}
165+
if !pass {
166+
continue
167+
}
127168
diff = append(diff, &object.Change{
128169
To: object.ChangeEntry{Name: file.Name, Tree: tree, TreeEntry: object.TreeEntry{
129170
Name: file.Name, Mode: file.Mode, Hash: file.Hash}}})
@@ -137,21 +178,29 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
137178
treediff.previousTree = tree
138179
treediff.previousCommit = commit.Hash
139180

140-
if len(treediff.SkipDirs) > 0 {
141-
// filter without allocation
142-
filteredDiff := make([]*object.Change, 0, len(diff))
143-
OUTER:
144-
for _, change := range diff {
145-
for _, dir := range treediff.SkipDirs {
146-
if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
147-
continue OUTER
148-
}
181+
// filter without allocation
182+
filteredDiff := make([]*object.Change, 0, len(diff))
183+
OUTER:
184+
for _, change := range diff {
185+
for _, dir := range treediff.SkipDirs {
186+
if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
187+
continue OUTER
149188
}
150-
filteredDiff = append(filteredDiff, change)
151189
}
152-
153-
diff = filteredDiff
190+
var changeEntry object.ChangeEntry
191+
if change.To.Tree == nil {
192+
changeEntry = change.From
193+
} else {
194+
changeEntry = change.To
195+
}
196+
pass, _ := treediff.checkLanguage(changeEntry.Name, changeEntry.TreeEntry.Hash)
197+
if !pass {
198+
continue
199+
}
200+
filteredDiff = append(filteredDiff, change)
154201
}
202+
203+
diff = filteredDiff
155204
return map[string]interface{}{DependencyTreeChanges: diff}, nil
156205
}
157206

@@ -160,6 +209,28 @@ func (treediff *TreeDiff) Fork(n int) []core.PipelineItem {
160209
return core.ForkCopyPipelineItem(treediff, n)
161210
}
162211

212+
// checkLanguage returns whether the blob corresponds to the list of required languages.
213+
func (treediff *TreeDiff) checkLanguage(name string, blobHash plumbing.Hash) (bool, error) {
214+
if treediff.Languages[allLanguages] {
215+
return true, nil
216+
}
217+
blob, err := treediff.repository.BlobObject(blobHash)
218+
if err != nil {
219+
return false, err
220+
}
221+
reader, err := blob.Reader()
222+
if err != nil {
223+
return false, err
224+
}
225+
buffer := make([]byte, 1024)
226+
_, err = reader.Read(buffer)
227+
if err != nil {
228+
return false, err
229+
}
230+
lang := enry.GetLanguage(name, buffer)
231+
return treediff.Languages[lang], nil
232+
}
233+
163234
func init() {
164235
core.Registry.Register(&TreeDiff{})
165236
}

internal/plumbing/tree_diff_test.go

+43-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func TestTreeDiffMeta(t *testing.T) {
2525
assert.Equal(t, len(td.Provides()), 1)
2626
assert.Equal(t, td.Provides()[0], DependencyTreeChanges)
2727
opts := td.ListConfigurationOptions()
28-
assert.Len(t, opts, 2)
28+
assert.Len(t, opts, 3)
2929
}
3030

3131
func TestTreeDiffRegistration(t *testing.T) {
@@ -115,6 +115,7 @@ func TestTreeDiffBadCommit(t *testing.T) {
115115
func TestTreeDiffConsumeSkip(t *testing.T) {
116116
// consume without skipping
117117
td := fixtureTreeDiff()
118+
assert.Contains(t, td.Languages, allLanguages)
118119
commit, _ := test.Repository.CommitObject(plumbing.NewHash(
119120
"aefdedf7cafa6ee110bae9a3910bf5088fdeb5a9"))
120121
deps := map[string]interface{}{}
@@ -142,6 +143,47 @@ func TestTreeDiffConsumeSkip(t *testing.T) {
142143
assert.Equal(t, 31, len(changes))
143144
}
144145

146+
func TestTreeDiffConsumeLanguageFilterFirst(t *testing.T) {
147+
td := fixtureTreeDiff()
148+
td.Configure(map[string]interface{}{ConfigTreeDiffLanguages: "Go"})
149+
commit, _ := test.Repository.CommitObject(plumbing.NewHash(
150+
"fbe766ffdc3f87f6affddc051c6f8b419beea6a2"))
151+
deps := map[string]interface{}{}
152+
deps[core.DependencyCommit] = commit
153+
res, err := td.Consume(deps)
154+
assert.Nil(t, err)
155+
assert.Equal(t, len(res), 1)
156+
changes := res[DependencyTreeChanges].(object.Changes)
157+
assert.Equal(t, len(changes), 6)
158+
assert.Equal(t, changes[0].To.Name, "analyser.go")
159+
assert.Equal(t, changes[1].To.Name, "cmd/hercules/main.go")
160+
assert.Equal(t, changes[2].To.Name, "doc.go")
161+
assert.Equal(t, changes[3].To.Name, "file.go")
162+
assert.Equal(t, changes[4].To.Name, "file_test.go")
163+
assert.Equal(t, changes[5].To.Name, "rbtree.go")
164+
}
165+
166+
func TestTreeDiffConsumeLanguageFilter(t *testing.T) {
167+
td := fixtureTreeDiff()
168+
td.Configure(map[string]interface{}{ConfigTreeDiffLanguages: "Python"})
169+
commit, _ := test.Repository.CommitObject(plumbing.NewHash(
170+
"e89c1d10fb31e32668ad905eb59dc44d7a4a021e"))
171+
deps := map[string]interface{}{}
172+
deps[core.DependencyCommit] = commit
173+
res, err := td.Consume(deps)
174+
assert.Nil(t, err)
175+
assert.Equal(t, len(res), 1)
176+
commit, _ = test.Repository.CommitObject(plumbing.NewHash(
177+
"fbe766ffdc3f87f6affddc051c6f8b419beea6a2"))
178+
deps[core.DependencyCommit] = commit
179+
res, err = td.Consume(deps)
180+
assert.Nil(t, err)
181+
assert.Equal(t, len(res), 1)
182+
changes := res[DependencyTreeChanges].(object.Changes)
183+
assert.Equal(t, len(changes), 1)
184+
assert.Equal(t, changes[0].To.Name, "labours.py")
185+
}
186+
145187
func TestTreeDiffFork(t *testing.T) {
146188
td1 := fixtureTreeDiff()
147189
td1.SkipDirs = append(td1.SkipDirs, "skip")

internal/plumbing/uast/uast.go

+1-28
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"gopkg.in/bblfsh/client-go.v2"
2020
"gopkg.in/bblfsh/sdk.v1/protocol"
2121
"gopkg.in/bblfsh/sdk.v1/uast"
22-
"gopkg.in/src-d/enry.v1"
2322
"gopkg.in/src-d/go-git.v4"
2423
"gopkg.in/src-d/go-git.v4/plumbing"
2524
"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -37,7 +36,6 @@ type Extractor struct {
3736
Endpoint string
3837
Context func() (context.Context, context.CancelFunc)
3938
PoolSize int
40-
Languages map[string]bool
4139
FailOnErrors bool
4240
ProcessedFiles map[string]int
4341

@@ -60,11 +58,6 @@ const (
6058
// ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
6159
// which enables early exit in case of any Babelfish UAST parsing errors.
6260
ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
63-
// ConfigUASTLanguages is the name of the configuration option (Extractor.Configure())
64-
// which sets the list of languages to parse. Language names are at
65-
// https://doc.bblf.sh/languages.html Names are joined with a comma ",".
66-
ConfigUASTLanguages = "ConfigUASTLanguages"
67-
6861
// FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
6962
FeatureUast = "uast"
7063
// DependencyUasts is the name of the dependency provided by Extractor.
@@ -140,12 +133,7 @@ func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
140133
Description: "Panic if there is a UAST extraction error.",
141134
Flag: "bblfsh-fail-on-error",
142135
Type: core.BoolConfigurationOption,
143-
Default: false}, {
144-
Name: ConfigUASTLanguages,
145-
Description: "Programming languages from which to extract UASTs. Separated by comma \",\".",
146-
Flag: "languages",
147-
Type: core.StringConfigurationOption,
148-
Default: "Python,Java,Go,JavaScript,Ruby,PHP"},
136+
Default: false},
149137
}
150138
return options[:]
151139
}
@@ -164,12 +152,6 @@ func (exr *Extractor) Configure(facts map[string]interface{}) {
164152
if val, exists := facts[ConfigUASTPoolSize].(int); exists {
165153
exr.PoolSize = val
166154
}
167-
if val, exists := facts[ConfigUASTLanguages].(string); exists {
168-
exr.Languages = map[string]bool{}
169-
for _, lang := range strings.Split(val, ",") {
170-
exr.Languages[strings.TrimSpace(lang)] = true
171-
}
172-
}
173155
if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
174156
exr.FailOnErrors = val
175157
}
@@ -210,9 +192,6 @@ func (exr *Extractor) Initialize(repository *git.Repository) {
210192
panic("UAST goroutine pool was not created")
211193
}
212194
exr.ProcessedFiles = map[string]int{}
213-
if exr.Languages == nil {
214-
exr.Languages = map[string]bool{}
215-
}
216195
}
217196

218197
// Consume runs this PipelineItem on the next commit data.
@@ -235,17 +214,11 @@ func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface
235214
return
236215
}
237216
defer ioutil.CheckClose(reader, &err)
238-
239217
buf := new(bytes.Buffer)
240218
if _, err := buf.ReadFrom(reader); err != nil {
241219
errs = append(errs, err)
242220
return
243221
}
244-
lang := enry.GetLanguage(change.To.Name, buf.Bytes())
245-
if _, exists := exr.Languages[lang]; !exists {
246-
exr.ProcessedFiles[change.To.Name] = uastExtractionSkipped
247-
return
248-
}
249222
exr.ProcessedFiles[change.To.Name]++
250223
}
251224
wg.Add(1)

0 commit comments

Comments
 (0)