Skip to content

Commit c8fd37d

Browse files
authored
Merge pull request #243 from vmarkovtsev/master
Add TyposDatasetBuilder
2 parents b6d8e21 + 3d59afc commit c8fd37d

File tree

8 files changed: 887 additions (+887) and 124 deletions (-124).

core.go

+2 -6
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@ import (
99
"gopkg.in/src-d/hercules.v9/internal/plumbing/identity"
1010
"gopkg.in/src-d/hercules.v9/internal/plumbing/uast"
1111
"gopkg.in/src-d/hercules.v9/internal/yaml"
12-
"gopkg.in/src-d/hercules.v9/leaves"
12+
_ "gopkg.in/src-d/hercules.v9/leaves" // add burndown and other analyses
13+
_ "gopkg.in/src-d/hercules.v9/leaves/research" // add "research" analyses
1314
)
1415

1516
// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
@@ -171,8 +172,3 @@ func PathifyFlagValue(flag *pflag.Flag) {
171172
func EnablePathFlagTypeMasquerade() {
172173
core.EnablePathFlagTypeMasquerade()
173174
}
174-
175-
func init() {
176-
// hack to link with .leaves
177-
_ = leaves.BurndownAnalysis{}
178-
}

internal/pb/pb.pb.go

+168 -96
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/pb/pb.proto

+12
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,18 @@ message CommitsAnalysisResults {
165165
repeated string author_index = 2;
166166
}
167167

168+
message Typo {
169+
string wrong = 1;
170+
string correct = 2;
171+
string commit = 3;
172+
string file = 4;
173+
int32 line = 5;
174+
}
175+
176+
message TyposDataset {
177+
repeated Typo typos = 1;
178+
}
179+
168180
message AnalysisResults {
169181
Metadata header = 1;
170182
// the mapped values are dynamic messages which require the second parsing pass.

internal/pb/pb_pb2.py

+112 -5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/plumbing/uast/uast.go

+59 -15
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,12 @@ import (
3232
// It is a PipelineItem.
3333
type Extractor struct {
3434
core.NoopMerger
35-
Endpoint string
36-
Context func() (context.Context, context.CancelFunc)
37-
PoolSize int
38-
FailOnErrors bool
39-
ProcessedFiles map[string]int
35+
Endpoint string
36+
Context func() (context.Context, context.CancelFunc)
37+
PoolSize int
38+
FailOnErrors bool
39+
ProcessedFiles map[string]int
40+
IgnoredMissingDrivers map[string]bool
4041

4142
clients []*bblfsh.Client
4243
pool *tunny.Pool
@@ -45,22 +46,36 @@ type Extractor struct {
4546
const (
4647
// ConfigUASTEndpoint is the name of the configuration option (Extractor.Configure())
4748
// which sets the Babelfish server address.
48-
ConfigUASTEndpoint = "ConfigUASTEndpoint"
49+
ConfigUASTEndpoint = "UAST.Endpoint"
4950
// ConfigUASTTimeout is the name of the configuration option (Extractor.Configure())
5051
// which sets the maximum amount of time to wait for a Babelfish server response.
51-
ConfigUASTTimeout = "ConfigUASTTimeout"
52+
ConfigUASTTimeout = "UAST.Timeout"
5253
// ConfigUASTPoolSize is the name of the configuration option (Extractor.Configure())
5354
// which sets the number of goroutines to run for UAST parse queries.
54-
ConfigUASTPoolSize = "ConfigUASTPoolSize"
55+
ConfigUASTPoolSize = "UAST.PoolSize"
5556
// ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
5657
// which enables early exit in case of any Babelfish UAST parsing errors.
57-
ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
58+
ConfigUASTFailOnErrors = "UAST.FailOnErrors"
59+
// ConfigUASTIgnoreMissingDrivers is the name of the configuration option (Extractor.Configure())
60+
// which sets the ignored missing driver names.
61+
ConfigUASTIgnoreMissingDrivers = "UAST.IgnoreMissingDrivers"
62+
// DefaultBabelfishEndpoint is the default address of the Babelfish parsing server.
63+
DefaultBabelfishEndpoint = "0.0.0.0:9432"
64+
// DefaultBabelfishTimeout is the default value of the RPC timeout in seconds.
65+
DefaultBabelfishTimeout = 20
5866
// FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
5967
FeatureUast = "uast"
6068
// DependencyUasts is the name of the dependency provided by Extractor.
6169
DependencyUasts = "uasts"
6270
)
6371

72+
var (
73+
// DefaultBabelfishWorkers is the default number of parsing RPC goroutines.
74+
DefaultBabelfishWorkers = runtime.NumCPU() * 2
75+
// DefaultIgnoredMissingDrivers is the languages which are ignored if the Babelfish driver is missing.
76+
DefaultIgnoredMissingDrivers = []string{"markdown", "text", "yaml", "json"}
77+
)
78+
6479
type uastTask struct {
6580
Lock *sync.RWMutex
6681
Dest map[plumbing.Hash]nodes.Node
@@ -117,22 +132,27 @@ func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
117132
Description: "How many days there are in a single band.",
118133
Flag: "bblfsh",
119134
Type: core.StringConfigurationOption,
120-
Default: "0.0.0.0:9432"}, {
135+
Default: DefaultBabelfishEndpoint}, {
121136
Name: ConfigUASTTimeout,
122137
Description: "Babelfish's server timeout in seconds.",
123138
Flag: "bblfsh-timeout",
124139
Type: core.IntConfigurationOption,
125-
Default: 20}, {
140+
Default: DefaultBabelfishTimeout}, {
126141
Name: ConfigUASTPoolSize,
127142
Description: "Number of goroutines to extract UASTs.",
128143
Flag: "bblfsh-pool-size",
129144
Type: core.IntConfigurationOption,
130-
Default: runtime.NumCPU() * 2}, {
145+
Default: DefaultBabelfishWorkers}, {
131146
Name: ConfigUASTFailOnErrors,
132147
Description: "Panic if there is a UAST extraction error.",
133148
Flag: "bblfsh-fail-on-error",
134149
Type: core.BoolConfigurationOption,
135-
Default: false},
150+
Default: false}, {
151+
Name: ConfigUASTIgnoreMissingDrivers,
152+
Description: "Do not warn about missing drivers for the specified languages.",
153+
Flag: "bblfsh-ignored-drivers",
154+
Type: core.StringsConfigurationOption,
155+
Default: DefaultIgnoredMissingDrivers},
136156
}
137157
return options[:]
138158
}
@@ -154,6 +174,12 @@ func (exr *Extractor) Configure(facts map[string]interface{}) error {
154174
if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
155175
exr.FailOnErrors = val
156176
}
177+
if val, exists := facts[ConfigUASTIgnoreMissingDrivers].([]string); exists {
178+
exr.IgnoredMissingDrivers = map[string]bool{}
179+
for _, name := range val {
180+
exr.IgnoredMissingDrivers[name] = true
181+
}
182+
}
157183
return nil
158184
}
159185

@@ -162,9 +188,16 @@ func (exr *Extractor) Configure(facts map[string]interface{}) error {
162188
func (exr *Extractor) Initialize(repository *git.Repository) error {
163189
if exr.Context == nil {
164190
exr.Context = func() (context.Context, context.CancelFunc) {
165-
return context.Background(), nil
191+
return context.WithTimeout(context.Background(),
192+
time.Duration(DefaultBabelfishTimeout)*time.Second)
166193
}
167194
}
195+
if exr.Endpoint == "" {
196+
exr.Endpoint = DefaultBabelfishEndpoint
197+
}
198+
if exr.PoolSize == 0 {
199+
exr.PoolSize = DefaultBabelfishWorkers
200+
}
168201
poolSize := exr.PoolSize
169202
if poolSize == 0 {
170203
poolSize = runtime.NumCPU()
@@ -196,6 +229,12 @@ func (exr *Extractor) Initialize(repository *git.Repository) error {
196229
panic("UAST goroutine pool was not created")
197230
}
198231
exr.ProcessedFiles = map[string]int{}
232+
if exr.IgnoredMissingDrivers == nil {
233+
exr.IgnoredMissingDrivers = map[string]bool{}
234+
for _, name := range DefaultIgnoredMissingDrivers {
235+
exr.IgnoredMissingDrivers[name] = true
236+
}
237+
}
199238
return nil
200239
}
201240

@@ -250,7 +289,7 @@ func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface
250289
if exr.FailOnErrors {
251290
return nil, errors.New(joined)
252291
}
253-
fmt.Fprintln(os.Stderr, joined)
292+
log.Println(joined)
254293
}
255294
return map[string]interface{}{DependencyUasts: uasts}, nil
256295
}
@@ -284,6 +323,11 @@ func (exr *Extractor) extractTask(client *bblfsh.Client, data interface{}) inter
284323
task.Lock.Lock()
285324
defer task.Lock.Unlock()
286325
if err != nil {
326+
for lang := range exr.IgnoredMissingDrivers {
327+
if strings.HasSuffix(err.Error(), "\""+lang+"\"") {
328+
return nil
329+
}
330+
}
287331
*task.Errors = append(*task.Errors,
288332
fmt.Errorf("\nfile %s, blob %s: %v", task.Name, task.Hash.String(), err))
289333
return nil

internal/plumbing/uast/uast_test.go

+33 -2
Original file line number | Diff line number | Diff line change
@@ -50,11 +50,12 @@ func TestUASTExtractorMeta(t *testing.T) {
5050
assert.Equal(t, exr.Requires()[0], items.DependencyTreeChanges)
5151
assert.Equal(t, exr.Requires()[1], items.DependencyBlobCache)
5252
opts := exr.ListConfigurationOptions()
53-
assert.Len(t, opts, 4)
53+
assert.Len(t, opts, 5)
5454
assert.Equal(t, opts[0].Name, ConfigUASTEndpoint)
5555
assert.Equal(t, opts[1].Name, ConfigUASTTimeout)
5656
assert.Equal(t, opts[2].Name, ConfigUASTPoolSize)
5757
assert.Equal(t, opts[3].Name, ConfigUASTFailOnErrors)
58+
assert.Equal(t, opts[4].Name, ConfigUASTIgnoreMissingDrivers)
5859
feats := exr.Features()
5960
assert.Len(t, feats, 1)
6061
assert.Equal(t, feats[0], FeatureUast)
@@ -68,11 +69,13 @@ func TestUASTExtractorConfiguration(t *testing.T) {
6869
facts[ConfigUASTTimeout] = 15
6970
facts[ConfigUASTPoolSize] = 7
7071
facts[ConfigUASTFailOnErrors] = true
72+
facts[ConfigUASTIgnoreMissingDrivers] = []string{"test"}
7173
exr.Configure(facts)
7274
assert.Equal(t, exr.Endpoint, facts[ConfigUASTEndpoint])
7375
assert.NotNil(t, exr.Context)
7476
assert.Equal(t, exr.PoolSize, facts[ConfigUASTPoolSize])
7577
assert.Equal(t, exr.FailOnErrors, true)
78+
assert.Equal(t, exr.IgnoredMissingDrivers, map[string]bool{"test": true})
7679
}
7780

7881
func TestUASTExtractorRegistration(t *testing.T) {
@@ -92,7 +95,7 @@ func TestUASTExtractorNoBabelfish(t *testing.T) {
9295

9396
func TestUASTExtractorConsume(t *testing.T) {
9497
exr := fixtureUASTExtractor()
95-
changes := make(object.Changes, 3)
98+
changes := make(object.Changes, 4)
9699
// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
97100
treeFrom, _ := test.Repository.TreeObject(plumbing.NewHash(
98101
"96c6ece9b2f3c7c51b83516400d278dea5605100"))
@@ -136,13 +139,24 @@ func TestUASTExtractorConsume(t *testing.T) {
136139
},
137140
},
138141
}
142+
changes[3] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
143+
Name: "README.md",
144+
Tree: treeTo,
145+
TreeEntry: object.TreeEntry{
146+
Name: "README.md",
147+
Mode: 0100644,
148+
Hash: plumbing.NewHash("5248c86995f6d60eb57730da18b5e020a4341863"),
149+
},
150+
},
151+
}
139152
cache := map[plumbing.Hash]*items.CachedBlob{}
140153
for _, hash := range []string{
141154
"baa64828831d174f40140e4b3cfa77d1e917a2c1",
142155
"5d78f57d732aed825764347ec6f3ab74d50d0619",
143156
"c29112dbd697ad9b401333b80c18a63951bc18d9",
144157
"f7d918ec500e2f925ecde79b51cc007bac27de72",
145158
"81f2b6d1fa5357f90e9dead150cd515720897545",
159+
"5248c86995f6d60eb57730da18b5e020a4341863",
146160
} {
147161
AddHash(t, cache, hash)
148162
}
@@ -158,6 +172,12 @@ func TestUASTExtractorConsume(t *testing.T) {
158172
assert.Len(t, res[DependencyUasts], 1)
159173
assert.Nil(t, err)
160174

175+
exr.FailOnErrors = true
176+
res, err = exr.Consume(deps)
177+
assert.Nil(t, res)
178+
assert.NotNil(t, err)
179+
exr.FailOnErrors = false
180+
161181
hash := plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619")
162182
changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
163183
Name: "labours.py",
@@ -176,6 +196,17 @@ func TestUASTExtractorConsume(t *testing.T) {
176196
uasts := res[DependencyUasts].(map[plumbing.Hash]nodes.Node)
177197
assert.Equal(t, len(uasts), 1)
178198
assert.Equal(t, len(uasts[hash].(nodes.Object)["body"].(nodes.Array)), 24)
199+
200+
exr.IgnoredMissingDrivers = map[string]bool{}
201+
changes[2] = changes[3]
202+
deps[items.DependencyTreeChanges] = changes[:3]
203+
res, err = exr.Consume(deps)
204+
assert.Nil(t, err)
205+
exr.FailOnErrors = true
206+
res, err = exr.Consume(deps)
207+
assert.Nil(t, res)
208+
assert.NotNil(t, err)
209+
exr.FailOnErrors = false
179210
}
180211

181212
func TestUASTExtractorFork(t *testing.T) {

0 commit comments

Comments
 (0)