Skip to content

Commit 3a8653c

Browse files
authored Jun 21, 2019
Merge pull request #292 from vmarkovtsev/master
Added 3D overwrites visual
2 parents 65a51f1 + b8f1f40 commit 3a8653c

File tree

4 files changed

+41
-46
lines changed

4 files changed

+41
-46
lines changed
 

‎README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Table of Contents
3939
* [Project burndown](#project-burndown)
4040
* [Files](#files)
4141
* [People](#people)
42-
* [Churn matrix](#churn-matrix)
42+
* [Overwrites matrix](#overwrites-matrix)
4343
* [Code ownership](#code-ownership)
4444
* [Couples](#couples)
4545
* [Structural hotness](#structural-hotness)
@@ -214,14 +214,14 @@ If `--people-dict` is specified, it should point to a text file with the custom
214214
format is: every line is a single developer, it contains all the matching emails and names separated
215215
by `|`. The case is ignored.
216216

217-
#### Churn matrix
217+
#### Overwrites matrix
218218

219-
![Wireshark top 20 churn matrix](doc/wireshark_churn_matrix.png)
220-
<p align="center">Wireshark top 20 devs - churn matrix</p>
219+
![Wireshark top 20 overwrites matrix](doc/wireshark_overwrites_matrix.png)
220+
<p align="center">Wireshark top 20 devs - overwrites matrix</p>
221221

222222
```
223223
hercules --burndown --burndown-people [--people-dict=/path/to/identities]
224-
labours -m churn-matrix
224+
labours -m overwrites-matrix
225225
```
226226

227227
Besides the burndown information, `--burndown-people` collects the added and deleted line statistics per
File renamed without changes.

‎python/labours/labours.py

+35-40
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,13 @@ def parse_args():
5858
parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
5959
parser.add_argument("--relative", action="store_true",
6060
help="Occupy 100%% height for every measurement.")
61-
parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
61+
parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
6262
parser.add_argument("-m", "--mode",
6363
choices=["burndown-project", "burndown-file", "burndown-person",
64-
"churn-matrix", "ownership", "couples-files", "couples-people",
65-
"couples-shotness", "shotness", "sentiment", "devs",
66-
"devs-efforts", "old-vs-new", "all", "run-times", "languages",
67-
"devs-parallel"],
64+
"overwrites-matrix", "ownership", "couples-files",
65+
"couples-people", "couples-shotness", "shotness", "sentiment",
66+
"devs", "devs-efforts", "old-vs-new", "all", "run-times",
67+
"languages", "devs-parallel"],
6868
help="What to plot.")
6969
parser.add_argument(
7070
"--resample", default="year",
@@ -82,7 +82,7 @@ def parse_args():
8282
parser.add_argument("--disable-projector", action="store_true",
8383
help="Do not run Tensorflow Projector on couples.")
8484
parser.add_argument("--max-people", default=20, type=int,
85-
help="Maximum number of developers in churn matrix and people plots.")
85+
help="Maximum number of developers in overwrites matrix and people plots.")
8686
args = parser.parse_args()
8787
return args
8888

@@ -716,18 +716,19 @@ def load_ownership(header, sequence, contents, max_people):
716716
return sequence, people, date_range_sampling, last
717717

718718

719-
def load_churn_matrix(people, matrix, max_people):
719+
def load_overwrites_matrix(people, matrix, max_people, normalize=True):
720720
matrix = matrix.astype(float)
721721
if matrix.shape[0] > max_people:
722722
order = numpy.argsort(-matrix[:, 0])
723723
matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
724724
people = [people[i] for i in order[:max_people]]
725725
print("Warning: truncated people to most productive %d" % max_people)
726-
zeros = matrix[:, 0] == 0
727-
matrix[zeros, :] = 1
728-
matrix /= matrix[:, 0][:, None]
726+
if normalize:
727+
zeros = matrix[:, 0] == 0
728+
matrix[zeros, :] = 1
729+
matrix /= matrix[:, 0][:, None]
730+
matrix[zeros, :] = 0
729731
matrix = -matrix[:, 1:]
730-
matrix[zeros, :] = 0
731732
for i, name in enumerate(people):
732733
if len(name) > 40:
733734
people[i] = name[:37] + "..."
@@ -907,11 +908,11 @@ def plot_many_burndown(args, target, header, parts):
907908
sys.stdout.write(stdout.getvalue())
908909

909910

910-
def plot_churn_matrix(args, repo, people, matrix):
911+
def plot_overwrites_matrix(args, repo, people, matrix):
911912
if args.output and args.output.endswith(".json"):
912913
data = locals().copy()
913914
del data["args"]
914-
data["type"] = "churn_matrix"
915+
data["type"] = "overwrites_matrix"
915916
if args.mode == "all":
916917
output = get_plot_path(args.output, "matrix")
917918
else:
@@ -1410,24 +1411,9 @@ def order_commits(chosen_people, days, people):
14101411
series = list(devseries.values())
14111412
for i, s in enumerate(series):
14121413
arr = numpy.array(s).transpose().astype(numpy.float32)
1413-
commits = arr[1]
1414-
if len(commits) < 7:
1415-
commits /= commits.max()
1416-
else:
1417-
# 4 is sizeof(float32)
1418-
windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
1419-
commits = numpy.concatenate((
1420-
[windows[0, 0] / windows[0].max(),
1421-
windows[0, 1] / windows[0].max(),
1422-
windows[0, 2] / windows[0].max()],
1423-
windows[:, 3] / windows.max(axis=1),
1424-
[windows[-1, 4] / windows[-1].max(),
1425-
windows[-1, 5] / windows[-1].max(),
1426-
windows[-1, 6] / windows[-1].max()]
1427-
))
1428-
arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to the window size
1414+
arr[1] /= arr[1].sum()
14291415
series[i] = arr.transpose()
1430-
# calculate the distance matrix using dynamic time warping metric
1416+
# calculate the distance matrix using dynamic time warping
14311417
dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
14321418
for x, serx in enumerate(series):
14331419
dists[x, x] = 0
@@ -1450,8 +1436,7 @@ def hdbscan_cluster_routed_series(dists, route):
14501436
try:
14511437
from hdbscan import HDBSCAN
14521438
except ImportError as e:
1453-
print("Cannot import ortools: %s\nInstall it from "
1454-
"https://developers.google.com/optimization/install/python/" % e)
1439+
print("Cannot import hdbscan: %s" % e)
14551440
sys.exit(1)
14561441

14571442
opt_dist_chain = numpy.cumsum(numpy.array(
@@ -1799,12 +1784,22 @@ def people_burndown():
17991784
except KeyError:
18001785
print("people: " + burndown_people_warning)
18011786

1802-
def churn_matrix():
1787+
def overwrites_matrix():
18031788
try:
1804-
plot_churn_matrix(args, name, *load_churn_matrix(
1789+
1790+
plot_overwrites_matrix(args, name, *load_overwrites_matrix(
18051791
*reader.get_people_interaction(), max_people=args.max_people))
1792+
people, matrix = load_overwrites_matrix(
1793+
*reader.get_people_interaction(), max_people=1000000, normalize=False)
1794+
from scipy.sparse import csr_matrix
1795+
matrix = matrix[:, 1:]
1796+
matrix = numpy.triu(matrix) + numpy.tril(matrix).T
1797+
matrix = matrix + matrix.T
1798+
matrix = csr_matrix(matrix)
1799+
write_embeddings("overwrites", args.output, not args.disable_projector,
1800+
*train_embeddings(people, matrix, tmpdir=args.tmpdir))
18061801
except KeyError:
1807-
print("churn_matrix: " + burndown_people_warning)
1802+
print("overwrites_matrix: " + burndown_people_warning)
18081803

18091804
def ownership_burndown():
18101805
try:
@@ -1822,23 +1817,23 @@ def couples_files():
18221817
try:
18231818
write_embeddings("files", args.output, not args.disable_projector,
18241819
*train_embeddings(*reader.get_files_coocc(),
1825-
tmpdir=args.couples_tmp_dir))
1820+
tmpdir=args.tmpdir))
18261821
except KeyError:
18271822
print(couples_warning)
18281823

18291824
def couples_people():
18301825
try:
18311826
write_embeddings("people", args.output, not args.disable_projector,
18321827
*train_embeddings(*reader.get_people_coocc(),
1833-
tmpdir=args.couples_tmp_dir))
1828+
tmpdir=args.tmpdir))
18341829
except KeyError:
18351830
print(couples_warning)
18361831

18371832
def couples_shotness():
18381833
try:
18391834
write_embeddings("shotness", args.output, not args.disable_projector,
18401835
*train_embeddings(*reader.get_shotness_coocc(),
1841-
tmpdir=args.couples_tmp_dir))
1836+
tmpdir=args.tmpdir))
18421837
except KeyError:
18431838
print(shotness_warning)
18441839

@@ -1916,7 +1911,7 @@ def devs_parallel():
19161911
"burndown-project": project_burndown,
19171912
"burndown-file": files_burndown,
19181913
"burndown-person": people_burndown,
1919-
"churn-matrix": churn_matrix,
1914+
"overwrites-matrix": overwrites_matrix,
19201915
"ownership": ownership_burndown,
19211916
"couples-files": couples_files,
19221917
"couples-people": couples_people,
@@ -1936,7 +1931,7 @@ def devs_parallel():
19361931
project_burndown()
19371932
files_burndown()
19381933
people_burndown()
1939-
churn_matrix()
1934+
overwrites_matrix()
19401935
ownership_burndown()
19411936
couples_files()
19421937
couples_people()

‎python/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
description="Python companion for github.com/src-d/hercules to visualize the results.",
1616
long_description=long_description,
1717
long_description_content_type="text/markdown",
18-
version="10.1.0",
18+
version="10.2.0",
1919
license="Apache-2.0",
2020
author="source{d}",
2121
author_email="machine-learning@sourced.tech",

0 commit comments

Comments
 (0)
Please sign in to comment.