Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Table of Contents
* [Project burndown](#project-burndown)
* [Files](#files)
* [People](#people)
* [Churn matrix](#churn-matrix)
* [Overwrites matrix](#overwrites-matrix)
* [Code ownership](#code-ownership)
* [Couples](#couples)
* [Structural hotness](#structural-hotness)
Expand Down Expand Up @@ -214,14 +214,14 @@ If `--people-dict` is specified, it should point to a text file with the custom
format is: every line is a single developer; it contains all the matching emails and names separated
by `|`. The case is ignored.

#### Churn matrix
#### Overwrites matrix

![Wireshark top 20 churn matrix](doc/wireshark_churn_matrix.png)
<p align="center">Wireshark top 20 devs - churn matrix</p>
![Wireshark top 20 overwrites matrix](doc/wireshark_overwrites_matrix.png)
<p align="center">Wireshark top 20 devs - overwrites matrix</p>

```
hercules --burndown --burndown-people [--people-dict=/path/to/identities]
labours -m churn-matrix
labours -m overwrites-matrix
```

Besides the burndown information, `--burndown-people` collects the added and deleted line statistics per
Expand Down
File renamed without changes
75 changes: 35 additions & 40 deletions python/labours/labours.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ def parse_args():
parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
parser.add_argument("--relative", action="store_true",
help="Occupy 100%% height for every measurement.")
parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
parser.add_argument("-m", "--mode",
choices=["burndown-project", "burndown-file", "burndown-person",
"churn-matrix", "ownership", "couples-files", "couples-people",
"couples-shotness", "shotness", "sentiment", "devs",
"devs-efforts", "old-vs-new", "all", "run-times", "languages",
"devs-parallel"],
"overwrites-matrix", "ownership", "couples-files",
"couples-people", "couples-shotness", "shotness", "sentiment",
"devs", "devs-efforts", "old-vs-new", "all", "run-times",
"languages", "devs-parallel"],
help="What to plot.")
parser.add_argument(
"--resample", default="year",
Expand All @@ -82,7 +82,7 @@ def parse_args():
parser.add_argument("--disable-projector", action="store_true",
help="Do not run Tensorflow Projector on couples.")
parser.add_argument("--max-people", default=20, type=int,
help="Maximum number of developers in churn matrix and people plots.")
help="Maximum number of developers in overwrites matrix and people plots.")
args = parser.parse_args()
return args

Expand Down Expand Up @@ -716,18 +716,19 @@ def load_ownership(header, sequence, contents, max_people):
return sequence, people, date_range_sampling, last


def load_churn_matrix(people, matrix, max_people):
def load_overwrites_matrix(people, matrix, max_people, normalize=True):
matrix = matrix.astype(float)
if matrix.shape[0] > max_people:
order = numpy.argsort(-matrix[:, 0])
matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
people = [people[i] for i in order[:max_people]]
print("Warning: truncated people to most productive %d" % max_people)
zeros = matrix[:, 0] == 0
matrix[zeros, :] = 1
matrix /= matrix[:, 0][:, None]
if normalize:
zeros = matrix[:, 0] == 0
matrix[zeros, :] = 1
matrix /= matrix[:, 0][:, None]
matrix[zeros, :] = 0
matrix = -matrix[:, 1:]
matrix[zeros, :] = 0
for i, name in enumerate(people):
if len(name) > 40:
people[i] = name[:37] + "..."
Expand Down Expand Up @@ -907,11 +908,11 @@ def plot_many_burndown(args, target, header, parts):
sys.stdout.write(stdout.getvalue())


def plot_churn_matrix(args, repo, people, matrix):
def plot_overwrites_matrix(args, repo, people, matrix):
if args.output and args.output.endswith(".json"):
data = locals().copy()
del data["args"]
data["type"] = "churn_matrix"
data["type"] = "overwrites_matrix"
if args.mode == "all":
output = get_plot_path(args.output, "matrix")
else:
Expand Down Expand Up @@ -1410,24 +1411,9 @@ def order_commits(chosen_people, days, people):
series = list(devseries.values())
for i, s in enumerate(series):
arr = numpy.array(s).transpose().astype(numpy.float32)
commits = arr[1]
if len(commits) < 7:
commits /= commits.max()
else:
# 4 is sizeof(float32)
windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
commits = numpy.concatenate((
[windows[0, 0] / windows[0].max(),
windows[0, 1] / windows[0].max(),
windows[0, 2] / windows[0].max()],
windows[:, 3] / windows.max(axis=1),
[windows[-1, 4] / windows[-1].max(),
windows[-1, 5] / windows[-1].max(),
windows[-1, 6] / windows[-1].max()]
))
arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to the window size
arr[1] /= arr[1].sum()
series[i] = arr.transpose()
# calculate the distance matrix using dynamic time warping metric
# calculate the distance matrix using dynamic time warping
dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
for x, serx in enumerate(series):
dists[x, x] = 0
Expand All @@ -1450,8 +1436,7 @@ def hdbscan_cluster_routed_series(dists, route):
try:
from hdbscan import HDBSCAN
except ImportError as e:
print("Cannot import ortools: %s\nInstall it from "
"https://developers.google.com/optimization/install/python/" % e)
print("Cannot import hdbscan: %s" % e)
sys.exit(1)

opt_dist_chain = numpy.cumsum(numpy.array(
Expand Down Expand Up @@ -1799,12 +1784,22 @@ def people_burndown():
except KeyError:
print("people: " + burndown_people_warning)

def churn_matrix():
def overwrites_matrix():
try:
plot_churn_matrix(args, name, *load_churn_matrix(

plot_overwrites_matrix(args, name, *load_overwrites_matrix(
*reader.get_people_interaction(), max_people=args.max_people))
people, matrix = load_overwrites_matrix(
*reader.get_people_interaction(), max_people=1000000, normalize=False)
from scipy.sparse import csr_matrix
matrix = matrix[:, 1:]
matrix = numpy.triu(matrix) + numpy.tril(matrix).T
matrix = matrix + matrix.T
matrix = csr_matrix(matrix)
write_embeddings("overwrites", args.output, not args.disable_projector,
*train_embeddings(people, matrix, tmpdir=args.tmpdir))
except KeyError:
print("churn_matrix: " + burndown_people_warning)
print("overwrites_matrix: " + burndown_people_warning)

def ownership_burndown():
try:
Expand All @@ -1822,23 +1817,23 @@ def couples_files():
try:
write_embeddings("files", args.output, not args.disable_projector,
*train_embeddings(*reader.get_files_coocc(),
tmpdir=args.couples_tmp_dir))
tmpdir=args.tmpdir))
except KeyError:
print(couples_warning)

def couples_people():
try:
write_embeddings("people", args.output, not args.disable_projector,
*train_embeddings(*reader.get_people_coocc(),
tmpdir=args.couples_tmp_dir))
tmpdir=args.tmpdir))
except KeyError:
print(couples_warning)

def couples_shotness():
try:
write_embeddings("shotness", args.output, not args.disable_projector,
*train_embeddings(*reader.get_shotness_coocc(),
tmpdir=args.couples_tmp_dir))
tmpdir=args.tmpdir))
except KeyError:
print(shotness_warning)

Expand Down Expand Up @@ -1916,7 +1911,7 @@ def devs_parallel():
"burndown-project": project_burndown,
"burndown-file": files_burndown,
"burndown-person": people_burndown,
"churn-matrix": churn_matrix,
"overwrites-matrix": overwrites_matrix,
"ownership": ownership_burndown,
"couples-files": couples_files,
"couples-people": couples_people,
Expand All @@ -1936,7 +1931,7 @@ def devs_parallel():
project_burndown()
files_burndown()
people_burndown()
churn_matrix()
overwrites_matrix()
ownership_burndown()
couples_files()
couples_people()
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
description="Python companion for github.com/src-d/hercules to visualize the results.",
long_description=long_description,
long_description_content_type="text/markdown",
version="10.1.0",
version="10.2.0",
license="Apache-2.0",
author="source{d}",
author_email="[email protected]",
Expand Down