@@ -58,13 +58,13 @@ def parse_args():
     parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
     parser.add_argument("--relative", action="store_true",
                         help="Occupy 100%% height for every measurement.")
-    parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
+    parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
     parser.add_argument("-m", "--mode",
                         choices=["burndown-project", "burndown-file", "burndown-person",
-                                 "churn-matrix", "ownership", "couples-files", "couples-people",
-                                 "couples-shotness", "shotness", "sentiment", "devs",
-                                 "devs-efforts", "old-vs-new", "all", "run-times", "languages",
-                                 "devs-parallel"],
+                                 "overwrites-matrix", "ownership", "couples-files",
+                                 "couples-people", "couples-shotness", "shotness", "sentiment",
+                                 "devs", "devs-efforts", "old-vs-new", "all", "run-times",
+                                 "languages", "devs-parallel"],
                         help="What to plot.")
     parser.add_argument(
         "--resample", default="year",
@@ -82,7 +82,7 @@ def parse_args():
     parser.add_argument("--disable-projector", action="store_true",
                         help="Do not run Tensorflow Projector on couples.")
     parser.add_argument("--max-people", default=20, type=int,
-                        help="Maximum number of developers in churn matrix and people plots.")
+                        help="Maximum number of developers in overwrites matrix and people plots.")
     args = parser.parse_args()
     return args
 
@@ -716,18 +716,19 @@ def load_ownership(header, sequence, contents, max_people):
     return sequence, people, date_range_sampling, last
 
 
-def load_churn_matrix(people, matrix, max_people):
+def load_overwrites_matrix(people, matrix, max_people, normalize=True):
     matrix = matrix.astype(float)
     if matrix.shape[0] > max_people:
         order = numpy.argsort(-matrix[:, 0])
         matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
         people = [people[i] for i in order[:max_people]]
         print("Warning: truncated people to most productive %d" % max_people)
-    zeros = matrix[:, 0] == 0
-    matrix[zeros, :] = 1
-    matrix /= matrix[:, 0][:, None]
+    if normalize:
+        zeros = matrix[:, 0] == 0
+        matrix[zeros, :] = 1
+        matrix /= matrix[:, 0][:, None]
+        matrix[zeros, :] = 0
     matrix = -matrix[:, 1:]
-    matrix[zeros, :] = 0
     for i, name in enumerate(people):
         if len(name) > 40:
             people[i] = name[:37] + "..."
@@ -907,11 +908,11 @@ def plot_many_burndown(args, target, header, parts):
     sys.stdout.write(stdout.getvalue())
 
 
-def plot_churn_matrix(args, repo, people, matrix):
+def plot_overwrites_matrix(args, repo, people, matrix):
     if args.output and args.output.endswith(".json"):
         data = locals().copy()
         del data["args"]
-        data["type"] = "churn_matrix"
+        data["type"] = "overwrites_matrix"
         if args.mode == "all":
             output = get_plot_path(args.output, "matrix")
         else:
@@ -1410,24 +1411,9 @@ def order_commits(chosen_people, days, people):
     series = list(devseries.values())
     for i, s in enumerate(series):
         arr = numpy.array(s).transpose().astype(numpy.float32)
-        commits = arr[1]
-        if len(commits) < 7:
-            commits /= commits.max()
-        else:
-            # 4 is sizeof(float32)
-            windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
-            commits = numpy.concatenate((
-                [windows[0, 0] / windows[0].max(),
-                 windows[0, 1] / windows[0].max(),
-                 windows[0, 2] / windows[0].max()],
-                windows[:, 3] / windows.max(axis=1),
-                [windows[-1, 4] / windows[-1].max(),
-                 windows[-1, 5] / windows[-1].max(),
-                 windows[-1, 6] / windows[-1].max()]
-            ))
-        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to the window size
+        arr[1] /= arr[1].sum()
         series[i] = arr.transpose()
-    # calculate the distance matrix using dynamic time warping metric
+    # calculate the distance matrix using dynamic time warping
     dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
     for x, serx in enumerate(series):
         dists[x, x] = 0
@@ -1450,8 +1436,7 @@ def hdbscan_cluster_routed_series(dists, route):
     try:
         from hdbscan import HDBSCAN
     except ImportError as e:
-        print("Cannot import ortools: %s\nInstall it from "
-              "https://developers.google.com/optimization/install/python/" % e)
+        print("Cannot import hdbscan: %s" % e)
         sys.exit(1)
 
     opt_dist_chain = numpy.cumsum(numpy.array(
@@ -1799,12 +1784,22 @@ def people_burndown():
         except KeyError:
             print("people: " + burndown_people_warning)
 
-    def churn_matrix():
+    def overwrites_matrix():
         try:
-            plot_churn_matrix(args, name, *load_churn_matrix(
+
+            plot_overwrites_matrix(args, name, *load_overwrites_matrix(
                 *reader.get_people_interaction(), max_people=args.max_people))
+            people, matrix = load_overwrites_matrix(
+                *reader.get_people_interaction(), max_people=1000000, normalize=False)
+            from scipy.sparse import csr_matrix
+            matrix = matrix[:, 1:]
+            matrix = numpy.triu(matrix) + numpy.tril(matrix).T
+            matrix = matrix + matrix.T
+            matrix = csr_matrix(matrix)
+            write_embeddings("overwrites", args.output, not args.disable_projector,
+                             *train_embeddings(people, matrix, tmpdir=args.tmpdir))
         except KeyError:
-            print("churn_matrix: " + burndown_people_warning)
+            print("overwrites_matrix: " + burndown_people_warning)
 
     def ownership_burndown():
         try:
@@ -1822,23 +1817,23 @@ def couples_files():
         try:
             write_embeddings("files", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_files_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(couples_warning)
 
     def couples_people():
         try:
             write_embeddings("people", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_people_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(couples_warning)
 
     def couples_shotness():
         try:
             write_embeddings("shotness", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_shotness_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(shotness_warning)
 
@@ -1916,7 +1911,7 @@ def devs_parallel():
         "burndown-project": project_burndown,
         "burndown-file": files_burndown,
         "burndown-person": people_burndown,
-        "churn-matrix": churn_matrix,
+        "overwrites-matrix": overwrites_matrix,
         "ownership": ownership_burndown,
         "couples-files": couples_files,
         "couples-people": couples_people,
@@ -1936,7 +1931,7 @@ def devs_parallel():
         project_burndown()
         files_burndown()
         people_burndown()
-        churn_matrix()
+        overwrites_matrix()
         ownership_burndown()
         couples_files()
         couples_people()