Skip to content

Commit fe51756

Browse files
committed
Publish 0.2.2
1 parent 09eb9fb commit fe51756

7 files changed

+185
-28
lines changed

README.md

+9
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,15 @@ clusterer = spectral_clusterer.SpectralClusterer(
185185
labels = clusterer.predict(matrix, constraint_matrix)
186186
```
187187

188+
The constraint matrix can be constructed from a `speaker_turn_scores` list (named `spk_turn_entries` in the example below):
189+
190+
```python
191+
from spectralcluster import constraint
192+
193+
constraint_matrix = constraint.ConstraintMatrix(
194+
spk_turn_entries, threshold=1).compute_diagonals()
195+
```
196+
188197
## Citations
189198

190199
Our paper is cited as:

docs/configs.html

+42-1
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,22 @@ <h1 class="title">Module <code>spectralcluster.configs</code></h1>
2929
</summary>
3030
<pre><code class="python">&#34;&#34;&#34;Example configurations.&#34;&#34;&#34;
3131

32+
from spectralcluster import autotune
33+
from spectralcluster import constraint
34+
from spectralcluster import laplacian
3235
from spectralcluster import refinement
3336
from spectralcluster import spectral_clusterer
3437

38+
AutoTune = autotune.AutoTune
39+
ConstraintName = constraint.ConstraintName
40+
ConstraintOptions = constraint.ConstraintOptions
3541
RefinementName = refinement.RefinementName
3642
RefinementOptions = refinement.RefinementOptions
3743
ThresholdType = refinement.ThresholdType
3844
SymmetrizeType = refinement.SymmetrizeType
45+
LaplacianType = laplacian.LaplacianType
3946
SpectralClusterer = spectral_clusterer.SpectralClusterer
4047

41-
4248
# Configurations that are closest to the ICASSP2018 paper
4349
# &#34;Speaker Diarization with LSTM&#34;.
4450
ICASSP2018_REFINEMENT_SEQUENCE = [
@@ -63,6 +69,41 @@ <h1 class="title">Module <code>spectralcluster.configs</code></h1>
6369
autotune=None,
6470
laplacian_type=None,
6571
refinement_options=icassp2018_refinement_options,
72+
custom_dist=&#34;cosine&#34;)
73+
74+
# Configurations of Turn-To-Diarize system using the
75+
# Turn + Constraint Propagation + AutoTune method
76+
TURNTODIARIZE_REFINEMENT_SEQUENCE = [
77+
RefinementName.RowWiseThreshold, RefinementName.Symmetrize
78+
]
79+
80+
turntodiarize_refinement_options = RefinementOptions(
81+
thresholding_soft_multiplier=0.01,
82+
thresholding_type=ThresholdType.Percentile,
83+
thresholding_with_binarization=True,
84+
thresholding_preserve_diagonal=True,
85+
symmetrize_type=SymmetrizeType.Average,
86+
refinement_sequence=TURNTODIARIZE_REFINEMENT_SEQUENCE)
87+
88+
turntodiarize_constraint_options = ConstraintOptions(
89+
constraint_name=ConstraintName.ConstraintPropagation,
90+
apply_before_refinement=True,
91+
constraint_propagation_alpha=0.4)
92+
93+
turntodiarize_auto_tune = AutoTune(
94+
p_percentile_min=0.40,
95+
p_percentile_max=0.95,
96+
init_search_step=0.05,
97+
search_level=1)
98+
99+
turntodiarize_clusterer = SpectralClusterer(
100+
min_clusters=2,
101+
max_clusters=7,
102+
refinement_options=turntodiarize_refinement_options,
103+
constraint_options=turntodiarize_constraint_options,
104+
autotune=turntodiarize_auto_tune,
105+
laplacian_type=LaplacianType.GraphCut,
106+
row_wise_renorm=True,
66107
custom_dist=&#34;cosine&#34;)</code></pre>
67108
</details>
68109
</section>

docs/index.html

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ <h1 class="title">Package <code>spectralcluster</code></h1>
3535
from . import laplacian
3636
from . import refinement
3737
from . import spectral_clusterer
38-
38+
from . import utils
3939

4040
AutoTune = autotune.AutoTune
4141

@@ -52,6 +52,8 @@ <h1 class="title">Package <code>spectralcluster</code></h1>
5252

5353
SpectralClusterer = spectral_clusterer.SpectralClusterer
5454

55+
EigenGapType = utils.EigenGapType
56+
5557
ICASSP2018_REFINEMENT_SEQUENCE = configs.ICASSP2018_REFINEMENT_SEQUENCE</code></pre>
5658
</details>
5759
</section>

docs/laplacian.html

+4-4
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ <h1 class="title">Module <code>spectralcluster.laplacian</code></h1>
4343
# The unnormalized Laplacian: L = D - W
4444
Unnormalized = 1
4545

46-
# The random walk view normalized Laplacian: D^{-1}L
46+
# The random walk view normalized Laplacian: D^{-1} * L
4747
RandomWalk = 2
4848

49-
# The graph cut view normalized Laplacian: D^{-1/2}LD^{-1/2}
49+
# The graph cut view normalized Laplacian: D^{-1/2} * L * D^{-1/2}
5050
GraphCut = 3
5151

5252

@@ -181,10 +181,10 @@ <h2 class="section-title" id="header-classes">Classes</h2>
181181
# The unnormalized Laplacian: L = D - W
182182
Unnormalized = 1
183183

184-
# The random walk view normalized Laplacian: D^{-1}L
184+
# The random walk view normalized Laplacian: D^{-1} * L
185185
RandomWalk = 2
186186

187-
# The graph cut view normalized Laplacian: D^{-1/2}LD^{-1/2}
187+
# The graph cut view normalized Laplacian: D^{-1/2} * L * D^{-1/2}
188188
GraphCut = 3</code></pre>
189189
</details>
190190
<h3>Ancestors</h3>

docs/spectral_clusterer.html

+27-10
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
3939
RefinementName = refinement.RefinementName
4040
LaplacianType = laplacian.LaplacianType
4141
ConstraintName = constraint.ConstraintName
42+
EigenGapType = utils.EigenGapType
4243

4344

4445
class SpectralClusterer:
@@ -54,7 +55,8 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
5455
row_wise_renorm=False,
5556
custom_dist=&#34;cosine&#34;,
5657
max_iter=300,
57-
constraint_options=None):
58+
constraint_options=None,
59+
eigengap_type=EigenGapType.Ratio):
5860
&#34;&#34;&#34;Constructor of the clusterer.
5961

6062
Args:
@@ -77,6 +79,7 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
7779
max_iter: the maximum number of iterations for the custom k-means
7880
constraint_options: a ConstraintOptions object that contains constraint
7981
arguments
82+
eigengap_type: the type of the eigengap computation
8083
&#34;&#34;&#34;
8184
self.min_clusters = min_clusters
8285
self.max_clusters = max_clusters
@@ -91,6 +94,7 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
9194
self.custom_dist = custom_dist
9295
self.max_iter = max_iter
9396
self.constraint_options = constraint_options
97+
self.eigengap_type = eigengap_type
9498

9599
def _compute_eigenvectors_ncluster(self, affinity, constraint_matrix=None):
96100
&#34;&#34;&#34;Perform eigen decomposition and estimate the number of clusters.
@@ -133,7 +137,11 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
133137
(eigenvalues, eigenvectors) = utils.compute_sorted_eigenvectors(affinity)
134138
# Get number of clusters.
135139
n_clusters, max_delta_norm = utils.compute_number_of_clusters(
136-
eigenvalues, self.max_clusters, self.stop_eigenvalue, descend=True)
140+
eigenvalues,
141+
self.max_clusters,
142+
self.stop_eigenvalue,
143+
self.eigengap_type,
144+
descend=True)
137145
else:
138146
# Compute Laplacian matrix
139147
laplacian_norm = laplacian.compute_laplacian(
@@ -144,7 +152,7 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
144152
laplacian_norm, descend=False)
145153
# Get number of clusters. Eigen values are sorted in an ascending order
146154
n_clusters, max_delta_norm = utils.compute_number_of_clusters(
147-
eigenvalues, self.max_clusters, descend=False)
155+
eigenvalues, self.max_clusters, self.eigengap_type, descend=False)
148156
return eigenvectors, n_clusters, max_delta_norm
149157

150158
def predict(self, embeddings, constraint_matrix=None):
@@ -187,7 +195,7 @@ <h1 class="title">Module <code>spectralcluster.spectral_clusterer</code></h1>
187195
(eigenvectors, n_clusters,
188196
max_delta_norm) = self._compute_eigenvectors_ncluster(
189197
affinity, constraint_matrix)
190-
ratio = (1 - p_percentile) / max_delta_norm
198+
ratio = np.sqrt(1 - p_percentile) / max_delta_norm
191199
return ratio, eigenvectors, n_clusters
192200

193201
eigenvectors, n_clusters, _ = self.autotune.tune(p_percentile_to_ratio)
@@ -228,7 +236,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
228236
<dl>
229237
<dt id="spectralcluster.spectral_clusterer.SpectralClusterer"><code class="flex name class">
230238
<span>class <span class="ident">SpectralClusterer</span></span>
231-
<span>(</span><span>min_clusters=None, max_clusters=None, refinement_options=None, autotune=None, laplacian_type=None, stop_eigenvalue=0.01, row_wise_renorm=False, custom_dist='cosine', max_iter=300, constraint_options=None)</span>
239+
<span>(</span><span>min_clusters=None, max_clusters=None, refinement_options=None, autotune=None, laplacian_type=None, stop_eigenvalue=0.01, row_wise_renorm=False, custom_dist='cosine', max_iter=300, constraint_options=None, eigengap_type=EigenGapType.Ratio)</span>
232240
</code></dt>
233241
<dd>
234242
<div class="desc"><p>Spectral clustering class.</p>
@@ -264,6 +272,8 @@ <h2 id="args">Args</h2>
264272
<dt><strong><code>constraint_options</code></strong></dt>
265273
<dd>a ConstraintOptions object that contains constraint
266274
arguments</dd>
275+
<dt><strong><code>eigengap_type</code></strong></dt>
276+
<dd>the type of the eigengap computation</dd>
267277
</dl></div>
268278
<details class="source">
269279
<summary>
@@ -282,7 +292,8 @@ <h2 id="args">Args</h2>
282292
row_wise_renorm=False,
283293
custom_dist=&#34;cosine&#34;,
284294
max_iter=300,
285-
constraint_options=None):
295+
constraint_options=None,
296+
eigengap_type=EigenGapType.Ratio):
286297
&#34;&#34;&#34;Constructor of the clusterer.
287298

288299
Args:
@@ -305,6 +316,7 @@ <h2 id="args">Args</h2>
305316
max_iter: the maximum number of iterations for the custom k-means
306317
constraint_options: a ConstraintOptions object that contains constraint
307318
arguments
319+
eigengap_type: the type of the eigengap computation
308320
&#34;&#34;&#34;
309321
self.min_clusters = min_clusters
310322
self.max_clusters = max_clusters
@@ -319,6 +331,7 @@ <h2 id="args">Args</h2>
319331
self.custom_dist = custom_dist
320332
self.max_iter = max_iter
321333
self.constraint_options = constraint_options
334+
self.eigengap_type = eigengap_type
322335

323336
def _compute_eigenvectors_ncluster(self, affinity, constraint_matrix=None):
324337
&#34;&#34;&#34;Perform eigen decomposition and estimate the number of clusters.
@@ -361,7 +374,11 @@ <h2 id="args">Args</h2>
361374
(eigenvalues, eigenvectors) = utils.compute_sorted_eigenvectors(affinity)
362375
# Get number of clusters.
363376
n_clusters, max_delta_norm = utils.compute_number_of_clusters(
364-
eigenvalues, self.max_clusters, self.stop_eigenvalue, descend=True)
377+
eigenvalues,
378+
self.max_clusters,
379+
self.stop_eigenvalue,
380+
self.eigengap_type,
381+
descend=True)
365382
else:
366383
# Compute Laplacian matrix
367384
laplacian_norm = laplacian.compute_laplacian(
@@ -372,7 +389,7 @@ <h2 id="args">Args</h2>
372389
laplacian_norm, descend=False)
373390
# Get number of clusters. Eigen values are sorted in an ascending order
374391
n_clusters, max_delta_norm = utils.compute_number_of_clusters(
375-
eigenvalues, self.max_clusters, descend=False)
392+
eigenvalues, self.max_clusters, self.eigengap_type, descend=False)
376393
return eigenvectors, n_clusters, max_delta_norm
377394

378395
def predict(self, embeddings, constraint_matrix=None):
@@ -415,7 +432,7 @@ <h2 id="args">Args</h2>
415432
(eigenvectors, n_clusters,
416433
max_delta_norm) = self._compute_eigenvectors_ncluster(
417434
affinity, constraint_matrix)
418-
ratio = (1 - p_percentile) / max_delta_norm
435+
ratio = np.sqrt(1 - p_percentile) / max_delta_norm
419436
return ratio, eigenvectors, n_clusters
420437

421438
eigenvectors, n_clusters, _ = self.autotune.tune(p_percentile_to_ratio)
@@ -516,7 +533,7 @@ <h2 id="raises">Raises</h2>
516533
(eigenvectors, n_clusters,
517534
max_delta_norm) = self._compute_eigenvectors_ncluster(
518535
affinity, constraint_matrix)
519-
ratio = (1 - p_percentile) / max_delta_norm
536+
ratio = np.sqrt(1 - p_percentile) / max_delta_norm
520537
return ratio, eigenvectors, n_clusters
521538

522539
eigenvectors, n_clusters, _ = self.autotune.tune(p_percentile_to_ratio)

0 commit comments

Comments
 (0)