-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrank_aggregation.py
130 lines (112 loc) · 4.79 KB
/
rank_aggregation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import numpy as np
from scipy.stats import mode
from itertools import combinations
### 2023-07-15 : currently no ties are considered and assume full assignment.
### Generalized Borda's method is explained in Qiu, 2018.
def borda(rank_collection):
"""Implements the Borda ranking aggregation. - Actually soft voting, used in
Parameters
----------
rank_collection : np.ndarray of shape (n_samples, n_labels, n_models)
Predicted rankings of labels from n_models number of different models.
i.e. the smaller the value, the more preferred.
Returns
-------
rank_array : np.ndarray of shape (n_samples, n_labels)
"""
score_array = np.zeros(
(rank_collection.shape[0], rank_collection.shape[1], rank_collection.shape[1])
)
for (i, j) in combinations(range(rank_collection.shape[1]), 2):
slice_ref = rank_collection[:, i, :]
slice_compare = rank_collection[:, j, :]
score_array[:, i, j] = np.sum(slice_ref < slice_compare, axis=1)
score_array[:, j, i] = np.sum(slice_ref > slice_compare, axis=1)
final_score = np.sum(score_array, axis=2)
order = np.argsort(final_score, axis=1)
rank = np.argsort(order, axis=1)
return (
np.ones_like(rank) * rank.shape[1] - rank
) # returns lowest value to highest score
def borda_count(rank_collection, weights=None):
"""Implements the actual Borda ranking aggregation.
Parameters
----------
rank_collection : np.ndarray of shape (n_samples, n_labels, n_models)
Predicted rankings of labels from n_models number of different models.
i.e. the smaller the value, the more preferred.
When there are nan values, we assume that the rank vector's values are adjusted to [1, n_non_nan labels]
weights : np.ndarray of shape (n_models,)
Weights, as computed by 'weighted distance * portion of ranks' of each neighbor.
Returns
-------
rank_array : np.ndarray of shape (n_samples, n_labels)
"""
n_samples, n_labels, n_models = rank_collection.shape
if np.any(
np.isnan(rank_collection)
): # For the generalized version with incomplete labels
n_ranked_labels = np.tile(
np.expand_dims(np.sum(np.isnan(rank_collection), axis=1), axis=1),
(1, n_labels, 1),
)
score_array = np.divide(
(n_labels + 1) * (n_ranked_labels - rank_collection + 1),
n_ranked_labels + 1,
) # Scores for ranked labels
score_array[np.where(np.isnan(score_array))] = 0.5 * (
n_labels + 1
) # Scores for Incomplete labels
if weights is None:
score_array = np.sum(score_array, axis=2)
else:
score_by_sample = []
for i in range(score_array.shape[0]):
score_by_sample.append(
np.dot(score_array[i], weights[i]) / np.sum(weights[i])
)
score_array = np.vstack(tuple(score_by_sample))
else: # complete version
score_array = np.sum(rank_collection.shape[1] - rank_collection, axis=2)
rank = score_array.shape[1] - score_array.argsort(axis=1).argsort(axis=1)
return rank
def copeland(rank_collection):
"""Implements the Copeland ranking aggregation."""
score_array = np.zeros(
(rank_collection.shape[0], rank_collection.shape[1], rank_collection.shape[1])
)
def f(num_wins, num_models=rank_collection.shape[2]):
if num_wins > num_models * 0.5:
return 1
elif num_wins == num_models * 0.5:
return 0.5
else:
return 0
for (i, j) in combinations(range(rank_collection.shape[1]), 2):
slice_ref = rank_collection[:, i, :]
slice_compare = rank_collection[:, j, :]
score_array[:, i, j] = np.array(
list(map(f, np.sum(slice_ref < slice_compare, axis=1)))
).reshape(-1, 1)
score_array[:, j, i] = np.array(
list(map(f, np.sum(slice_ref > slice_compare, axis=1)))
).reshape(-1, 1)
final_score = np.sum(score_array, axis=2)
rank = np.argsort(-final_score, axis=1) + np.ones_like(final_score)
return rank # returns lowest value to highest score
def modal(rank_collection):
"""Implements the modal ranking aggregation.
Parameters
----------
rank_collection : np.ndarray of shape (n_samples, n_labels, n_models)
Predicted rankings of labels from n_models number of different models.
i.e. the smaller the value, the more preferred.
Returns
-------
rank_array : np.ndarray of shape (n_samples, n_labels)
Aggregated ranks.
"""
modes, _ = mode(
rank_collection, axis=2, keepdims=False
) # Note that only one value is returned even with ties
return modes.reshape(rank_collection.shape[0], rank_collection.shape[1])