-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpersonalized_pagerank.py
78 lines (64 loc) · 2.17 KB
/
personalized_pagerank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import networkx as nx
from operator import itemgetter
from collections import OrderedDict
def ppr(g, seed, alpha=0.85, epsilon=10e-8, iters=100):
pref = {}
T = [seed]
for node in g.neighbors(seed):
T.append(node)
for node in g:
if node in T:
pref.update({node:(1.0 / len(T))})
else:
pref.update({node:0.0})
return nx.pagerank(g, alpha=alpha, personalization=pref, max_iter=iters,
tol=epsilon)
def ppr_sorted(g, pprv):
spprv = {}
for item in pprv.iteritems():
k, v = item
spprv.update({k:(v/g.degree(k))})
pass
return sorted(spprv.items(), key=itemgetter(1), reverse=True)
def min_cond_cut(g, dspprv, max_cutsize=0):
def conductance(nbunch):
sigma = 0.0
vol1 = vol2 = 0
for node in nbunch:
for n in g.neighbors(node):
if n not in nbunch:
sigma += 1.0
for degseq in g.degree().iteritems():
node, degree = degseq
if node not in nbunch:
vol2 += degree
else:
vol1 += degree
return (sigma / min(vol1, vol2))
k = 1
conductance_list = []
if max_cutsize < 1:
# cutsize could be as big as the graph itself
limit = (len(dspprv))
else:
# maximum size of the cut with minimum conductance
limit = max_cutsize
while k < limit :
nbunch = []
for i in xrange(0, k):
nbunch.append(dspprv[i][0])
c = (k, conductance(nbunch))
# conductane of current cut size
print c
conductance_list.append(c)
k += 1
return min(conductance_list, key=itemgetter(1))
## running the code ..
def loadGraph(gfile):
return nx.read_edgelist(path=gfile, comments='#',
delimiter="\t", nodetype=int)
g = loadGraph(gfile='../data/snap-dblp/com-dblp.ungraph.txt')
a = ppr(g, seed=5, alpha=0.85, epsilon=10e-8, iters=100)
b = ppr_sorted(g, pprv=a)
# finding the best community around NodeId==5
print(min_cond_cut(g, dspprv=b, max_cutsize=10))