Skip to content

Commit becfd8f

Browse files
Cereal84prateekiiest
authored andcommitted
This should fix issue 91; I've also added an argparse option in order to specify … (#140)
* This should fix issue 91; I've also added an argparse option in order to specify the data_filepath * fix a subset of PEP8 issues * fix a subset of PEP8 issues * module imports MUST be at the top * module imports MUST be at the top * fixed rows longer than 80 columns * removed useless import * code refactored * removed old code * code refactored in order to avoid nested control flow statements * avoid codebeat nested control * eliminate codebeat nested control validation * fix n arguments error on codeclimate
1 parent af608d0 commit becfd8f

File tree

1 file changed

+65
-30
lines changed
  • Code-Sleep-Python/social_network

1 file changed

+65
-30
lines changed
Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,47 @@
11
import pandas as pd
2-
df = pd.read_stata(data_filepath + "individual_characteristics.dta")
2+
import argparse
import os
from os import getcwd
4+
5+
# Command-line interface: lets the caller choose the directory that holds
# the Stata data file; it defaults to the current working directory.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--data_filepath', help="""Select the directory where
                    is stored the file. Default is the current directory.""",
                    default=getcwd())

args = parser.parse_args()
data_filepath = args.data_filepath

# Build the path with os.path.join instead of "+": the default directory
# (getcwd()) normally has no trailing separator, so plain concatenation
# glued the directory name and the file name together into a path that
# does not exist.
df = pd.read_stata(os.path.join(data_filepath,
                                "individual_characteristics.dta"))
# Split the rows by the value of the `village` column.
df1 = df[df.village == 1]
df2 = df[df.village == 2]

# Enter code here!
df1.head()
819

920

10-
sex1 = {df1.pid[i] : df1.resp_gend[i] for i in range(len(df1.pid))}
11-
caste1 = {df1.pid[i] : df1.caste[i] for i in range(len(df1.pid))}
12-
religion1 = {df1.pid[i] : df1.religion[i] for i in range(len(df1.pid))}
21+
def get_params(dfx, elem, items_range):
    """Build a ``{pid: value}`` dictionary for the given labels.

    For every label ``i`` in *items_range*, the key is ``dfx.pid[i]`` and
    the value is ``elem[i]``.

    Args:
        dfx: Object exposing an indexable ``pid`` attribute.
        elem: Indexable collection of values, addressed by the same labels.
        items_range: Iterable of labels used to index both collections.

    Returns:
        dict: Mapping from each looked-up pid to the matching element.
    """
    params = {}
    for label in items_range:
        key = dfx.pid[label]
        params[key] = elem[label]
    return params
23+
24+
# Per-village lookup tables mapping each pid to one characteristic.
sex1 = get_params(df1, df1.resp_gend, range(len(df1.pid)))
caste1 = get_params(df1, df1.caste, range(len(df1.pid)))
# The get_params call was missing here, which left religion1 as a plain
# tuple instead of the {pid: religion} dictionary used further down.
religion1 = get_params(df1, df1.religion, range(len(df1.pid)))

# Continue for df2 as well.

j = 203
# NOTE(review): the 203..405 label range is hard-coded; presumably these are
# the row labels of df2 - confirm against the data file.
sex2 = get_params(df2, df2.resp_gend, range(203, 406))
caste2 = get_params(df2, df2.caste, range(203, 406))
# Same fix as religion1: build the dictionary rather than a tuple.
religion2 = get_params(df2, df2.religion, range(203, 406))
2034

2135

22-
from collections import Counter
2336
def chance_homophily(chars):
    """Return the sum of squared value frequencies in *chars*.

    For each distinct value in ``chars.values()`` the share of entries equal
    to that value is computed, squared, and added to the running total.

    Args:
        chars: Dictionary whose values are the characteristics to compare.

    Returns:
        The accumulated sum; ``0`` when *chars* is empty (the loop body, and
        therefore the division, never runs).
    """
    # Enter code here!
    total = len(chars)
    homophily_sum = 0
    for category in set(chars.values()):
        matches = sum(value == category for value in chars.values())
        share = matches / total * 1
        homophily_sum = homophily_sum + pow(share, 2)
    return homophily_sum
3243

44+
3345
favorite_colors = {
3446
"ankit": "red",
3547
"xiaoyu": "blue",
@@ -40,7 +52,6 @@ def chance_homophily(chars):
4052
print(color_homophily)
4153

4254

43-
4455
print("Village 1 chance of same sex:", chance_homophily(sex1))
4556
# Enter your code here.
4657
print("Village 1 chance of same caste:", chance_homophily(caste1))
@@ -51,6 +62,31 @@ def chance_homophily(chars):
5162
print("Village 2 chance of same caste:", chance_homophily(caste2))
5263

5364

65+
def check_for_homophily(nodes, G, chars, IDs):
    """Count the tie, and the same-characteristic tie, for one node pair.

    Args:
        nodes: Two-element sequence ``[n1, n2]`` holding the pair to test.
        G: Graph-like object providing ``has_edge(n1, n2)``.
        chars: Dictionary mapping an ID to its characteristic.
        IDs: Mapping from a node to its ID (the key used in *chars*).

    Returns:
        tuple: ``(num_ties, num_same_ties)``. ``(0, 0)`` when the pair is
        skipped, ``(1, 0)`` for an edge whose endpoints differ and ``(1, 1)``
        for an edge whose endpoints share the same characteristic.
    """
    n1, n2 = nodes[0], nodes[1]

    # do not double-count edges: only the ordering with n1 > n2 is counted.
    if n1 <= n2:
        return 0, 0

    # Both endpoints need a known characteristic to be comparable.
    if IDs[n1] not in chars or IDs[n2] not in chars:
        return 0, 0

    if not G.has_edge(n1, n2):
        return 0, 0

    num_same_ties = 1 if chars[IDs[n1]] == chars[IDs[n2]] else 0
    return 1, num_same_ties


# The function was defined as `checks_for_homophility` but is called as
# `check_for_homophily`; keep the old spelling as an alias so both resolve.
checks_for_homophility = check_for_homophily
89+
5490

5591
def homophily(G, chars, IDs):
5692
"""
@@ -61,26 +97,25 @@ def homophily(G, chars, IDs):
6197
num_same_ties, num_ties = 0, 0
6298
for n1 in G.nodes():
6399
for n2 in G.nodes():
64-
if n1 > n2: # do not double-count edges!
65-
if IDs[n1] in chars and IDs[n2] in chars:
66-
if G.has_edge(n1, n2):
67-
# Should `num_ties` be incremented? What about `num_same_ties`?
68-
num_ties += 1
69-
if chars[IDs[n1]] == chars[IDs[n2]]:
70100

101+
nodes = [n1, n2]
102+
ties, same_ties = check_for_homophily(nodes, G, chars, IDs)
71103

72-
return (num_same_ties / num_ties)
73-
74-
75-
print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1))
76-
print("Village 1 observed proportion of same caste:", homophily(G1, caste1, pid1))
77-
print("Village 1 observed proportion of same religion:", homophily(G1, religion1, pid1))
78-
# Enter your code here!
79-
print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2))
80-
print("Village 2 observed proportion of same caste:", homophily(G2, caste2, pid2))
81-
print("Village 2 observed proportion of same religion:", homophily(G1, religion2, pid2))
82-
104+
num_ties += ties
105+
num_same_ties += same_ties
83106

84107

108+
return (num_same_ties / num_ties)
85109

86110

111+
# Report the observed homophily for each characteristic, village by village.
print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1))
print("Village 1 observed proportion of same caste:",
      homophily(G1, caste1, pid1))
print("Village 1 observed proportion of same religion:",
      homophily(G1, religion1, pid1))
# Enter your code here!
print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2))
print("Village 2 observed proportion of same caste:",
      homophily(G2, caste2, pid2))
# Village 2 data must be paired with the Village 2 graph; this call passed
# G1 together with religion2/pid2.
print("Village 2 observed proportion of same religion:",
      homophily(G2, religion2, pid2))

0 commit comments

Comments
 (0)