1
1
import pandas as pd
2
- df = pd .read_stata (data_filepath + "individual_characteristics.dta" )
2
from os import getcwd
from os.path import join
import argparse

# Command-line interface: the single option is the directory that holds the
# Stata data file; it defaults to the current working directory.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--data_filepath', help="""Select the directory where
                    is stored the file. Default is the current directory.""",
                    default=getcwd())

args = parser.parse_args()
data_filepath = args.data_filepath

# Build the path with os.path.join rather than string concatenation:
# getcwd() returns a directory without a trailing separator, so plain "+"
# would glue the directory name and the file name together.
df = pd.read_stata(join(data_filepath, "individual_characteristics.dta"))

# One sub-frame per village, selected on the `village` column.
df1 = df[df.village == 1]
df2 = df[df.village == 2]

# Enter code here!
df1.head()
8
19
9
20
10
- sex1 = {df1 .pid [i ] : df1 .resp_gend [i ] for i in range (len (df1 .pid ))}
11
- caste1 = {df1 .pid [i ] : df1 .caste [i ] for i in range (len (df1 .pid ))}
12
- religion1 = {df1 .pid [i ] : df1 .religion [i ] for i in range (len (df1 .pid ))}
21
def get_params(dfx, elem, items_range):
    """Build a dictionary mapping person IDs to one of their characteristics.

    Args:
        dfx: Object exposing an indexable ``pid`` attribute (the person IDs).
        elem: Indexable collection of characteristic values, addressed with
            the same keys as ``dfx.pid``.
        items_range: Iterable of keys to look up in both ``dfx.pid`` and
            ``elem``.

    Returns:
        dict: ``{dfx.pid[i]: elem[i]}`` for every ``i`` in ``items_range``.
    """
    return {dfx.pid[i]: elem[i] for i in items_range}
23
+
24
# Village 1: person ID -> characteristic, one dictionary per characteristic.
sex1 = get_params(df1, df1.resp_gend, range(len(df1.pid)))
caste1 = get_params(df1, df1.caste, range(len(df1.pid)))
# The get_params call was missing here, which left a bare tuple instead of
# the dictionary that the homophily computations expect.
religion1 = get_params(df1, df1.religion, range(len(df1.pid)))

# Continue for df2 as well.

j = 203
# Village 2 is addressed with the keys 203..405.
# NOTE(review): presumably df2 keeps the row labels of the full frame --
# confirm against the data file.
sex2 = get_params(df2, df2.resp_gend, range(203, 406))
caste2 = get_params(df2, df2.caste, range(203, 406))
# Same missing get_params call as for religion1.
religion2 = get_params(df2, df2.religion, range(203, 406))
20
34
21
35
22
- from collections import Counter
23
36
def chance_homophily(chars):
    """Return the chance that two draws from ``chars`` share the same value.

    The result is the sum, over every distinct value in ``chars``, of the
    squared fraction of entries holding that value.

    Args:
        chars: dict mapping an identifier to its (hashable) characteristic.

    Returns:
        The summed squared frequencies; ``0`` when ``chars`` is empty.
    """
    # Imported locally so the function does not rely on a file-level import.
    from collections import Counter

    total = len(chars)
    # Tally every value in a single pass instead of rescanning all values
    # once per distinct value.
    counts = Counter(chars.values())
    return sum((count / total) ** 2 for count in counts.values())
32
43
44
+
33
45
favorite_colors = {
34
46
"ankit" : "red" ,
35
47
"xiaoyu" : "blue" ,
@@ -40,7 +52,6 @@ def chance_homophily(chars):
40
52
print (color_homophily )
41
53
42
54
43
-
44
55
print ("Village 1 chance of same sex:" , chance_homophily (sex1 ))
45
56
# Enter your code here.
46
57
print ("Village 1 chance of same caste:" , chance_homophily (caste1 ))
@@ -51,6 +62,31 @@ def chance_homophily(chars):
51
62
print ("Village 2 chance of same caste:" , chance_homophily (caste2 ))
52
63
53
64
65
def checks_for_homophility(nodes, G, chars, IDs):
    """Report whether one ordered node pair counts as a tie / a same-value tie.

    Args:
        nodes: Sequence whose first two items are the nodes ``n1`` and ``n2``.
        G: Graph object exposing ``has_edge(n1, n2)``.
        chars: dict mapping a person ID to its characteristic.
        IDs: Mapping from a node to its person ID.

    Returns:
        tuple: ``(num_ties, num_same_ties)``. ``num_ties`` is 1 when the pair
        is counted as a tie, else 0; ``num_same_ties`` is 1 when that tie
        also joins two equal characteristics, else 0.
    """
    n1, n2 = nodes[0], nodes[1]

    # do not double-count edges! Only the ordering n1 > n2 is counted, which
    # also leaves out a node paired with itself.
    if n1 <= n2:
        return 0, 0

    # Both endpoints need a known characteristic to be comparable.
    if not (IDs[n1] in chars and IDs[n2] in chars):
        return 0, 0

    if not G.has_edge(n1, n2):
        return 0, 0

    # The pair is a tie; it is a "same" tie when the characteristics match.
    num_same_ties = 1 if chars[IDs[n1]] == chars[IDs[n2]] else 0
    return 1, num_same_ties
89
+
54
90
55
91
def homophily(G, chars, IDs):
    """
    Return the fraction of counted ties in G whose two endpoints share the
    same characteristic.

    Args:
        G: Graph object exposing ``nodes()`` and ``has_edge(n1, n2)``.
        chars: dict mapping a person ID to its characteristic.
        IDs: Mapping from a node of G to its person ID.

    Raises:
        ZeroDivisionError: if no tie is counted at all.
    """
    num_same_ties, num_ties = 0, 0
    for n1 in G.nodes():
        for n2 in G.nodes():
            nodes = [n1, n2]
            # The helper is named `checks_for_homophility`; the previous
            # spelling `check_for_homophily` is not defined anywhere and
            # raised a NameError on the first pair.
            ties, same_ties = checks_for_homophility(nodes, G, chars, IDs)

            num_ties += ties
            num_same_ties += same_ties

    return (num_same_ties / num_ties)
85
109
86
110
111
# Observed homophily for each village and characteristic.
print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1))
print("Village 1 observed proportion of same caste:",
      homophily(G1, caste1, pid1))
print("Village 1 observed proportion of same religion:",
      homophily(G1, religion1, pid1))
# Enter your code here!
print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2))
print("Village 2 observed proportion of same caste:",
      homophily(G2, caste2, pid2))
# Village 2 data goes with the village 2 graph (G2); this line previously
# passed G1 alongside religion2 and pid2.
print("Village 2 observed proportion of same religion:",
      homophily(G2, religion2, pid2))
0 commit comments