|
26 | 26 | for dgp_type in dgp_types: |
27 | 27 | datasets_dgp = [] |
28 | 28 | for i in range(n_rep): |
29 | | - data = make_did_SZ2020(n_obs=n_obs, dgp_type=dgp_type, cross_sectional_data=False) |
| 29 | + df = make_did_SZ2020(n_obs=n_obs, dgp_type=dgp_type, cross_sectional_data=False, return_type='DataFrame', return_diff = False) |
| 30 | + # Add id variable |
| 31 | + df['id'] = np.arange(n_obs) |
| 32 | + # Reorganize the data into long format (one row per individual per time period) |
| 33 | + df = pd.wide_to_long(df, stubnames='y', i='id', j='t').reset_index().sort_values(['id', 't']) |
| 34 | + # Shift t by one so that the periods are labelled 1 and 2 instead of 0 and 1 |
| 35 | + df['t'] = df['t'] + 1 |
| 36 | + # Create a new column called G which is based on d: G = 2*d |
| 37 | + # G: 0 if never treated and 2 if treated in period 2 |
| 38 | + df['G'] = df['d']*2 |
| 39 | + # drop d |
| 40 | + df.drop(columns = ['d'], inplace = True) |
| 41 | + |
| 42 | + data = dml.DoubleMLPanelData(df, y_col='y', d_cols='G' , t_col = "t", id_col = "id", x_cols=['Z1', 'Z2', 'Z3', 'Z4']) |
30 | 43 | datasets_dgp.append(data) |
31 | 44 | datasets.append(datasets_dgp) |
32 | 45 |
|
|
66 | 79 | for score in hyperparam_dict["score"]: |
67 | 80 | for in_sample_normalization in hyperparam_dict["in sample normalization"]: |
68 | 81 | if score == "experimental": |
69 | | - dml_DiD = dml.DoubleMLDID( |
| 82 | + dml_DiD = dml.DoubleMLDIDBINARY( |
70 | 83 | obj_dml_data=obj_dml_data, |
71 | 84 | ml_g=ml_g, |
72 | 85 | ml_m=None, |
73 | 86 | score=score, |
74 | 87 | in_sample_normalization=in_sample_normalization) |
75 | 88 | else: |
76 | 89 | assert score == "observational" |
77 | | - dml_DiD = dml.DoubleMLDID( |
| 90 | + dml_DiD = dml.DoubleMLDIDBINARY( |
78 | 91 | obj_dml_data=obj_dml_data, |
79 | 92 | ml_g=ml_g, |
80 | 93 | ml_m=ml_m, |
| 94 | + g_value = 2, |
| 95 | + t_value = 1, |
81 | 96 | score=score, |
82 | 97 | in_sample_normalization=in_sample_normalization) |
83 | 98 | dml_DiD.fit(n_jobs_cv=5) |
|
0 commit comments