-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQuantiles.do
136 lines (123 loc) · 5.77 KB
/
Quantiles.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
*****************************
*** SET WORKING DIRECTORY ***
*****************************
// Pick the project folder based on the Windows user name running the script.
// All later file paths ("data/cleaned/...") are relative to this folder.
// The chain has to start with a plain `if`: an `else` with no preceding `if`
// is not a well-formed if/else chain.
// Marta
if (lower("`c(username)'") == "wb562318") {
    cd "C:/Users/wb562318/OneDrive - WBG/Documents/Global Poverty/missing data/Missing Data"
}
// Daniel
else if (lower("`c(username)'") == "wb514665") {
    cd "C:\Users\WB514665\OneDrive - WBG\pip\GitHub\Missing-Data"
}
******************************************************
*** GENERATE DATASET WITH 100 QUANTILES PER SURVEY ***
******************************************************
// Queries pip once per population share (0.005, 0.015, ..., 0.995) and stacks
// the results in data/cleaned/Quantiles_Raw_2017PPP.dta.

// Remove any file left over from a previous run. The Stata command is `erase`;
// `remove` does not exist, and under `capture` that error went unnoticed.
// `capture` is kept so that a missing file is not treated as an error.
cap erase "data/cleaned/Quantiles_Raw_2017PPP.dta"
forvalues quantile = 0.005(0.01)0.99501 {
    // Progress indicator
    disp in red "`quantile'"
    qui pip, country(all) year(all) popsh(`quantile') clear
    keep country_code welfare_time welfare_type reporting_level poverty_line headcount
    // Only execute the following from the second quantile queried onwards:
    // the first query creates the file, later queries are appended to it.
    if `quantile' > 0.01 {
        qui tempfile query
        qui save `query'
        // No `capture` here: if the accumulated file cannot be read, stopping is
        // safer than silently appending the current query to itself.
        qui use "data/cleaned/Quantiles_Raw_2017PPP.dta", clear
        qui append using `query'
    }
    qui save "data/cleaned/Quantiles_Raw_2017PPP.dta", replace
}
// Restrict the stacked query results to national surveys. ARG and SUR, BOL up to
// 1992 and URY before 2006 are kept regardless of their reporting level.
// (`&` already binds tighter than `|`; the parentheses only make the grouping explicit.)
keep if (reporting_level=="national")                    ///
      | inlist(country_code,"ARG","SUR")                 ///
      | (country_code=="BOL" & welfare_time<=1992)       ///
      | (country_code=="URY" & welfare_time<2006)
// popsh() doesn't work for IDN/IND/CHN, so they are removed here and handled separately
keep if !inlist(country_code,"CHN","IDN","IND")
drop reporting_level
// Remove erroneous records: headcounts of exactly -1 or 1 and non-positive poverty lines
drop if inlist(headcount,-1,1) | poverty_line<=0
duplicates drop
// Work around a strange error with MKD: when exactly two rows share the same
// survey and poverty line, drop the second of the pair
bysort country_code welfare_time welfare_type poverty_line: drop if _N==2 & _n==2
// Shorter identifier names for the saved dataset
rename (country_code welfare_time) (code year)
save "data/cleaned/Quantiles_Raw_2017PPP.dta", replace
use "data/cleaned/Quantiles_Raw_2017PPP.dta", clear
**********************************************************************
*** GENERATE DATASET WITH 100 QUANTILES PER SURVEY FOR CHN/IDN/IND ***
**********************************************************************
// popsh() only works for urban/rural separately for CHN/IDN/IND, not for the
// national level, so the quantiles for these countries are derived differently.
**************************************
*** FINDING POVERTY LINES TO QUERY ***
**************************************
// Strategy: query a dense grid of poverty lines, then convert the resulting
// poverty rates into quantiles by linear interpolation between the queried lines.
// A denser grid gives more precise results.
clear
set obs 1000
// Up to $2: fixed steps of 2 cents
gen double poverty_line = _n/50
// Above $2: each line is 1% higher than the previous one
// (replace runs observation by observation, so the increase compounds)
replace poverty_line = 1.01*poverty_line[_n-1] if poverty_line>2
// Above $50: each line is 2% higher than the previous one
replace poverty_line = 1.02*poverty_line[_n-1] if poverty_line>50
// Lines above $100 are not needed (the 99.5th percentile in the surveys for
// these countries is far below $100)
keep if poverty_line<=100
replace poverty_line = round(poverty_line,0.01)
// Bundle the poverty lines into space-separated groups of five. Some experimentation
// suggests pip.ado is a bit faster this way than when querying one line at a time.
tostring poverty_line, replace force
gen poverty_line5 = poverty_line
forvalues step = 1/4 {
    replace poverty_line5 = poverty_line5 + " " + poverty_line[_n+`step']
}
// Keep the first row of every group of five (rows 1, 6, 11, ...)
drop if mod(_n-1,5)!=0
drop poverty_line
**************************
*** QUERYING pip ***
**************************
// Queries pip for CHN/IDN/IND once per group of five poverty lines and stacks the
// national-level results in data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta.
// The dataset of poverty lines in memory is preserved and restored around the loop.
preserve
// Remove output left over from a previous run. The file name must match the one
// written below; otherwise new results would be appended to stale data.
cap erase "data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta"
// Flag for the first query: it creates the output file, later ones are appended to it.
local first_query = 1
qui levelsof poverty_line5
foreach lvl in `r(levels)' {
    // Progress indicator
    disp as error "`lvl'"
    qui pip, country(CHN IDN IND) year(all) povline(`lvl') clear
    keep if reporting_level=="national"
    keep country_code welfare_time poverty_line headcount welfare_type
    if !`first_query' {
        tempfile querieddata
        save `querieddata'
        // No `capture` here: a silent failure of `use` would leave the current
        // query in memory and append it to itself.
        use "data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta", clear
        append using `querieddata'
    }
    save "data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta", replace
    local first_query = 0
}
restore
***************************
*** TURN INTO QUANTILES ***
***************************
// Variables that together identify one survey
local survey_id country_code welfare_time welfare_type
// Step 1: build a skeleton with, for every survey, the 100 target quantiles
// (headcount = 0.005, 0.015, ..., 0.995) and a poverty line that is still unknown.
// The quantile flag marks these skeleton rows.
use "data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta", clear
keep `survey_id'
duplicates drop
expand 100
bysort `survey_id': gen headcount = _n/100-0.005
gen poverty_line = .
gen quantile = 1
tempfile target_quantiles
save `target_quantiles'
// Step 2: add the skeleton rows to the queried (poverty line, headcount) pairs
use "data/cleaned/Surveydata_CHNIDNIND_2017PPP.dta", clear
merge m:1 `survey_id' headcount using `target_quantiles', nogen
sort `survey_id' headcount
// Step 3: within each survey, linearly interpolate the poverty line over headcount
// so that the skeleton rows receive a poverty line
bysort `survey_id' (headcount): ipolate poverty_line headcount, gen(poverty_line_temp)
// Step 4: keep only the rows for the target quantiles, now carrying the
// interpolated poverty line
keep if quantile==1
drop poverty_line quantile
rename (poverty_line_temp welfare_time country_code) (poverty_line year code)
save "data/cleaned/Quantiles_CHNIDNIND_2017PPP.dta", replace
*************************************************
*** MERGE WITH QUANTILES FROM OTHER COUNTRIES ***
*************************************************
// Stack the popsh()-based quantiles and the interpolated CHN/IDN/IND quantiles
// into the final dataset.
// `use` has no `replace` option; `clear` is what discards the data in memory.
use "data/cleaned/Quantiles_Raw_2017PPP.dta", clear
append using "data/cleaned/Quantiles_CHNIDNIND_2017PPP.dta"
sort code welfare_type year headcount
save "data/cleaned/Quantiles_2017PPP.dta", replace