forked from worldbank/pip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpip_clean.ado
258 lines (203 loc) · 8.16 KB
/
pip_clean.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/*==================================================
project: Clean data downloaded from PIP API
Author: R.Andres Castaneda
Dependencies: The World Bank
----------------------------------------------------
Creation Date: 5 Jun 2019 - 17:09:04
Modification Date: September, 2021
Do-file version: 02
References: Adapted from povcalnet_clean
Output: dta
==================================================*/
/*==================================================
0: Program set up
==================================================*/
program define pip_clean, rclass
	// pip_clean: tidy the dataset currently in memory after a PIP API download.
	//
	// Arguments
	//   type          "1" for country-level results, "2" for aggregate results.
	// Options
	//   year(string)  "last" keeps only the latest requestyear per country
	//                 (type 1) or per region (type 2).
	//   region(string) space-separated region codes to keep (type 2 only);
	//                 "all" keeps every region.
	//   iso           recode legacy country codes (KSV, TMP, WBG, ZAR).
	//   wb, nocensor  accepted for interface compatibility; not used here.
	//   rc(string)    status passed by the caller: "copy" = download failed,
	//                 "in" = downloaded file could not be loaded.
	//   pause         switch -pause- on for debugging.
	//
	// Errors
	//   673   when rc(copy) is passed.
	//   2000  when rc(in) is passed or the dataset in memory is empty.
	version 16.0

	syntax anything(name=type),      ///
		[                            ///
			year(string)             ///
			region(string)           ///
			iso                      ///
			wb                       ///
			nocensor                 ///
			rc(string)               ///
			pause                    ///
		]

	if ("`pause'" == "pause") pause on
	else                      pause off

	/*==================================================
	           handling errors
	==================================================*/

	if ("`rc'" == "copy") {
		noi dis ""
		noi dis in red "It was not possible to download data from the PIP API."
		noi dis ""
		noi dis in white `"(1) Please check your Internet connection by "' _c
		*noi dis in white `"{browse "http://iresearch.worldbank.org/PovcalNet/home.aspx" :clicking here}"'
		noi dis in white `"{browse "https://pipscoreapiqa.worldbank.org" :clicking here}"' // needs to be replaced
		noi dis in white `"(2) Please consider adjusting your Stata timeout parameters. For more details see {help netio}"'
		noi dis in white `"(3) Please send us an email to:"'
		// NOTE(review): the address below looks mangled (e-mail obfuscation
		// placeholder) -- restore the real contact address.
		noi dis in white _col(8) `"email: [email protected]"'
		noi dis in white _col(8) `"subject: pip query error 20 on `c(current_date)' `c(current_time)'"'
		noi di ""
		error 673
	}

	if ("`rc'" == "in" | c(N) == 0) {
		noi di ""
		noi di as err "There was a problem loading the downloaded data." /*
			*/ _n "Check that all parameters are correct and try again."
		noi dis as text `"{p 4 4 2} You could use the {stata pip_info:guided selection} instead. {p_end}"'
		noi di ""
		// Stop with an explicit return code (2000 = no observations); the
		// previous bare -break- / -error- pair had no return code at all.
		error 2000
	}

	/*==================================================
	           1: type 1 (country-level results)
	==================================================*/

	// Both request types work with these two names until the final rename.
	rename (reporting_year reporting_pop) (requestyear reqyearpopulation)

	if ("`type'" == "1") {

		if ("`year'" == "last") {
			// Keep only the most recent year available for each country.
			tempvar maximum_y
			bys country_code: egen `maximum_y' = max(requestyear)
			keep if `maximum_y' == requestyear
		}

		***************************************************
		* 5. Labeling/cleaning
		***************************************************

		gen countryname = ""

		// API names -> legacy povcalnet-style names (positions must match).
		rename (country_code region_code survey_coverage survey_year       ///
		        welfare_type is_interpolated distribution_type             ///
		        poverty_line poverty_gap poverty_severity)                 ///
		       (countrycode regioncode coveragetype datayear               ///
		        datatype isinterpolated usemicrodata                       ///
		        povertyline povgap povgapsqr)

		local keepvars countrycode countryname regioncode coveragetype     ///
			requestyear datayear datatype isinterpolated usemicrodata      ///
			ppp povertyline mean headcount povgap povgapsqr watts gini     ///
			median mld polarization reqyearpopulation decile? decile10

		keep  `keepvars'
		order `keepvars'

		if ("`iso'" != "") {
			// Legacy World Bank codes -> current codes.
			cap replace countrycode = "XKX" if countrycode == "KSV"
			cap replace countrycode = "TLS" if countrycode == "TMP"
			cap replace countrycode = "PSE" if countrycode == "WBG"
			cap replace countrycode = "COD" if countrycode == "ZAR"
		}

		*rename prmld mld
		// -1 and 0 are treated as "not available" for these indicators.
		foreach v of varlist polarization median gini mld decile? decile10 {
			qui cap replace `v' = . if `v' == -1 | `v' == 0
		}

		// Captured: a no-op (type mismatch) when ppp is numeric.
		cap drop if ppp == ""
		cap drop svyinfoid

		pause query - after replacing invalid values to missing values

		* cap drop polarization

		// Encode coverage as a labelled numeric variable.
		tostring coveragetype, replace
		replace coveragetype = "1" if coveragetype == "rural"
		replace coveragetype = "2" if coveragetype == "urban"
		replace coveragetype = "4" if coveragetype == "A" // not available in pip data
		replace coveragetype = "3" if coveragetype == "national"
		destring coveragetype, force replace
		label define coveragetype 1 "Rural"                  ///
		                          2 "Urban"                  ///
		                          3 "National"               ///
		                          4 "National (Aggregate)", modify
		label values coveragetype coveragetype

		// Encode welfare type as a labelled numeric variable.
		replace datatype = "1" if datatype == "consumption"
		replace datatype = "2" if datatype == "income"
		destring datatype, force replace
		label define datatype 1 "Consumption" 2 "Income", modify
		label values datatype datatype

		// No label is set for "region": that variable is not kept above, and
		// the abbreviation would otherwise relabel regioncode.
		label var isinterpolated    "Data is interpolated"
		label var countrycode       "Country/Economy Code"
		label var usemicrodata      "Data comes from grouped or microdata"
		label var countryname       "Country/Economy Name"
		label var regioncode        "Region Code"
		label var coveragetype      "Coverage"
		label var requestyear       "Year you requested"
		label var datayear          "Survey year"
		label var datatype          "Welfare measured by income or consumption"
		label var ppp               "Purchasing Power Parity"
		label var povertyline       "Poverty line in PPP$ (per capita per day)"
		label var mean              "Average monthly per capita income/consumption in PPP$"
		label var headcount         "Poverty Headcount"
		label var povgap            "Poverty Gap."
		label var povgapsqr         "Squared poverty gap."
		label var watts             "Watts index"
		label var gini              "Gini index"
		label var median            "Median monthly income or expenditure in PPP$"
		label var mld               "Mean Log Deviation"
		label var reqyearpopulation "Population in year"

		* Standardize names with R package
		rename (requestyear reqyearpopulation) (year population)

		sort countrycode year coveragetype
	}

	/*==================================================
	           2: for Aggregate requests
	==================================================*/

	if ("`type'" == "2") {

		// NOTE(review): in an -if- command, region_code is evaluated on the
		// first observation only -- confirm that is the intended check.
		if ("`region'" != "" & region_code != "CUSTOM") {
			tempvar keep_this
			gen `keep_this' = 0

			// Turn  EAP ECA  into  "EAP", "ECA"  for use inside inlist().
			local region_l = `""`region'""'
			local region_l: subinstr local region_l " " `"", ""', all

			dis "`region_l'"
			dis "`keep_this'"
			replace `keep_this' = 1 if inlist(region_code, `region_l')
			if lower("`region'") == "all" replace `keep_this' = 1
			keep if `keep_this' == 1
		}

		pause clean - after dropping by region

		if ("`year'" == "last") {
			// Keep only the most recent year available for each region.
			tempvar maximum_y
			bys region_code: egen `maximum_y' = max(requestyear)
			keep if `maximum_y' == requestyear
		}

		***************************************************
		* 4. Renaming and labeling
		***************************************************

		*rename regiontitle region
		*rename hc headcount
		*rename reporting_pop reqyearpopulation
		rename (region_code poverty_line poverty_gap poverty_severity)     ///
		       (regioncode  povertyline  povgap      povgapsqr)

		label var requestyear       "Year you requested"
		label var povertyline       "Poverty line in PPP$ (per capita per day)"
		label var mean              "Average monthly per capita income/consumption in PPP$"
		label var headcount         "Poverty Headcount"
		label var povgap            "Poverty Gap"
		label var povgapsqr         "Squared poverty gap"
		label var reqyearpopulation "Population in year"

		local keepvars requestyear regioncode povertyline mean headcount   ///
			povgap povgapsqr reqyearpopulation

		keep  `keepvars'
		order `keepvars'

		* Standardize names with R package
		rename (requestyear reqyearpopulation) (year population)

	} // end of type 2

end
exit
/* End of do-file */
><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
Notes:
1.
2.
3.
Version Control: