From f6c78ba87a4d2e0a827340fdec9be9bb4d7288de Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 27 Feb 2020 14:29:24 -0500 Subject: [PATCH 01/20] Coverage now in letter rather than numbers in raw_GroupData file --- pcn_groupdata.ado | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/pcn_groupdata.ado b/pcn_groupdata.ado index 74782d0..e3a62f7 100644 --- a/pcn_groupdata.ado +++ b/pcn_groupdata.ado @@ -69,25 +69,20 @@ import excel "raw_GroupData.xlsx", sheet("raw_GroupData") firstrow clear tostring survey, replace // in case survey is unknown gen id = countrycode + " " + strofreal(year) + " " + /* -*/ strofreal(coverage) + " " + datatype + " 0" + /* -*/ strofreal(formattype) + " " + survey +*/ coverage + " " + datatype + " 0" + /* +*/ strofreal(formattype) + " " + survey //------------saving data vintages levelsof id, local(ids) qui foreach id of local ids { - local cc: word 1 of `id' - local yr: word 2 of `id' - local cg: word 3 of `id' - local dt: word 4 of `id' - local ft: word 5 of `id' - local sy: word 6 of `id' - - if (`cg' == 1) local cov = "R" - else if (`cg' == 2) local cov = "U" - else if (`cg' == 3) local cov = "N" - else local cov = "A" + local cc: word 1 of `id' + local yr: word 2 of `id' + local cov: word 3 of `id' + local dt: word 4 of `id' + local ft: word 5 of `id' + local sy: word 6 of `id' local l2y = substr("`yr'", 3,.) @@ -198,24 +193,19 @@ qui foreach id of local ids { *get the mean sum welfare [w = welfare], meanonly - local `cc'`yr'`cg'm = r(mean) + local `cc'`yr'`cov'm = r(mean) restore } //------------ Check both files are in the most recent folder qui foreach id of local ids { - local cc: word 1 of `id' - local yr: word 2 of `id' - local cg: word 3 of `id' - local dt: word 4 of `id' - local ft: word 5 of `id' - local sy: word 6 of `id' - - if (`cg' == 1) local cov = "R" - else if (`cg' == 2) local cov = "U" - else if (`cg' == 3) local cov = "N" - else local cov = "A" + local cc: word 1 of `id' + local yr: word 2 of `id' + local cov: word 3 of `id' + local dt: word 4 of `id' + local ft: word 5 of `id' + local sy: word 6 of `id' local l2y = substr("`yr'", 3,.) From 9cb951693655dd4af6ad89765bf4f1ead7129430 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 27 Feb 2020 15:10:40 -0500 Subject: [PATCH 02/20] require country option --- pcn_groupdata.ado | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pcn_groupdata.ado b/pcn_groupdata.ado index e3a62f7..a553926 100644 --- a/pcn_groupdata.ado +++ b/pcn_groupdata.ado @@ -18,8 +18,8 @@ Output: ==================================================*/ program define pcn_groupdata, rclass syntax [anything(name=subcmd id="subcommand")], /// -[ /// -COUNtries(string) /// +COUNtry(string) /// +[ /// Years(numlist) /// maindir(string) /// type(string) /// @@ -37,6 +37,11 @@ if ("`pause'" == "pause") pause on else pause off +if (wordcount("`country'") != 1) { + noi disp in red "{it: country()} must have only one countrycode" + error +} + //------------set up if ("`maindir'" == "") cd "p:\01.PovcalNet\03.QA\01.GroupData" else cd "`maindir'" From 83ad9a3763827fc1b2ed01a07f6e0f112705812a Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 27 Feb 2020 15:11:08 -0500 Subject: [PATCH 03/20] search by country --- pcn_groupdata.ado | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/pcn_groupdata.ado b/pcn_groupdata.ado index a553926..26613fa 100644 --- a/pcn_groupdata.ado +++ b/pcn_groupdata.ado @@ -27,7 +27,7 @@ clear /// pause /// vermast(string) /// veralt(string) /// -replace /// +replace /// * /// ] version 14 @@ -56,21 +56,9 @@ local dirsep = c(dirsep) local vintage: disp %tdD-m-CY date("`c(current_date)'", "DMY") - -*------------------ Initial Parameters ------------------ -local mfiles: dir "../../00.Master/02.vintage/" files "Master_*.xlsx", respect -local vcnumbers: subinstr local mfiles "Master_" "", all -local vcnumbers: subinstr local vcnumbers ".xlsx" "", all -local vcnumbers: list sort vcnumbers - -mata: VC = strtoreal(tokens(`"`vcnumbers'"')); /* -*/ st_local("maxvc", strofreal(max(VC), "%15.0f")) - -* exDate = Format(Now(), "yyyymmddHhNnSs") // VBA name - - //------------load data -import excel "raw_GroupData.xlsx", sheet("raw_GroupData") firstrow clear +import excel "04.formatted/`country'/`country'_raw_GroupData.xlsx", /* + */ sheet("raw_GroupData") firstrow clear tostring survey, replace // in case survey is unknown gen id = countrycode + " " + strofreal(year) + " " + /* @@ -169,16 +157,16 @@ qui foreach id of local ids { local datetimeHRF: disp %tcDDmonCCYY_HH:MM:SS `datetime' local datetimeHRF = trim("`datetimeHRF'") - char _dta[filename] `fileid'_GROUP-`cov'.dta - char _dta[id] `fileid' - char _dta[datatype] `dt' - char _dta[countrycode] `cc' - char _dta[year] `yr' - char _dta[coverage] `cov' - char _dta[groupdata] 1 - char _dta[formattype] `ft' - char _dta[datetime] `datetime' - char _dta[datetimeHRF] `datetimeHRF' + char _dta[filename] `fileid'_GROUP-`cov'.dta + char _dta[id] `fileid' + char _dta[datatype] `dt' + char _dta[countrycode] `cc' + char _dta[year] `yr' + char _dta[survey_coverage] `cov' + char _dta[groupdata] 1 + char _dta[formattype] `ft' + char _dta[datetime] `datetime' + char _dta[datetimeHRF] `datetimeHRF' cap mkdir "`sydir'/_vintage" save "`sydir'/_vintage/`signature'_`datetime'.dta", replace From ea23401e9ac3cefc82ad05557f51e2831df7fa35 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 27 Feb 2020 15:13:00 -0500 Subject: [PATCH 04/20] fix bug --- pcn.ado | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcn.ado b/pcn.ado index f2555db..5e2829e 100644 --- a/pcn.ado +++ b/pcn.ado @@ -89,7 +89,7 @@ qui { noi disp as text "Options available to download" local i = 0 - noi disp _n "select survey to load" _request(_survey) + noi disp _n "select survey to load" foreach db of local dldb { local ++i noi disp `" `i' {c |} {stata `db'}"' @@ -227,7 +227,7 @@ qui { if inlist(lower("`subcmd'"), "group", "groupdata", "gd", "groupd") { - noi pcn_groupdata, countries(`countries') years(`years') type(`type') /* + noi pcn_groupdata, country(`countries') years(`years') type(`type') /* */ vermast(`vermast') veralt(`veralt') /* */ `pause' `options' return add From 984f50af939622676a2d7aafef41dbd4107899ef Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 28 Feb 2020 12:08:22 -0500 Subject: [PATCH 05/20] include as special obs only in using NAs special for GDP --- pcn_master_update.ado | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index 47f3deb..a68ffcb 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -218,9 +218,10 @@ qui { merge 1:1 countrycode coverage year using `sna', replace update rename gdp sp_= - gen special= inlist(_merge, 3, 4, 5) + gen special= inlist(_merge, 2, 3, 4, 5) drop _merge + keep if inrange(year,1960, `maxyear') //======================================================== // Espen's code //======================================================== @@ -266,7 +267,6 @@ qui { //---- Espen's code ----- End - keep if inrange(year,1960, `maxyear') missings dropobs, force keep countrycode coverage year new_gdp preserve From d37d0833b5fd6708801f3948384140ab463df4c8 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 28 Feb 2020 12:23:32 -0500 Subject: [PATCH 06/20] add metadata of GDP sheet --- pcn_master_update.ado | 299 ++++++++++++++++++++++-------------------- 1 file changed, 155 insertions(+), 144 deletions(-) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index a68ffcb..07f05f0 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -20,7 +20,8 @@ syntax [anything], /// update(string) /// [ /// cpivin(string) /// -MAXYear(integer 2018) /// +MAXYear(integer 2018) /// +FORCE /// ] version 15 // this is really important @@ -57,17 +58,17 @@ if wordcount("`update'") != 1 { } qui { - + /*================================================== 1: CPI ==================================================*/ if (lower("`update'") == "cpi") { - + *----------Find most recent version of CPI data in datalibweb if ("`cpivin'" == "") { local cpipath "c:\ado\personal\Datalibweb\data\GMD\SUPPORT\SUPPORT_2005_CPI" local cpidirs: dir "`cpipath'" dirs "*CPI_*_M" - + local cpivins "0" foreach cpidir of local cpidirs { if regexm("`cpidir'", "cpi_v([0-9]+)_m") local cpivin = regexs(1) @@ -75,17 +76,17 @@ qui { } local cpivin = max(`cpivins') } // if no cpi vintage is selected - + cap datalibweb, country(Support) year(2005) type(GMDRAW) /* */ fileserver surveyid(Support_2005_CPI_v0`cpivin'_M) /* */ filename(Final_CPI_PPP_to_be_used.dta) - + *Special cases *replace cpi2011_unadj = cpi2011 if code=="IDN"|code=="IND"|code=="CHN" - + replace cpi2011_unadj = cpi2011 if inlist(code, "IDN", "IND", "CHN") - - + + //------------ vector of available years sum year, meanonly local ymin = r(min) @@ -93,20 +94,20 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + //------------Rename to match Master file especifications rename (cpi2011_unadj code levelnote countryname) (y CountryCode Coverqge CountryName) keep y CountryCode Coverqge CountryName survname year preserve - contract CountryCode Coverqge CountryName survname - drop _freq - local csize = `ymax'- `ymin' - expand `csize' - bysort CountryCode Coverqge CountryName survname: egen year = seq() - replace year = year - 1 + `ymin' - tempfile cdata - save `cdata' + contract CountryCode Coverqge CountryName survname + drop _freq + local csize = `ymax'- `ymin' + expand `csize' + bysort CountryCode Coverqge CountryName survname: egen year = seq() + replace year = year - 1 + `ymin' + tempfile cdata + save `cdata' restore merge 1:1 CountryCode Coverqge CountryName survname year using `cdata', nogen @@ -120,65 +121,65 @@ qui { //------------Re format to export reshape wide y, i(CountryCode CountryName Coverqge survname) j(year) - + collapse (mean) y*, by(CountryCode CountryName Coverqge) // fix if cpi per survey change tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "CPI" export excel CountryName Coverqge CountryCode /* */ using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(variables) - - + + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel D1 = matrix(`C') putexcel D2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } // end of CPI update - - + + /*================================================= GDP ==================================================*/ if (inlist(lower("`update'"),"gdp")) { - - + + //======================================================== // gets data from WDI API //======================================================== - + /* Note: This section is based on Espen's do-files available in p:\02.personal\_handover\Espen\NAS process\*/ - + *##s set checksum off wbopendata, indicator(NY.GDP.PCAP.KD) long clear ren ny_gdp_pcap_kd wdi_gdp keep countrycode year wdi_gdp - + gen sourcegdp="WDI 2019" - + local madison "https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2018.dta" merge 1:1 countrycode year using "`madison'", nogen rename rgdpnapc mdp_gdp - + replace sourcegdp ="Maddison 2018" if sourcegdp == "" - + replace mdp_gdp = . if year>1999 // do not use madison for recent spells - + //======================================================== // Special cases //======================================================== @@ -187,10 +188,10 @@ qui { tostring coverage, replace replace coverage =cond(coverage == "1", "National", /* */ cond(coverage == "2", "Urban", "Rural")) - + tempfile fna save `fna' - + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\04.NationalAccounts\data" local files: dir "`popdir'" files "NAS special*xlsx" @@ -203,71 +204,81 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/NAS special_`fver'.xlsx", describe - + import excel using "`popdir'/NAS special_`fver'.xlsx", /* */ clear sheet("`sheet'") firstrow case(lower) - + tempfile sna save `sna' - + * Merge with special cases and downloaded data use `fna', clear - + merge 1:1 countrycode coverage year using `sna', replace update rename gdp sp_= - + gen special= inlist(_merge, 2, 3, 4, 5) drop _merge - + keep if inrange(year,1960, `maxyear') //======================================================== // Espen's code //======================================================== //---- ----- Start - + gen new_gdp=wdi_gdp // default replace new_gdp=sp_gdp if special // default - - + + local s "mdp_gdp" - + bys countrycode coverage (year): gen lfbck_`s'= /* */ (new_gdp!=. & new_gdp[_n-1]==. & _n!=1)*new_gdp/`s' - + bys countrycode coverage (year): egen lfbck_`s'i=max(lfbck_`s') - + // Linking factors, forward bys countrycode coverage (year): gen lffwd_`s'= /* */ (new_gdp!=. & new_gdp[_n+1]==. & _n!=_N)*new_gdp/`s' - + bys countrycode coverage (year): egen lffwd_`s'i=max(lffwd_`s') - + // Assess where to apply (forward==1 or backward==1) bys countrycode coverage (year): gen gap_`s'=1 if new_gdp==. & new_gdp[_n-1]!=. bys countrycode coverage (year): replace gap_`s'=1 if _n==1 bys countrycode coverage (year): gen gapsum_`s'=sum(gap_`s') - + // Apply: create linked value gen lvfwd_`s'=`s'*lffwd_`s'i gen lvbck_`s'=`s'*lfbck_`s'i - + // Replace where missing and indicate source - + // fwd replace new_gdp= lvfwd_`s' if new_gdp==. & gapsum_`s'==2 replace sourcegdp="`s'" if new_gdp==. & gapsum_`s'==2 & lvfwd_`s'!=. - + // bck replace new_gdp = lvbck_`s' if new_gdp==. & gapsum_`s'==1 replace sourcegdp = "`s'" if new_gdp==. & gapsum_`s'==1 & lvbck_`s'!=. replace sourcegdp = "NONE" if new_gdp==. - + //---- Espen's code ----- End - - missings dropobs, force + + local msheet = upper("`update'") + cap datasignature confirm using "`masterdir'/03.metadata/`msheet'" + if (_rc == 0 & "`force'" == "") { + noi disp in g "Sheet `msheet' has not changed since last time. No update will be made." + exit + } + + datasignature set, reset saving("`masterdir'/03.metadata/`msheet'", replace) + save "`masterdir'/03.metadata/_vintage/`msheet'_`date_time'.dta", replace + save "`masterdir'/03.metadata/`msheet'.dta", replace + keep countrycode coverage year new_gdp preserve datalibweb_inventory, clear @@ -275,7 +286,7 @@ qui { save `dlw' restore merge m:1 countrycode using `dlw', keep(match) nogen - + //--vector of available years sum year, meanonly local ymin = r(min) @@ -283,71 +294,71 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + *##e rename new_gdp y reshape wide y, i(countryname countrycode coverage) j(year) - - + + // cleaning drop if inlist(region, "NAC", "OTHERS") drop region missings dropvars, force - + gen note = "" local idvars "countryname coverage countrycode note" order `idvars' sort `idvars' - + //------------ modify master file - + tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "GDP" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel E1 = matrix(`C') putexcel E2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } - - + + //================================================= // PCE //================================================= if (inlist(lower("`update'"),"pce", "hfce")) { - - + + //======================================================== // gets data from WDI API //======================================================== - + /* Note: This section is based on Espen's do-files available in p:\02.personal\_handover\Espen\NAS process\*/ - - + + set checksum off wbopendata, indicator(NE.CON.PRVT.PC.KD) long clear ren ne_con_prvt_pc_kd wdi_pce keep countrycode year wdi_pce - + gen sourcepce="WDI 2019" - + //======================================================== // Special cases //======================================================== @@ -356,10 +367,10 @@ qui { tostring coverage, replace replace coverage =cond(coverage == "1", "National", /* */ cond(coverage == "2", "Urban", "Rural")) - + tempfile fna save `fna' - + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\04.NationalAccounts\data" local files: dir "`popdir'" files "NAS special*xlsx" @@ -372,36 +383,36 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/NAS special_`fver'.xlsx", describe - + import excel using "`popdir'/NAS special_`fver'.xlsx", /* */ clear sheet("`sheet'") firstrow case(lower) - + tempfile sna save `sna' - + * Merge with special cases and downloaded data use `fna', clear - + merge 1:1 countrycode coverage year using `sna', replace update rename pce sp_= - + gen special= inlist(_merge, 3, 4, 5) drop _merge - + //======================================================== // Espen's code //======================================================== //---- ----- Start - + gen new_pce=wdi_pce // default replace new_pce=sp_pce if special // default replace sourcepce = "NONE" if new_pce==. - + //---- Espen's code ----- End - - + + keep if inrange(year,1960, `maxyear') missings dropobs, force keep countrycode coverage year new_pce @@ -411,7 +422,7 @@ qui { save `dlw' restore merge m:1 countrycode using `dlw', keep(match) nogen - + //--vector of available years sum year, meanonly local ymin = r(min) @@ -419,62 +430,62 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + rename new_pce y reshape wide y, i(countryname countrycode coverage) j(year) - + // cleaning drop if inlist(region, "NAC", "OTHERS") drop region missings dropvars, force - + gen note = "" local idvars "countryname coverage countrycode note" order `idvars' sort `idvars' - + //------------ modify master file - + tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "PCE" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel E1 = matrix(`C') putexcel E2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } - - - + + + /*================================================== Population ==================================================*/ - + if (inlist(lower("`update'"),"pop", "popu", "population")) { /* Note: there is no way to know the starting point of the data. So, the we have to hardcode the limits. There are two different procedures: WDI or data sent by Emi Suzuki */ - + //------------If data comes from Emi Suzuki - - + + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\03.Population\data" local files: dir "`popdir'" files "population_country*xlsx" @@ -487,11 +498,11 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/population_country_`fver'.xlsx", describe if regexm("`r(range_1)'", ":([A-Z]+)[0-9]+$") local lc = regexs(1) local sheet = "`r(worksheet_1)'" - + //------------ Find years available import excel using "`popdir'/population_country_`fver'.xlsx", /* */ cellrange(F1:`lc'1) clear sheet("`sheet'") @@ -503,18 +514,18 @@ qui { replace col = upper(col) sort year drop n - + tempfile fyear save `fyear' - + //------------Data available import excel using "`popdir'/population_country_`fver'.xlsx", /* */ cellrange(A3) clear sheet("`sheet'") firstrow cap drop scale - + ds, has(type string) local idvars = "`r(varlist)'" - + ds, has(type numeric) disp "`r(varlist)'" local vars = "`r(varlist)'" @@ -522,18 +533,18 @@ qui { reshape long pop, i(`idvars') j(col) string replace col = upper(col) replace pop = pop/1e6 // divide by million - + //------------Merge data and clean merge m:1 col using `fyear', keep(match) nogen rename *, lower gen coverage = cond(series == "SP.POP.TOTL", "National", /* */ cond(series == "SP.RUR.TOTL", "Rural","Urban")) - + drop if series == "SP.URB.TOTL.IN.ZS" drop series_name col series drop if year > `maxyear' sort country year - + //------------Matrix with years available sum year, meanonly local ymin = r(min) @@ -541,7 +552,7 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + local idvars "country_name coverage country" reshape wide pop, i(`idvars') j(year) order `idvars' @@ -552,61 +563,61 @@ qui { //------------Matrix with population values tempname D mkmat pop*, matrix(`D') - + //------------Find most recent master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "Population" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - - + + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel D1 = matrix(`C') putexcel D2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - } - + } + /*================================================== PPP ==================================================*/ - - + + /*================================================== CCF ==================================================*/ - - - + + + /*================================================== modify vintage control ==================================================*/ - + if (`success' == 1) { - + import excel "`masterdir'/_vintage_control.xlsx", describe if regexm("`r(range_1)'", "([0-9]+$)") { local lr = real(regexs(1))+1 // last row } - + putexcel set "`masterdir'/_vintage_control.xlsx", modify sheet("_vintage") putexcel A`lr' = "`newfile'" putexcel B`lr' = "`user'" putexcel C`lr' = "`msheet'" putexcel D`lr' = "Update `update' using datalibweb cpi version `cpivin'. Stata: pcn master, update(`update')" - + putexcel save - + noi disp in y "sheet(`msheet') in Master data has been update." _n /* - */ "{stata pcn master, load(`msheet'):Load data}" + */ "{stata pcn master, load(`msheet'):Load data}" } // end of success } // end of qui From 3457aafb3afd368445ea4e88665051962fa875ed Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 28 Feb 2020 12:24:08 -0500 Subject: [PATCH 07/20] update --- pcn_master_update.ado | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index 07f05f0..9fd2211 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -268,6 +268,7 @@ qui { //---- Espen's code ----- End missings dropobs, force + //------------ Save metadata local msheet = upper("`update'") cap datasignature confirm using "`masterdir'/03.metadata/`msheet'" if (_rc == 0 & "`force'" == "") { @@ -278,7 +279,8 @@ qui { datasignature set, reset saving("`masterdir'/03.metadata/`msheet'", replace) save "`masterdir'/03.metadata/_vintage/`msheet'_`date_time'.dta", replace save "`masterdir'/03.metadata/`msheet'.dta", replace - + + //------------ arrange code. keep countrycode coverage year new_gdp preserve datalibweb_inventory, clear From e1c671f5f957ebd199ecc2cbce525fd7e47a0ec4 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Sat, 29 Feb 2020 10:06:50 -0500 Subject: [PATCH 08/20] download gpwg even when they are pending --- pcn_download_gpwg.ado | 4 +++- pcn_groupdata.ado | 3 ++- pcn_primus_query.ado | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pcn_download_gpwg.ado b/pcn_download_gpwg.ado index 574232c..b6da3a2 100644 --- a/pcn_download_gpwg.ado +++ b/pcn_download_gpwg.ado @@ -47,7 +47,9 @@ local user=c(username) ==================================================*/ qui { pcn_primus_query, countries(`countries') years(`years') /// - `pause' + `pause' gpwg + + pause after primus query local varlist = "`r(varlist)'" local n = _N diff --git a/pcn_groupdata.ado b/pcn_groupdata.ado index 26613fa..6d9bbdd 100644 --- a/pcn_groupdata.ado +++ b/pcn_groupdata.ado @@ -159,7 +159,8 @@ qui foreach id of local ids { char _dta[filename] `fileid'_GROUP-`cov'.dta char _dta[id] `fileid' - char _dta[datatype] `dt' + char _dta[welfaretype] `dt' + char _dta[weighttype] "PW" char _dta[countrycode] `cc' char _dta[year] `yr' char _dta[survey_coverage] `cov' diff --git a/pcn_primus_query.ado b/pcn_primus_query.ado index 083887b..65e1e09 100644 --- a/pcn_primus_query.ado +++ b/pcn_primus_query.ado @@ -31,6 +31,7 @@ MODule(string) /// clear /// pause /// status(string) /// +gpwg /// ] version 14 @@ -47,6 +48,21 @@ else pause off if ("`status'" == "") local status "approved" primus query, overalls(`status') +if ("`gpwg'" != "") { + preserve + primus query, overalls(pending) + local pr = `" "PENDING", "REJECTED", "-" "' + keep if !inlist(regional, `pr') & !inlist(decdg, `pr') & !inlist(povcalnet, `pr') + local nobs = _N + tempfile pendingf + save `pendingf', emptyok + restore + if (`nobs' != 0) { + append using `pendingf' + } +} + + * replace those that finish in GPWG and SARMD or something else. replace survey_id = regexs(1)+"GMD" if regexm(survey_id , "(.*)(GPWG.*)$") From e54849ff9c839ec5b419c52ba173f6222fe86c8d Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Mon, 2 Mar 2020 19:41:47 -0500 Subject: [PATCH 09/20] update cpi in master to account for reference year and duplicates in ETH --- pcn_master_update.ado | 311 +++++++++++++++++++++--------------------- 1 file changed, 158 insertions(+), 153 deletions(-) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index 9fd2211..21bb235 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -21,7 +21,7 @@ update(string) /// [ /// cpivin(string) /// MAXYear(integer 2018) /// -FORCE /// +FORCE /// ] version 15 // this is really important @@ -58,17 +58,17 @@ if wordcount("`update'") != 1 { } qui { - + /*================================================== 1: CPI ==================================================*/ if (lower("`update'") == "cpi") { - + *----------Find most recent version of CPI data in datalibweb if ("`cpivin'" == "") { local cpipath "c:\ado\personal\Datalibweb\data\GMD\SUPPORT\SUPPORT_2005_CPI" local cpidirs: dir "`cpipath'" dirs "*CPI_*_M" - + local cpivins "0" foreach cpidir of local cpidirs { if regexm("`cpidir'", "cpi_v([0-9]+)_m") local cpivin = regexs(1) @@ -76,110 +76,115 @@ qui { } local cpivin = max(`cpivins') } // if no cpi vintage is selected - + cap datalibweb, country(Support) year(2005) type(GMDRAW) /* */ fileserver surveyid(Support_2005_CPI_v0`cpivin'_M) /* */ filename(Final_CPI_PPP_to_be_used.dta) - + *Special cases *replace cpi2011_unadj = cpi2011 if code=="IDN"|code=="IND"|code=="CHN" - + replace cpi2011_unadj = cpi2011 if inlist(code, "IDN", "IND", "CHN") - - + //------------ vector of available years - sum year, meanonly + + gen yr = year + replace yr = floor(ref_year) if ref_year <. // starting year of the survey + + sum yr, meanonly local ymin = r(min) local ymax = r(max) tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + //------------Rename to match Master file especifications rename (cpi2011_unadj code levelnote countryname) (y CountryCode Coverqge CountryName) - keep y CountryCode Coverqge CountryName survname year + keep y CountryCode Coverqge CountryName survname yr + duplicates drop // to adress the case of ETH + preserve contract CountryCode Coverqge CountryName survname drop _freq - local csize = `ymax'- `ymin' + local csize = `ymax'- `ymin' expand `csize' - bysort CountryCode Coverqge CountryName survname: egen year = seq() - replace year = year - 1 + `ymin' + bysort CountryCode Coverqge CountryName survname: egen yr = seq() + replace yr = yr - 1 + `ymin' tempfile cdata save `cdata' - restore - - merge 1:1 CountryCode Coverqge CountryName survname year using `cdata', nogen - sort CountryCode year survname - + restore + + merge 1:1 CountryCode Coverqge CountryName survname yr using `cdata', nogen + sort CountryCode yr survname + //------------Manual fix for India - sort CountryCode Coverqge survname year - replace y = y[_n-1] if (CountryCode == "IND" & year == 2012) - replace y = 1 if (CountryCode == "IND" & year == 2011) - + sort CountryCode Coverqge survname yr + replace y = y[_n-1] if (CountryCode == "IND" & yr == 2012) + replace y = 1 if (CountryCode == "IND" & yr == 2011) + //------------Re format to export - - reshape wide y, i(CountryCode CountryName Coverqge survname) j(year) - + + reshape wide y, i(CountryCode CountryName Coverqge survname) j(yr) + collapse (mean) y*, by(CountryCode CountryName Coverqge) // fix if cpi per survey change tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "CPI" export excel CountryName Coverqge CountryCode /* */ using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(variables) - - + + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel D1 = matrix(`C') putexcel D2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } // end of CPI update - - + + /*================================================= GDP ==================================================*/ if (inlist(lower("`update'"),"gdp")) { - - + + //======================================================== // gets data from WDI API //======================================================== - + /* Note: This section is based on Espen's do-files available in p:\02.personal\_handover\Espen\NAS process\*/ - + *##s set checksum off wbopendata, indicator(NY.GDP.PCAP.KD) long clear ren ny_gdp_pcap_kd wdi_gdp keep countrycode year wdi_gdp - + gen sourcegdp="WDI 2019" - + local madison "https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2018.dta" merge 1:1 countrycode year using "`madison'", nogen rename rgdpnapc mdp_gdp - + replace sourcegdp ="Maddison 2018" if sourcegdp == "" - + replace mdp_gdp = . if year>1999 // do not use madison for recent spells - + //======================================================== // Special cases //======================================================== @@ -188,10 +193,10 @@ qui { tostring coverage, replace replace coverage =cond(coverage == "1", "National", /* */ cond(coverage == "2", "Urban", "Rural")) - + tempfile fna save `fna' - + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\04.NationalAccounts\data" local files: dir "`popdir'" files "NAS special*xlsx" @@ -204,70 +209,70 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/NAS special_`fver'.xlsx", describe - + import excel using "`popdir'/NAS special_`fver'.xlsx", /* */ clear sheet("`sheet'") firstrow case(lower) - + tempfile sna save `sna' - + * Merge with special cases and downloaded data use `fna', clear - + merge 1:1 countrycode coverage year using `sna', replace update rename gdp sp_= - + gen special= inlist(_merge, 2, 3, 4, 5) drop _merge - + keep if inrange(year,1960, `maxyear') //======================================================== // Espen's code //======================================================== //---- ----- Start - + gen new_gdp=wdi_gdp // default replace new_gdp=sp_gdp if special // default - - + + local s "mdp_gdp" - + bys countrycode coverage (year): gen lfbck_`s'= /* */ (new_gdp!=. & new_gdp[_n-1]==. & _n!=1)*new_gdp/`s' - + bys countrycode coverage (year): egen lfbck_`s'i=max(lfbck_`s') - + // Linking factors, forward bys countrycode coverage (year): gen lffwd_`s'= /* */ (new_gdp!=. & new_gdp[_n+1]==. & _n!=_N)*new_gdp/`s' - + bys countrycode coverage (year): egen lffwd_`s'i=max(lffwd_`s') - + // Assess where to apply (forward==1 or backward==1) bys countrycode coverage (year): gen gap_`s'=1 if new_gdp==. & new_gdp[_n-1]!=. bys countrycode coverage (year): replace gap_`s'=1 if _n==1 bys countrycode coverage (year): gen gapsum_`s'=sum(gap_`s') - + // Apply: create linked value gen lvfwd_`s'=`s'*lffwd_`s'i gen lvbck_`s'=`s'*lfbck_`s'i - + // Replace where missing and indicate source - + // fwd replace new_gdp= lvfwd_`s' if new_gdp==. & gapsum_`s'==2 replace sourcegdp="`s'" if new_gdp==. & gapsum_`s'==2 & lvfwd_`s'!=. - + // bck replace new_gdp = lvbck_`s' if new_gdp==. & gapsum_`s'==1 replace sourcegdp = "`s'" if new_gdp==. & gapsum_`s'==1 & lvbck_`s'!=. replace sourcegdp = "NONE" if new_gdp==. - + //---- Espen's code ----- End missings dropobs, force - + //------------ Save metadata local msheet = upper("`update'") cap datasignature confirm using "`masterdir'/03.metadata/`msheet'" @@ -275,12 +280,12 @@ qui { noi disp in g "Sheet `msheet' has not changed since last time. No update will be made." exit } - + datasignature set, reset saving("`masterdir'/03.metadata/`msheet'", replace) save "`masterdir'/03.metadata/_vintage/`msheet'_`date_time'.dta", replace save "`masterdir'/03.metadata/`msheet'.dta", replace - - //------------ arrange code. + + //------------ arrange code. keep countrycode coverage year new_gdp preserve datalibweb_inventory, clear @@ -288,7 +293,7 @@ qui { save `dlw' restore merge m:1 countrycode using `dlw', keep(match) nogen - + //--vector of available years sum year, meanonly local ymin = r(min) @@ -296,71 +301,71 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + *##e rename new_gdp y reshape wide y, i(countryname countrycode coverage) j(year) - - + + // cleaning drop if inlist(region, "NAC", "OTHERS") drop region missings dropvars, force - + gen note = "" local idvars "countryname coverage countrycode note" order `idvars' sort `idvars' - + //------------ modify master file - + tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "GDP" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel E1 = matrix(`C') putexcel E2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } - - + + //================================================= // PCE //================================================= if (inlist(lower("`update'"),"pce", "hfce")) { - - + + //======================================================== // gets data from WDI API //======================================================== - + /* Note: This section is based on Espen's do-files available in p:\02.personal\_handover\Espen\NAS process\*/ - - + + set checksum off wbopendata, indicator(NE.CON.PRVT.PC.KD) long clear ren ne_con_prvt_pc_kd wdi_pce keep countrycode year wdi_pce - + gen sourcepce="WDI 2019" - + //======================================================== // Special cases //======================================================== @@ -369,10 +374,10 @@ qui { tostring coverage, replace replace coverage =cond(coverage == "1", "National", /* */ cond(coverage == "2", "Urban", "Rural")) - + tempfile fna save `fna' - + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\04.NationalAccounts\data" local files: dir "`popdir'" files "NAS special*xlsx" @@ -385,36 +390,36 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/NAS special_`fver'.xlsx", describe - + import excel using "`popdir'/NAS special_`fver'.xlsx", /* */ clear sheet("`sheet'") firstrow case(lower) - + tempfile sna save `sna' - + * Merge with special cases and downloaded data use `fna', clear - + merge 1:1 countrycode coverage year using `sna', replace update rename pce sp_= - + gen special= inlist(_merge, 3, 4, 5) drop _merge - + //======================================================== // Espen's code //======================================================== //---- ----- Start - + gen new_pce=wdi_pce // default replace new_pce=sp_pce if special // default replace sourcepce = "NONE" if new_pce==. - + //---- Espen's code ----- End - - + + keep if inrange(year,1960, `maxyear') missings dropobs, force keep countrycode coverage year new_pce @@ -424,7 +429,7 @@ qui { save `dlw' restore merge m:1 countrycode using `dlw', keep(match) nogen - + //--vector of available years sum year, meanonly local ymin = r(min) @@ -432,62 +437,62 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + rename new_pce y reshape wide y, i(countryname countrycode coverage) j(year) - + // cleaning drop if inlist(region, "NAC", "OTHERS") drop region missings dropvars, force - + gen note = "" local idvars "countryname coverage countrycode note" order `idvars' sort `idvars' - + //------------ modify master file - + tempname D mkmat y*, matrix(`D') - + //------------ Find most recent version of master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "PCE" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel E1 = matrix(`C') putexcel E2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 - + } - - - + + + /*================================================== Population ==================================================*/ - + if (inlist(lower("`update'"),"pop", "popu", "population")) { /* Note: there is no way to know the starting point of the data. So, the we have to hardcode the limits. There are two different procedures: WDI or data sent by Emi Suzuki */ - + //------------If data comes from Emi Suzuki - - + + //------------Find most recent version local popdir "p:\01.PovcalNet\03.QA\03.Population\data" local files: dir "`popdir'" files "population_country*xlsx" @@ -500,11 +505,11 @@ qui { local maxdate = max(`vers') local fver: disp %tdCCYY-NN-DD `maxdate' // file version local fver = trim("`fver'") - + import excel using "`popdir'/population_country_`fver'.xlsx", describe if regexm("`r(range_1)'", ":([A-Z]+)[0-9]+$") local lc = regexs(1) local sheet = "`r(worksheet_1)'" - + //------------ Find years available import excel using "`popdir'/population_country_`fver'.xlsx", /* */ cellrange(F1:`lc'1) clear sheet("`sheet'") @@ -516,18 +521,18 @@ qui { replace col = upper(col) sort year drop n - + tempfile fyear save `fyear' - + //------------Data available import excel using "`popdir'/population_country_`fver'.xlsx", /* */ cellrange(A3) clear sheet("`sheet'") firstrow cap drop scale - + ds, has(type string) local idvars = "`r(varlist)'" - + ds, has(type numeric) disp "`r(varlist)'" local vars = "`r(varlist)'" @@ -535,18 +540,18 @@ qui { reshape long pop, i(`idvars') j(col) string replace col = upper(col) replace pop = pop/1e6 // divide by million - + //------------Merge data and clean merge m:1 col using `fyear', keep(match) nogen rename *, lower gen coverage = cond(series == "SP.POP.TOTL", "National", /* */ cond(series == "SP.RUR.TOTL", "Rural","Urban")) - + drop if series == "SP.URB.TOTL.IN.ZS" drop series_name col series drop if year > `maxyear' sort country year - + //------------Matrix with years available sum year, meanonly local ymin = r(min) @@ -554,7 +559,7 @@ qui { tempname C mata: C = `ymin'..`ymax'; /* */ st_matrix("`C'", C) - + local idvars "country_name coverage country" reshape wide pop, i(`idvars') j(year) order `idvars' @@ -565,60 +570,60 @@ qui { //------------Matrix with population values tempname D mkmat pop*, matrix(`D') - + //------------Find most recent master file _pcn_max_master, mastervin("`mastervin'") newfile("`newfile'") local newfile = "`r(newfile)'" - + //------------ modify country name and coverage local msheet "Population" export excel `idvars' using "`mastervin'/`newfile'.xlsx", /* */ sheet("`msheet'") sheetreplace firstrow(varlabels) - - + + //------------ Add cpi values putexcel set "`mastervin'/`newfile'.xlsx", modify sheet("`msheet'") putexcel D1 = matrix(`C') putexcel D2 = matrix(`D') putexcel save - + //------------Update current version copy "`mastervin'/`newfile'.xlsx" "`masterdir'/01.current/Master.xlsx", replace - + local success = 1 } - + /*================================================== PPP ==================================================*/ - - + + /*================================================== CCF ==================================================*/ - - - + + + /*================================================== modify vintage control ==================================================*/ - + if (`success' == 1) { - + import excel "`masterdir'/_vintage_control.xlsx", describe if regexm("`r(range_1)'", "([0-9]+$)") { local lr = real(regexs(1))+1 // last row } - + putexcel set "`masterdir'/_vintage_control.xlsx", modify sheet("_vintage") putexcel A`lr' = "`newfile'" putexcel B`lr' = "`user'" putexcel C`lr' = "`msheet'" putexcel D`lr' = "Update `update' using datalibweb cpi version `cpivin'. Stata: pcn master, update(`update')" - + putexcel save - - noi disp in y "sheet(`msheet') in Master data has been update." _n /* + + noi disp in y "sheet(`msheet') in Master data has been update." _n /* */ "{stata pcn master, load(`msheet'):Load data}" } // end of success } // end of qui From 6035c21d5fa4d6443f1a2edfa4d1193f7edc4253 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Mon, 2 Mar 2020 20:49:57 -0500 Subject: [PATCH 10/20] include EU-SILC files that are not approved in primus --- pcn_savedata.ado | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pcn_savedata.ado b/pcn_savedata.ado index cfbdb0d..dcbbf96 100644 --- a/pcn_savedata.ado +++ b/pcn_savedata.ado @@ -70,6 +70,13 @@ if (_rc) { // if file does not exist local mod = upper("`try'") local dlwcall = regexr("`dlwcall'", "(module\([a-zA-Z0-9]+\))", "") cap `dlwcall' module(`mod') + if (_rc & "`survey'" == "EU-SILC") { + local dlwcall"datalibweb, country(DEU) year(1991) surveyid(GSOEP-LIS) type(GMD) module(GPWG) vermast(01) veralt(03) clear" + + local dlwcall = regexr("`dlwcall'", "(veralt\([a-zA-Z0-9]+\))", "") + local dlwcall = regexr("`dlwcall'", "(vermast\([a-zA-Z0-9]+\))", "") + cap `dlwcall' + } } if (_rc) { local dlwnote "Error on datalibweb. File does NOT exist in P drive" From ca497d2a73ce6b6dc0e8bebb6cd2c36845bb708f Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Mon, 2 Mar 2020 20:52:23 -0500 Subject: [PATCH 11/20] bug --- pcn_savedata.ado | 1 - 1 file changed, 1 deletion(-) diff --git a/pcn_savedata.ado b/pcn_savedata.ado index dcbbf96..48a960f 100644 --- a/pcn_savedata.ado +++ b/pcn_savedata.ado @@ -71,7 +71,6 @@ if (_rc) { // if file does not exist local dlwcall = regexr("`dlwcall'", "(module\([a-zA-Z0-9]+\))", "") cap `dlwcall' module(`mod') if (_rc & "`survey'" == "EU-SILC") { - local dlwcall"datalibweb, country(DEU) year(1991) surveyid(GSOEP-LIS) type(GMD) module(GPWG) vermast(01) veralt(03) clear" local dlwcall = regexr("`dlwcall'", "(veralt\([a-zA-Z0-9]+\))", "") local dlwcall = regexr("`dlwcall'", "(vermast\([a-zA-Z0-9]+\))", "") From a028c32f1cb362a24464e74fba0ccaee5c087fce Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Tue, 3 Mar 2020 21:54:02 -0500 Subject: [PATCH 12/20] add creation of IDN and urban and rural --- pcn_create.ado | 135 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 43 deletions(-) diff --git a/pcn_create.ado b/pcn_create.ado index c406cf9..295b0e3 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -56,7 +56,7 @@ local user=c(username) qui { pcn_primus_query, countries(`countries') years(`years') /// - `pause' vermast("`vermast'") veralt("`veralt'") + `pause' vermast("`vermast'") veralt("`veralt'") gpwg local varlist = "`r(varlist)'" local n = _N @@ -140,60 +140,109 @@ qui { * monthly data replace welfare=welfare/12 - * keep weight and welfare - keep weight welfare - sort welfare + keep weight welfare urban + + * special treatment for IDN and IND + if inlist("`country'", "IND", "IDN") { + preserve + keep if urban==0 + tempfile rfile + char _dta[cov] "R" + save `rfile' + + restore, preserve + + keep if urban==1 + char _dta[cov] "U" + tempfile ufile + save `ufile' + + restore + + // This part is going to change for reweighted file + char _dta[cov] "N" + tempfile wfile + save `wfile' + + + local cfiles "`rfile' `ufile' `wfile'" + } + else { + tempfile wfile + char _dta[cov] "" + save `wfile' + local cfiles "`wfile'" + } - * drop missing values - drop if welfare < 0 | welfare == . - drop if weight <= 0 | weight == . + foreach file of local cfiles { - order weight welfare + use `file', clear + local cc: char _dta[cov] // country coverage + if ("`cc'" == "") { + local cc "N" + local cov "" + } + else { + local cov "-`cc'" + } - //======================================================== - // Check if data is the same as the previous one and save. - //======================================================== + * keep weight and welfare + keep weight welfare + sort welfare - cap datasignature confirm using "`surdir'/`survid'/Data/`survid'_PCN" - local dsrc = _rc - if (`dsrc' == 9) { - cap mkdir "`surdir'/`survid'/Data/_vintage" - preserve // I cannot use copy because I need the pcn_datetime char + * drop missing values + drop if welfare < 0 | welfare == . + drop if weight <= 0 | weight == . - use "`surdir'/`survid'/Data/`survid'_PCN.dta", clear - save "`surdir'/`survid'/Data/_vintage/`survid'_PCN_`:char _dta[creationdate]'", replace + order weight welfare - restore - } - if (`dsrc' != 0) { - cap datasignature set, reset /* - */ saving("`surdir'/`survid'/Data/`survid'_PCN", replace) + //======================================================== + // Check if data is the same as the previous one and save. + //======================================================== - char _dta[filename] = "`filename'" - char _dta[survin] = "`survin'" - char _dta[survid] = "`survid'" - char _dta[surdir] = "`surdir'" - char _dta[creationdate] = "`date_time'" + cap datasignature confirm using "`surdir'/`survid'/Data/`survid'_PCN`cov'" + local dsrc = _rc + if (`dsrc' == 9) { + cap mkdir "`surdir'/`survid'/Data/_vintage" + preserve // I cannot use copy because I need the pcn_datetime char - //------------Uncollapsed data - save "`surdir'/`survid'/Data/`survid'_PCN.dta", `replace' - export delimited using "`surdir'/`survid'/Data/`survid'_PCN.txt", /// - novarnames nolabel delimiter(tab) `replace' + use "`surdir'/`survid'/Data/`survid'_PCN`cov'.dta", clear + save "`surdir'/`survid'/Data/_vintage/`survid'_PCN`cov'_`:char _dta[creationdate]'", replace + restore + } + if (`dsrc' != 0) { + cap datasignature set, reset /* + */ saving("`surdir'/`survid'/Data/`survid'_PCN`cov'", replace) - //------------ collapse data - collapse (sum) weight, by(welfare) + char _dta[filename] "`filename'" + char _dta[survin] "`survin'" + char _dta[survid] "`survid'" + char _dta[surdir] "`surdir'" + char _dta[creationdate] "`date_time'" + char _dta[survey_coverage] "`cc'" - save "`surdir'/`survid'/Data/`survid'_PCNc.dta", `replace' - export delimited using "`surdir'/`survid'/Data/`survid'_PCNc.txt", /// - novarnames nolabel delimiter(tab) `replace' - noi _dots `i' 0 - } - else { - noi _dots `i' -1 - continue - } + //------------Uncollapsed data + save "`surdir'/`survid'/Data/`survid'_PCN`cov'.dta", `replace' + export delimited using "`surdir'/`survid'/Data/`survid'_PCN`cov'.txt", /// + novarnames nolabel delimiter(tab) `replace' + + + //------------ collapse data + collapse (sum) weight, by(welfare) + + save "`surdir'/`survid'/Data/`survid'_PCNc`cov'.dta", `replace' + + export delimited using "`surdir'/`survid'/Data/`survid'_PCNc`cov'.txt", /// + novarnames nolabel delimiter(tab) `replace' + noi _dots `i' 0 + } + else { + noi _dots `i' -1 + continue + } + } // end of files loop * mata: P = pcn_info(P) From d85b1c5e5af95b6303318be6e269504b02003989 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 4 Mar 2020 08:36:23 -0500 Subject: [PATCH 13/20] manual inclusion of IDN metadata --- pcn_create.ado | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pcn_create.ado b/pcn_create.ado index 295b0e3..7f230f0 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -142,7 +142,7 @@ qui { keep weight welfare urban - * special treatment for IDN and IND + * special treatment for IDN and IND if inlist("`country'", "IND", "IDN") { preserve keep if urban==0 @@ -156,14 +156,14 @@ qui { char _dta[cov] "U" tempfile ufile save `ufile' - - restore - + + restore + // This part is going to change for reweighted file char _dta[cov] "N" tempfile wfile save `wfile' - + local cfiles "`rfile' `ufile' `wfile'" } @@ -211,7 +211,7 @@ qui { restore } - if (`dsrc' != 0) { + if (`dsrc' != 0 | "`replace'" != "") { cap datasignature set, reset /* */ saving("`surdir'/`survid'/Data/`survid'_PCN`cov'", replace) @@ -222,6 +222,12 @@ qui { char _dta[creationdate] "`date_time'" char _dta[survey_coverage] "`cc'" + // Special case for IDN 2018 (should be deleted later) + if ("`country'" == "IDN") { + char _dta[welfaretype] "CONS" + char _dta[weighttype] "aw" + } + //------------Uncollapsed data save "`surdir'/`survid'/Data/`survid'_PCN`cov'.dta", `replace' From f7889118467b86ac0661c7b7e92d97bf4f4a9d83 Mon Sep 17 00:00:00 2001 From: Daniel Mahler Date: Wed, 4 Mar 2020 10:17:43 -0500 Subject: [PATCH 14/20] Add reweighing of weights for IDN-N and IND-N --- pcn_create.ado | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/pcn_create.ado b/pcn_create.ado index 7f230f0..dcd0926 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -159,7 +159,52 @@ qui { restore - // This part is going to change for reweighted file + /* + + // Loading PPPs, population data and CPI data + preserve + + // PPPs + pcn master, load(ppp) + keep if countrycode=="`country'" & coveragetype!="National" + gen urban = coveragetype=="Urban" + keep urban ppp2011 + tempfile ppp + save `ppp' + + // Population + pcn master, load(population) + keep if countrycode=="`country'" & coveragetype!="National" & year==`year' + gen urban = coveragetype=="Urban" + keep urban population + tempfile pop + save `pop' + + // CPI + pcn master, load(cpi) + keep if countrycode=="`country'" & coveragetype!="National" & year==`year' + gen urban = coveragetype=="urban" + keep urban cpi + tempfile cpi + save `cpi' + + restore + + // Merge with raw data + merge m:1 urban using `ppp', nogen + merge m:1 urban using `pop', nogen + merge m:1 urban using `cpi', nogen + + // Create welfare in daily PPP terms + gen welf_daily2011ppp = welfare*12/365/cpi/ppp + + // Rescaling weights + forvalues i=0/1 { + qui sum weight if urban==`i' + replace weight = weight*pop/`r(sum)'*10^6 if urban==`i' + } + */ + char _dta[cov] "N" tempfile wfile save `wfile' From 04d510f589f79bf7479895939801765104e180fe Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 4 Mar 2020 11:35:08 -0500 Subject: [PATCH 15/20] create National re-weighted --- pcn.ado | 39 +++++++++++++++----------- pcn_create.ado | 22 +++++++-------- pcn_master_load.ado | 68 +++++++++++++++++++++++---------------------- 3 files changed, 68 insertions(+), 61 deletions(-) diff --git a/pcn.ado b/pcn.ado index 5e2829e..e91e6d4 100644 --- a/pcn.ado +++ b/pcn.ado @@ -31,6 +31,7 @@ pause /// vermast(string) /// veralt(string) /// * /// +qui /// ] version 14 @@ -80,6 +81,12 @@ qui { if ("`maindir'" == "") local maindir "`drive':/`root'" + if ("`qui'" == "") { + local nq "noi" + } + else { + local nq "qui" + } //------------ Download functions @@ -117,7 +124,7 @@ qui { if regexm("`subcmd'", "download[ ]+gpwg") { - noi pcn_download_gpwg, countries(`countries') years(`years') /* + `nq' pcn_download_gpwg, countries(`countries') years(`years') /* */ maindir("`maindir'") `pause' `options' return add exit @@ -129,11 +136,11 @@ qui { if regexm("`subcmd'", "primus[ ]+approved") { if regexm("`options'", "down\(.*\)") { - noi pcn_primus_download, countries(`countries') years(`years') /* + `nq' pcn_primus_download, countries(`countries') years(`years') /* */ status(approved) `pause' `options' } else if regexm("`options'", "load\(.*\)"){ - noi pcn_primus_load, s(approved) `pause' `options' + `nq' pcn_primus_load, s(approved) `pause' `options' } else err @@ -143,11 +150,11 @@ qui { if regexm("`subcmd'", "primus[ ]+pending") { if regexm("`options'", "down\(.*\)") { - noi pcn_primus_download, countries(`countries') years(`years') /* + `nq' pcn_primus_download, countries(`countries') years(`years') /* */ status(pending) `pause' `options' } else if regexm("`options'", "load\(.*\)"){ - noi pcn_primus_load, s(pending) `pause' `options' + `nq' pcn_primus_load, s(pending) `pause' `options' } else err @@ -160,7 +167,7 @@ qui { //======================================================== if regexm("`subcmd'", "download[ ]+wrk") { local maindir "p:\01.PovcalNet\03.QA\02.PRIMUS\pending" - noi pcn_download_wrk, countries(`countries') years(`years') /* + `nq' pcn_download_wrk, countries(`countries') years(`years') /* */ `pause' `options' maindir("`maindir'") return add exit @@ -172,7 +179,7 @@ qui { if ("`subcmd'" == "load" | "`subcmd'" == "load[ ]+gpwg") { - noi pcn_load, country(`countries') year(`years') type(`type') /* + `nq' pcn_load, country(`countries') year(`years') type(`type') /* */ maindir("`maindir'") vermast(`vermast') veralt(`veralt') /* */ `pause' `options' return add @@ -186,7 +193,7 @@ qui { local maindir "p:\01.PovcalNet\03.QA\02.PRIMUS\pending" - noi pcn_load_wrk, country(`countries') year(`years') /* + `nq' pcn_load_wrk, country(`countries') year(`years') /* */ maindir("`maindir'") vermast(`vermast') /* */ `pause' `clear' `options' return add @@ -200,7 +207,7 @@ qui { local maindir "p:\01.PovcalNet\03.QA\02.PRIMUS\pending" - noi pcn_load_estimates, maindir("`maindir'") /* + `nq' pcn_load_estimates, maindir("`maindir'") /* */ `pause' `clear' `options' return add exit @@ -213,7 +220,7 @@ qui { if ("`subcmd'" == "create") { - noi pcn_create, countries(`countries') years(`years') type(`type') /* + `nq' pcn_create, countries(`countries') years(`years') type(`type') /* */ maindir("`maindir'") vermast(`vermast') veralt(`veralt') /* */ `pause' `options' return add @@ -227,7 +234,7 @@ qui { if inlist(lower("`subcmd'"), "group", "groupdata", "gd", "groupd") { - noi pcn_groupdata, country(`countries') years(`years') type(`type') /* + `nq' pcn_groupdata, country(`countries') years(`years') type(`type') /* */ vermast(`vermast') veralt(`veralt') /* */ `pause' `options' return add @@ -243,7 +250,7 @@ qui { error } - noi pcn_update_cpi, `pause' `options' + `nq' pcn_update_cpi, `pause' `options' return add exit } @@ -253,7 +260,7 @@ qui { //======================================================== if regexm("`subcmd'", "load[ ]+cpi") { - noi pcn_load_cpi, `pause' `options' + `nq' pcn_load_cpi, `pause' `options' return add exit } @@ -269,11 +276,11 @@ qui { error } - noi pcn_master_update, `pause' `options' + `nq' pcn_master_update, `pause' `options' return add } if regexm("`options'", "load\(.*\)") { - noi pcn_master_load, `pause' `options' + `nq' pcn_master_load, `pause' `options' `qui' return add } exit @@ -286,7 +293,7 @@ qui { if ("`subcmd'" == "test") { - noi pcn_test + `nq' pcn_test exit } diff --git a/pcn_create.ado b/pcn_create.ado index dcd0926..743dbd2 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -159,21 +159,20 @@ qui { restore - /* - + // Loading PPPs, population data and CPI data preserve // PPPs - pcn master, load(ppp) - keep if countrycode=="`country'" & coveragetype!="National" - gen urban = coveragetype=="Urban" + pcn master, load(ppp) qui + keep if countrycode == "`country'" & coveragetype != "National" + gen urban = coveragetype == "Urban" keep urban ppp2011 tempfile ppp save `ppp' // Population - pcn master, load(population) + pcn master, load(population) qui keep if countrycode=="`country'" & coveragetype!="National" & year==`year' gen urban = coveragetype=="Urban" keep urban population @@ -181,7 +180,7 @@ qui { save `pop' // CPI - pcn master, load(cpi) + pcn master, load(cpi) qui keep if countrycode=="`country'" & coveragetype!="National" & year==`year' gen urban = coveragetype=="urban" keep urban cpi @@ -199,11 +198,10 @@ qui { gen welf_daily2011ppp = welfare*12/365/cpi/ppp // Rescaling weights - forvalues i=0/1 { - qui sum weight if urban==`i' - replace weight = weight*pop/`r(sum)'*10^6 if urban==`i' + forvalues x = 0/1 { + sum weight if urban==`x' + replace weight = weight*pop/`r(sum)'*10^6 if urban==`x' } - */ char _dta[cov] "N" tempfile wfile @@ -211,7 +209,7 @@ qui { local cfiles "`rfile' `ufile' `wfile'" - } + } // end of special cases else { tempfile wfile char _dta[cov] "" diff --git a/pcn_master_load.ado b/pcn_master_load.ado index 2774213..5d72a48 100644 --- a/pcn_master_load.ado +++ b/pcn_master_load.ado @@ -21,6 +21,7 @@ syntax [anything], load(string) /// version(string) /// pause /// shape(string) /// +qui /// ] version 14 @@ -113,9 +114,10 @@ qui { local svc = clock("`vcnumber'", "YMDhms") // stata readable form local dispdate: disp %tcDDmonCCYY_HH:MM:SS `svc' - noi disp in y "File:" _col(8) "{stata br:Master_`vcnumber'.xlsx} " /* - */ in y "will be loaded. " _n "Date: " _col(8) in w "`dispdate'" - + if ("`qui'" == "") { + noi disp in y "File:" _col(8) "{stata br:Master_`vcnumber'.xlsx} " /* + */ in y "will be loaded. " _n "Date: " _col(8) in w "`dispdate'" + } //======================================================== // Pick sheet @@ -149,36 +151,36 @@ qui { // CPI //======================================================== if (lower("`load'") == "cpi") { - import excel using "`mastervin'/Master_`vcnumber'.xlsx", /* - */ sheet("CPI") clear firstrow case(lower) - - missings dropvars, force - missings dropobs, force - - _label2name - - if ("`shape'" == "long") { - reshape long y, i(countrycode coverqge) j(year) - rename y cpi - drop if cpi == . - - * fix mismatches between the two dataset - rename coverqge coveragetype - clonevar data_coverage = coveragetype - - replace data_coverage = "Urban" if countrycode == "ARG" - replace data_coverage = "Rural" if countrycode == "ETH" & year == 1981 - replace data_coverage = "Urban" if countrycode == "BOL" & year == 1992 - replace data_coverage = "Urban" if countrycode == "ECU" & year == 1995 - replace data_coverage = "Urban" if countrycode == "FSM" & year == 2000 - replace data_coverage = "Urban" if countrycode == "HND" & year == 1986 - replace data_coverage = "Urban" if countrycode == "COL" & inrange(year, 1980,1991) - replace data_coverage = "Urban" if countrycode == "URY" & inrange(year, 1990,2005) - label var year "Year" - label var cpi "CPI" - } - - * if ("`shape'" == "long") rename year cpi_time + import excel using "`mastervin'/Master_`vcnumber'.xlsx", /* + */ sheet("CPI") clear firstrow case(lower) + + missings dropvars, force + missings dropobs, force + + _label2name + + if ("`shape'" == "long") { + reshape long y, i(countrycode coverqge) j(year) + rename y cpi + drop if cpi == . + + * fix mismatches between the two dataset + rename coverqge coveragetype + clonevar data_coverage = coveragetype + + replace data_coverage = "Urban" if countrycode == "ARG" + replace data_coverage = "Rural" if countrycode == "ETH" & year == 1981 + replace data_coverage = "Urban" if countrycode == "BOL" & year == 1992 + replace data_coverage = "Urban" if countrycode == "ECU" & year == 1995 + replace data_coverage = "Urban" if countrycode == "FSM" & year == 2000 + replace data_coverage = "Urban" if countrycode == "HND" & year == 1986 + replace data_coverage = "Urban" if countrycode == "COL" & inrange(year, 1980,1991) + replace data_coverage = "Urban" if countrycode == "URY" & inrange(year, 1990,2005) + label var year "Year" + label var cpi "CPI" + } + + * if ("`shape'" == "long") rename year cpi_time } From df2a9cba2e5a4f5f3b3301b4055efbf679069f3a Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 4 Mar 2020 11:50:55 -0500 Subject: [PATCH 16/20] query master, load directly in display --- pcn_master_load.ado | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pcn_master_load.ado b/pcn_master_load.ado index 5d72a48..bc682ae 100644 --- a/pcn_master_load.ado +++ b/pcn_master_load.ado @@ -82,11 +82,13 @@ qui { local dispdate: disp %tcDDmonCCYY_HH:MM:SS `svc' local dispdate = trim("`dispdate'") - noi disp `" `i' {c |} {stata `vc':`dispdate'}"' + local scode "pcn master, load(`load') version(`vc')" + noi disp `" `i' {c |} {stata `scode':`dispdate'}"' } - noi disp _n "select vintage control date from the list above" _request(_vcnumber) + noi disp _n "select vintage control date from the list above" + exit } else if inlist(lower("`version'"), "maxvc", "max", "") { local vcnumber = `maxvc' From 7fdd35360f763ac0af7417f792e144b26c59e6fe Mon Sep 17 00:00:00 2001 From: Daniel Mahler Date: Wed, 4 Mar 2020 12:08:45 -0500 Subject: [PATCH 17/20] Changes to IDN/IND reweighting --- pcn_create.ado | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pcn_create.ado b/pcn_create.ado index 743dbd2..e295e93 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -195,7 +195,7 @@ qui { merge m:1 urban using `cpi', nogen // Create welfare in daily PPP terms - gen welf_daily2011ppp = welfare*12/365/cpi/ppp + replace welfare = welfare*12/365/cpi/ppp // Rescaling weights forvalues x = 0/1 { @@ -203,6 +203,9 @@ qui { replace weight = weight*pop/`r(sum)'*10^6 if urban==`x' } + label var welfare "Welfare in 2011 USD PPP per day" + keep welfare weight urban + char _dta[cov] "N" tempfile wfile save `wfile' From 5ae8263ba9de989325d0df02006e3e3440099432 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 4 Mar 2020 15:39:52 -0500 Subject: [PATCH 18/20] update wdi date --- pcn_master_update.ado | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index 21bb235..78397c5 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -22,6 +22,7 @@ update(string) /// cpivin(string) /// MAXYear(integer 2018) /// FORCE /// +pause /// ] version 15 // this is really important @@ -175,7 +176,7 @@ qui { ren ny_gdp_pcap_kd wdi_gdp keep countrycode year wdi_gdp - gen sourcegdp="WDI 2019" + gen sourcegdp="WDI 2020-02" local madison "https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2018.dta" merge 1:1 countrycode year using "`madison'", nogen @@ -567,6 +568,8 @@ qui { label var coverage "Coverage" label var country_name "Country Name" label var country "Country Code" + + pause after reshape to wide //------------Matrix with population values tempname D mkmat pop*, matrix(`D') From 960fcdc13cb376fff2ca9a8b3407fa5b0388d031 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 4 Mar 2020 22:19:53 -0500 Subject: [PATCH 19/20] fix conditions in loading master in pcn_create --- pcn_create.ado | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pcn_create.ado b/pcn_create.ado index e295e93..afce903 100644 --- a/pcn_create.ado +++ b/pcn_create.ado @@ -165,24 +165,24 @@ qui { // PPPs pcn master, load(ppp) qui - keep if countrycode == "`country'" & coveragetype != "National" - gen urban = coveragetype == "Urban" + keep if countrycode == "`country'" & lower(coveragetype) != "national" + gen urban = lower(coveragetype) == "urban" keep urban ppp2011 tempfile ppp save `ppp' // Population pcn master, load(population) qui - keep if countrycode=="`country'" & coveragetype!="National" & year==`year' - gen urban = coveragetype=="Urban" + keep if countrycode=="`country'" & lower(coveragetype) != "national" & year==`year' + gen urban = lower(coveragetype) == "urban" keep urban population tempfile pop save `pop' // CPI pcn master, load(cpi) qui - keep if countrycode=="`country'" & coveragetype!="National" & year==`year' - gen urban = coveragetype=="urban" + keep if countrycode=="`country'" & lower(coveragetype) != "national" & year==`year' + gen urban = lower(coveragetype) == "urban" keep urban cpi tempfile cpi save `cpi' @@ -204,7 +204,6 @@ qui { } label var welfare "Welfare in 2011 USD PPP per day" - keep welfare weight urban char _dta[cov] "N" tempfile wfile From f00362b21653925e58530a594fae266ef2947545 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 5 Mar 2020 14:44:59 -0500 Subject: [PATCH 20/20] drop old years in master pop update --- pcn_master_update.ado | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pcn_master_update.ado b/pcn_master_update.ado index 78397c5..7a1f88a 100644 --- a/pcn_master_update.ado +++ b/pcn_master_update.ado @@ -552,6 +552,8 @@ qui { drop series_name col series drop if year > `maxyear' sort country year + + drop if year < 1977 //------------Matrix with years available sum year, meanonly