From 88accf5340e085a86ad0fba4493b486c1f320e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0tefan=20Baebler?= Date: Sun, 14 Apr 2024 20:54:35 +0200 Subject: [PATCH] Get & reproject HS.csv from new CSV source --- .github/workflows/make.yml | 18 ++--- Makefile | 20 +++++- README.md | 2 +- getSource.sh | 131 +++++++------------------------------ sigov-ca2.pem | 28 -------- 5 files changed, 49 insertions(+), 150 deletions(-) delete mode 100644 sigov-ca2.pem diff --git a/.github/workflows/make.yml b/.github/workflows/make.yml index 9a1bb4f..c538f0d 100644 --- a/.github/workflows/make.yml +++ b/.github/workflows/make.yml @@ -34,10 +34,10 @@ jobs: sudo apt-get install gdal-bin ogr2ogr --version - - name: Cache EGP downloads + - name: Cache JGP downloads uses: actions/cache@v4 env: - cache-name: cache-egp-downloads + cache-name: cache-jgp-downloads with: path: data/downloaded/ key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ github.sha }} @@ -49,16 +49,16 @@ jobs: # Runs a single command using the runners shell - name: Run make download - env: - username: ${{ secrets.egpUsername }} - password: ${{ secrets.egpPassword }} run: make download - - name: Run make geojson - run: make geojson + - name: Run make Housenumber cvs + run: make hscsv + + # - name: Run make geojson + # run: make geojson - - name: Run make split - run: make split + # - name: Run make split + # run: make split - name: update timestamp if needed run: | diff --git a/Makefile b/Makefile index dcc18f5..807c6d7 100644 --- a/Makefile +++ b/Makefile @@ -5,13 +5,26 @@ TMP = $(DATAFOLDER)temp/ TS = $$(cat $(TMP)timestamp.txt) TSYYYY = $$(cat $(TMP)timestamp.txt | cut -b 1-4) -all: download geojson split +all: download hscsv #TODO: geojson split .PHONY: download download: mkdir -p $(TMP) || true ./getSource.sh $(DLFOLDER) $(TMP) +.PHONY: hscsv +hscsv: + rm -rf "$(DATAFOLDER)HS.csv" || true + mkdir -p $(DATAFOLDER) + + # https://gdal.org/drivers/vector/csv.html + + ogr2ogr \ + -s_srs "EPSG:3794" -f "CSV" -oo X_POSSIBLE_NAMES=E -oo Y_POSSIBLE_NAMES=N \ + -t_srs "EPSG:4326" -lco STRING_QUOTING=IF_NEEDED -lco GEOMETRY=AS_XY \ + "$(DATAFOLDER)HS.csv" \ + "$(wildcard $(TMP)RPE_HS/KN_SLO_NASLOVI_HS_naslovi_hs_????????.csv)" + .PHONY: geojson geojson: mkdir -p $(DATAFOLDER) @@ -37,5 +50,6 @@ split: .PHONY: clean clean: - rm -r $(TMP) - rm -r $(DLFOLDER) + rm -rf $(TMP) + rm -rf $(DLFOLDER) + diff --git a/README.md b/README.md index 4e74fd6..352808f 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Dovoljenje / Licence: [CC-BY 4.0 SL](https://creativecommons.org/licenses/by/4.0 [Splošno pogoji uporabe](https://www.e-prostor.gov.si/fileadmin/struktura/preberi_me.pdf) / [General terms and conditions](https://www.e-prostor.gov.si/fileadmin/struktura/ANG/General_terms.pdf) -[Vir podatkov](https://egp.gu.gov.si/egp) / [Source of data](https://egp.gu.gov.si/egp/?lang=en) +[Vir podatkov](https://ipi.eprostor.gov.si/jgp/data) / [Source of data](https://ipi.eprostor.gov.si/jgp/data?lang=en) ## Tehnične podrobnosti / Technical details diff --git a/getSource.sh b/getSource.sh index 399aacc..ee38668 100755 --- a/getSource.sh +++ b/getSource.sh @@ -1,9 +1,8 @@ #!/bin/bash +set -e DownloadDest="${1}" TempDest="${2}" -credentialsFile="CREDENTIALS-egp.gu.gov.si.txt" maxAge=240 -baseUrl="https://egp.gu.gov.si/egp/" SEDCMD="sed" STATCMD="stat" @@ -21,16 +20,31 @@ MINGW*) machine=MinGw ;; esac echo Running on: "${machine}", using $SEDCMD and $STATCMD commands +# pass numeric file ids and name as parameter +function downloadFile() { + mkdir -p "${DownloadDest}" + echo "Downloading ${DownloadDest}$3..." + curl \ + --compressed \ + --output "${DownloadDest}$3" \ + --fail \ + --progress-bar \ + "https://ipi.eprostor.gov.si/jgp-service-api/display-views/groups/$1/composite-products/$2/file?filterParam=DRZAVA&filterValue=1" +} + function extractDownloaded() { + rm -rf "${TempDest}" || true + mkdir -p "${TempDest}" + #----- extract: ------- for file in "${DownloadDest}"RPE_*.ZIP; do extdir=$(basename "$file" .ZIP) echo "$extdir" unzip -o -d "${TempDest}$extdir" "$file" done - for file in "${TempDest}"RPE_*/*.zip; do unzip -o -d "${TempDest}" "$file"; done + # for file in "${TempDest}"RPE_*/*.zip; do unzip -o -d "${TempDest}" "$file"; done - $STATCMD -c '%y' "${TempDest}OB/OB.shp" | cut -d' ' -f1 >"${TempDest}timestamp.txt" + # $STATCMD -c '%y' ${TempDest}RPE_HS/KN_SLO_NASLOVI_HS_naslovi_hs_????????.csv | cut -d' ' -f1 >"${TempDest}timestamp.txt" } countTooOld=3 @@ -43,116 +57,15 @@ fi # exit if all are newer than max age if [ "$countTooOld" -gt "0" ]; then echo "Need to download $countTooOld files (they are either missing or older than $maxAge minutes)" + #------ Download all data we care about: ------ + downloadFile 119 12 RPE_PE.ZIP + downloadFile 119 181 RPE_UL.ZIP + downloadFile 121 141 RPE_HS.ZIP else echo "No need to download anything (source files are already there and not older than $maxAge minutes)" - extractDownloaded - exit 0 fi -# Clean up leftovers from previous failed runs -rm -f "${DownloadDest}cookies.txt" -rm -f "${DownloadDest}login.html" - -commonWgetParams=(--load-cookies "${DownloadDest}cookies.txt" --save-cookies "${DownloadDest}cookies.txt" --directory-prefix "${DownloadDest}" --keep-session-cookies --ca-certificate "sigov-ca2.pem") -# --no-hsts -# --quiet -# --ciphers "HIGH:!aNULL:!MD5:!RC4" \ -# --secure-protocol=TLSv1 \ -# --referer "${baseUrl}" \ - -function prepareCredentials() { - #------ username & password: ------ - # read possibly existing credentials... - # shellcheck source=/dev/null - source "$credentialsFile" - - echo Credentials for ${baseUrl} - - if [ -z "$username" ]; then - echo -n " Username: " - read -r username - echo "username=\"$username\"" >"$credentialsFile" - else - echo " Username: '$username'" - fi - - if [ -z "$password" ]; then - echo -n " Password: " - read -r password - read -p " Save password in plain text to $credentialsFile for future use? (y/N) " -n 1 -r - echo # (optional) move to a new line - if [[ $REPLY =~ ^[Yy]$ ]]; then - # save it only if wanted - echo "password=\"$password\"" >>"$credentialsFile" - fi - else - echo " Password: *********" - fi -} - -function login() { - #------ Log in to the server. This can be done only once ------ - wget "${commonWgetParams[@]}" \ - --quiet \ - "${baseUrl}login.html" - - # example login.html content: - # - csrftoken="$($SEDCMD -n 's/.*name="_csrf"\s\+value="\([^"]\+\).*/\1/p' "${DownloadDest}login.html")" - - if [ -z "${csrftoken}" ]; then - echo "No CSRF token found, exitting!" - exit 1 - fi - - echo "Got CSRF token: \"${csrftoken}\"." - - echo "TRAVIS=${TRAVIS}" - if [ "${TRAVIS}" != "true" ]; then - prepareCredentials - else - echo "Running in TRAVIS CI, using encrypted credentials." - fi - - - loginFormData="username=${username}&password=${password}&_csrf=${csrftoken}" - #echo login form data: $loginFormData - - #exit 1 - wget "${commonWgetParams[@]}" \ - --post-data "${loginFormData}" \ - --delete-after \ - --quiet \ - "${baseUrl}login.html" -} - - -# pass numeric file id as parameter -function downloadFile() { - wget "${commonWgetParams[@]}" \ - -q --show-progress \ - --content-disposition -N \ - "${baseUrl}download-file.html?id=$1&format=10&d96=1" -} - -# --------------------------------------------- -login - -#------ Download all data we care about: ------ -#RPE_PE.ZIP -downloadFile 105 - -#RPE_UL.ZIP -downloadFile 106 - -#RPE_HS.ZIP -downloadFile 107 - -# Clean up secrets so they are not cached -rm -f "${DownloadDest}cookies.txt" - - extractDownloaded echo getSource finished. diff --git a/sigov-ca2.pem b/sigov-ca2.pem deleted file mode 100644 index 8960b70..0000000 --- a/sigov-ca2.pem +++ /dev/null @@ -1,28 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIEgDCCAuigAwIBAgINAL0ag3wAAAAAVnvHDjANBgkqhkiG9w0BAQsFADBXMQsw -CQYDVQQGEwJTSTEcMBoGA1UEChMTUmVwdWJsaWthIFNsb3ZlbmlqYTEXMBUGA1UE -YRMOVkFUU0ktMTc2NTk5NTcxETAPBgNVBAMTCFNJR09WLUNBMB4XDTE1MTIyNDA5 -NTEwNloXDTM1MTIyNDEwMjEwNlowVzELMAkGA1UEBhMCU0kxHDAaBgNVBAoTE1Jl -cHVibGlrYSBTbG92ZW5pamExFzAVBgNVBGETDlZBVFNJLTE3NjU5OTU3MREwDwYD -VQQDEwhTSUdPVi1DQTCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBAMwL -1q8WkYT8bgTgdHHvdMS+A0Fiep9eQA4Y2bNiLFYF2Gzj2ykP1Llg2SPQJBIJkB4R -x49celNikM/XOzsxtov4qnP8eO9N6Ez4pu68MldzHdWC2UpglR8vE9QMjn3zFdGM -88x84x5A7wxDr05kgeiPLy9KD2vSVsX7CNIAWXGv9FAE0ayCtsUNWQBH+nln/eR9 -xJGSwYHySnvYniUwihAVsZMYRipBcjc8EpLWriaaQvxOhqmk8qwvTmbm9SCo4uTK -I9HGhIbhMQZ+6B/Q0cHNKa5hPJyQpJ9iWza7gBfUe7uU/Ee5Xj4WjNJlshS80wNu -S8y8gbkUvPPhAGuWEX0Hg5/v5yOpq6iMbkFL5vdLif4qdV06I/+j4oco8F761Obs -5VP39++QHUBF93Le6ZZBweftzCYegIUX/kRPyEoxmkUDCkme8HF0m6gJBF0prh7C -o4geBeEzSCq5DwxrKbb0ZhS30uxjEQr3dgXfxHqJQcfuyKdymlYEuGJ7TmmuVQID -AQABo0swSTAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjATBgNVHSME -DDAKgAhGXkDlU+3+/jARBgNVHQ4ECgQIRl5A5VPt/v4wDQYJKoZIhvcNAQELBQAD -ggGBAKuyWriPA495SJiRLk8xh4ikFkIMgum61AXGg6O2B2KGn44cNQWUlpbYBu9s -9z86K43+rKbgDcl6o40paRwfneKRHNsjiHmXDyZa7u74S6jGf+t9SO+TDlpBe6iu -Pl97Yw8LMtnktQuKMnuKms78RIJrX9K8JRFQiGkhIAduDwYdcQEbQ77sJxNdcfRL -hK5fNZFWQiyWmdBxwIyhec5s69Vsu44+wUACrCDHNH69vxfWkS39DO4m46gjHfNr -+vo+G6ATo2kQX2cLIBz7AX/aF5grR+K+4Y/hZdyKF3WR4eIJUcaZ72P80M09fOiM -0sgBAQNNUFKgqHBojawIH2ZtdrZHY361gSd86tXEoJbq5xKLE+Ux6+7p86IM2gAr -lzs89rwII0TdMcAbCTZnOrjqtgiMGac/LL8jbsLcJlkhwW4ZM8sxZoHqlrvd8Sm6 -s43iLyLD/P6gsSCv65ZasettJxwVoBNF45qXEvQ973pkyPPK9DOrYs+r5801/MjD -eFARnA== ------END CERTIFICATE----- -