Skip to content

Commit

Permalink
Get & reproject HS.csv from new CSV source (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanb authored Apr 14, 2024
1 parent d7e23bf commit 7606d17
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 150 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/make.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ jobs:
sudo apt-get install gdal-bin
ogr2ogr --version
- name: Cache EGP downloads
- name: Cache JGP downloads
uses: actions/cache@v4
env:
cache-name: cache-egp-downloads
cache-name: cache-jgp-downloads
with:
path: data/downloaded/
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ github.sha }}
Expand All @@ -49,16 +49,16 @@ jobs:
# Runs a single command using the runner's shell
- name: Run make download
env:
username: ${{ secrets.egpUsername }}
password: ${{ secrets.egpPassword }}
run: make download

- name: Run make geojson
run: make geojson
- name: Run make Housenumber csv
run: make hscsv

# - name: Run make geojson
# run: make geojson

- name: Run make split
run: make split
# - name: Run make split
# run: make split

- name: update timestamp if needed
run: |
Expand Down
20 changes: 17 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,26 @@ TMP = $(DATAFOLDER)temp/
TS = $$(cat $(TMP)timestamp.txt)
TSYYYY = $$(cat $(TMP)timestamp.txt | cut -b 1-4)

all: download geojson split
all: download hscsv #TODO: geojson split

# Fetch the source data: make sure the temp folder $(TMP) exists, then run
# getSource.sh with the download folder and the temp folder as its two
# arguments. The '|| true' lets the recipe continue even if mkdir reports an
# error.
.PHONY: download
download:
mkdir -p $(TMP) || true
./getSource.sh $(DLFOLDER) $(TMP)

# Build $(DATAFOLDER)HS.csv by converting the extracted house-number CSV with
# ogr2ogr from EPSG:3794 to EPSG:4326. The source columns E / N are read as
# X / Y, and the output is written with the geometry as X,Y columns and with
# strings quoted only where needed.
# CSV driver options: https://gdal.org/drivers/vector/csv.html
#
# The source file name ends in an 8-character suffix that is not known in
# advance, so it is resolved by the shell when the recipe runs:
#   * no match        -> stop with an explicit message instead of handing
#                        ogr2ogr an empty path;
#   * several matches -> use the one that sorts last (presumably the newest,
#                        assuming the suffix is a date - TODO confirm).
.PHONY: hscsv
hscsv:
	rm -rf "$(DATAFOLDER)HS.csv"
	mkdir -p $(DATAFOLDER)
	src=""; \
	for f in $(TMP)RPE_HS/KN_SLO_NASLOVI_HS_naslovi_hs_????????.csv; do \
		if [ -f "$$f" ]; then src="$$f"; fi; \
	done; \
	if [ -z "$$src" ]; then \
		echo "hscsv: no $(TMP)RPE_HS/KN_SLO_NASLOVI_HS_naslovi_hs_????????.csv found - run 'make download' first" >&2; \
		exit 1; \
	fi; \
	ogr2ogr \
		-s_srs "EPSG:3794" -f "CSV" -oo X_POSSIBLE_NAMES=E -oo Y_POSSIBLE_NAMES=N \
		-t_srs "EPSG:4326" -lco STRING_QUOTING=IF_NEEDED -lco GEOMETRY=AS_XY \
		"$(DATAFOLDER)HS.csv" \
		"$$src"

.PHONY: geojson
geojson:
mkdir -p $(DATAFOLDER)
Expand All @@ -37,5 +50,6 @@ split:

# Remove everything the build downloaded or extracted: the temp folder and
# the download folder. -f makes each removal succeed when the folder is
# already gone, so `make clean` can be run repeatedly (a plain `rm -r` would
# fail on a missing folder and abort the recipe).
.PHONY: clean
clean:
	rm -rf $(TMP)
	rm -rf $(DLFOLDER)

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Dovoljenje / Licence: [CC-BY 4.0 SL](https://creativecommons.org/licenses/by/4.0

[Splošno pogoji uporabe](https://www.e-prostor.gov.si/fileadmin/struktura/preberi_me.pdf) / [General terms and conditions](https://www.e-prostor.gov.si/fileadmin/struktura/ANG/General_terms.pdf)

[Vir podatkov](https://egp.gu.gov.si/egp) / [Source of data](https://egp.gu.gov.si/egp/?lang=en)
[Vir podatkov](https://ipi.eprostor.gov.si/jgp/data) / [Source of data](https://ipi.eprostor.gov.si/jgp/data?lang=en)

## Tehnične podrobnosti / Technical details

Expand Down
131 changes: 22 additions & 109 deletions getSource.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#!/bin/bash
set -e
DownloadDest="${1}"
TempDest="${2}"
credentialsFile="CREDENTIALS-egp.gu.gov.si.txt"
maxAge=240
baseUrl="https://egp.gu.gov.si/egp/"

SEDCMD="sed"
STATCMD="stat"
Expand All @@ -21,16 +20,31 @@ MINGW*) machine=MinGw ;;
esac
echo Running on: "${machine}", using $SEDCMD and $STATCMD commands

# Download one composite-product archive into ${DownloadDest}.
#   $1 - numeric group id (the .../groups/<id>/ part of the service URL)
#   $2 - numeric composite-product id (the .../composite-products/<id>/ part)
#   $3 - file name to store the download under, inside ${DownloadDest}
# The transfer is written to "<name>.part" first and renamed only after curl
# succeeds, so a failed or interrupted download never truncates or replaces
# an existing archive. On failure the partial file is removed and curl's exit
# status is returned to the caller.
function downloadFile() {
    local target="${DownloadDest}$3"
    local partial="${target}.part"
    local status=0

    mkdir -p "${DownloadDest}"
    echo "Downloading ${target}..."
    curl \
        --compressed \
        --output "${partial}" \
        --fail \
        --progress-bar \
        "https://ipi.eprostor.gov.si/jgp-service-api/display-views/groups/$1/composite-products/$2/file?filterParam=DRZAVA&filterValue=1" ||
        status=$?

    if [ "${status}" -ne 0 ]; then
        # Do not leave a half-written file behind.
        rm -f "${partial}"
        return "${status}"
    fi

    mv -f "${partial}" "${target}"
}

# Unpack the downloaded RPE_*.ZIP archives into a freshly recreated
# ${TempDest} and write a timestamp to ${TempDest}timestamp.txt.
# Takes no arguments; reads the globals DownloadDest, TempDest and STATCMD.
function extractDownloaded() {
# Wipe the temp folder and create it again empty.
rm -rf "${TempDest}" || true
mkdir -p "${TempDest}"

#----- extract: -------
# Each archive is unpacked into a sub-folder of ${TempDest} named after the
# archive without its .ZIP extension; the sub-folder name is echoed.
for file in "${DownloadDest}"RPE_*.ZIP; do
extdir=$(basename "$file" .ZIP)
echo "$extdir"
unzip -o -d "${TempDest}$extdir" "$file"
done
# Second pass: every .zip found directly inside an extracted RPE_* folder is
# unpacked into ${TempDest} itself.
# NOTE(review): the commented-out line below is identical to the active loop.
for file in "${TempDest}"RPE_*/*.zip; do unzip -o -d "${TempDest}" "$file"; done
# for file in "${TempDest}"RPE_*/*.zip; do unzip -o -d "${TempDest}" "$file"; done

# Store the first space-separated field of the stat '%y' output for OB/OB.shp
# (presumably the date part of its modification time - confirm for the stat
# variant selected in STATCMD).
$STATCMD -c '%y' "${TempDest}OB/OB.shp" | cut -d' ' -f1 >"${TempDest}timestamp.txt"
# $STATCMD -c '%y' ${TempDest}RPE_HS/KN_SLO_NASLOVI_HS_naslovi_hs_????????.csv | cut -d' ' -f1 >"${TempDest}timestamp.txt"
}

countTooOld=3
Expand All @@ -43,116 +57,15 @@ fi
# exit if all are newer than max age
if [ "$countTooOld" -gt "0" ]; then
echo "Need to download $countTooOld files (they are either missing or older than $maxAge minutes)"
#------ Download all data we care about: ------
downloadFile 119 12 RPE_PE.ZIP
downloadFile 119 181 RPE_UL.ZIP
downloadFile 121 141 RPE_HS.ZIP
else
echo "No need to download anything (source files are already there and not older than $maxAge minutes)"
extractDownloaded
exit 0
fi


# Clean up leftovers from previous failed runs
rm -f "${DownloadDest}cookies.txt"
rm -f "${DownloadDest}login.html"

# Options shared by every wget call in this script: a single cookie file in
# ${DownloadDest} that is both loaded and saved (session cookies included),
# downloads stored under ${DownloadDest}, and sigov-ca2.pem (path relative to
# the working directory) passed as the CA certificate.
commonWgetParams=(--load-cookies "${DownloadDest}cookies.txt" --save-cookies "${DownloadDest}cookies.txt" --directory-prefix "${DownloadDest}" --keep-session-cookies --ca-certificate "sigov-ca2.pem")
# Disabled wget options:
# --no-hsts
# --quiet
# --ciphers "HIGH:!aNULL:!MD5:!RC4" \
# --secure-protocol=TLSv1 \
# --referer "${baseUrl}" \
# Make sure $username and $password are set, prompting on the terminal for
# whichever one is still empty after sourcing $credentialsFile.
# Takes no arguments; reads the globals credentialsFile and baseUrl and sets
# the globals username and password.
# A newly entered username is written to $credentialsFile with '>' (replacing
# the file's content); a newly entered password is appended to it only when
# the user answers y or Y to the confirmation prompt.
function prepareCredentials() {
#------ username & password: ------
# read possibly existing credentials...
# shellcheck source=/dev/null
source "$credentialsFile"

echo Credentials for ${baseUrl}

# Username: ask only when the sourced file did not provide one.
if [ -z "$username" ]; then
echo -n " Username: "
read -r username
echo "username=\"$username\"" >"$credentialsFile"
else
echo " Username: '$username'"
fi

# Password: ask only when missing; an already known password is never echoed.
if [ -z "$password" ]; then
echo -n " Password: "
# NOTE(review): read -r without -s, so the typed password is presumably
# visible on the terminal - confirm this is intended.
read -r password
# Single-keypress confirmation; the answer lands in $REPLY.
read -p " Save password in plain text to $credentialsFile for future use? (y/N) " -n 1 -r
echo # (optional) move to a new line
if [[ $REPLY =~ ^[Yy]$ ]]; then
# save it only if wanted
echo "password=\"$password\"" >>"$credentialsFile"
fi
else
echo " Password: *********"
fi
}

# Log in at ${baseUrl}login.html so that later wget calls can reuse the
# session stored in the shared cookie file (see commonWgetParams).
# Steps: fetch the login page, extract its _csrf token, obtain credentials
# (interactively unless TRAVIS is "true"), then POST the login form.
# Takes no arguments; exits the script with status 1 when no token is found.
function login() {
#------ Log in to the server. This can be done only once ------
# First request: download the login page; the sed call below reads it from
# ${DownloadDest}login.html.
wget "${commonWgetParams[@]}" \
--quiet \
"${baseUrl}login.html"

# example login.html content:
# <input type="hidden" name="_csrf" value="089070ed-b40a-4e3c-ab22-422de0daffff" />
# Capture the text of the value attribute that follows name="_csrf".
csrftoken="$($SEDCMD -n 's/.*name="_csrf"\s\+value="\([^"]\+\).*/\1/p' "${DownloadDest}login.html")"

if [ -z "${csrftoken}" ]; then
echo "No CSRF token found, exitting!"
exit 1
fi

echo "Got CSRF token: \"${csrftoken}\"."

# When TRAVIS is "true" the prompt is skipped and $username / $password are
# used as they are already set.
echo "TRAVIS=${TRAVIS}"
if [ "${TRAVIS}" != "true" ]; then
prepareCredentials
else
echo "Running in TRAVIS CI, using encrypted credentials."
fi


# NOTE(review): the values are concatenated without URL-encoding; credentials
# containing characters such as & or = would presumably corrupt the form
# data - confirm.
loginFormData="username=${username}&password=${password}&_csrf=${csrftoken}"
#echo login form data: $loginFormData

#exit 1
# Second request: submit the form; --delete-after discards the response body.
wget "${commonWgetParams[@]}" \
--post-data "${loginFormData}" \
--delete-after \
--quiet \
"${baseUrl}login.html"
}


# pass numeric file id as parameter
# Fetches ${baseUrl}download-file.html for the given id ($1) with the shared
# wget options, so the file is stored under ${DownloadDest}.
# --content-disposition lets the server's response header name the file and
# -N turns on wget's timestamp checking (per the wget manual).
# NOTE(review): the meaning of the fixed query parameters format=10 and d96=1
# is not visible here.
function downloadFile() {
wget "${commonWgetParams[@]}" \
-q --show-progress \
--content-disposition -N \
"${baseUrl}download-file.html?id=$1&format=10&d96=1"
}

# ---------------------------------------------
# Main sequence: log in, download the three archives, remove the cookie file,
# then extract what was downloaded.
login

#------ Download all data we care about: ------
# The comment above each call names the archive that id is expected to yield.
#RPE_PE.ZIP
downloadFile 105

#RPE_UL.ZIP
downloadFile 106

#RPE_HS.ZIP
downloadFile 107

# Clean up secrets so they are not cached
rm -f "${DownloadDest}cookies.txt"


extractDownloaded

echo getSource finished.
28 changes: 0 additions & 28 deletions sigov-ca2.pem

This file was deleted.

0 comments on commit 7606d17

Please sign in to comment.