-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathValidate.py
385 lines (293 loc) · 14.3 KB
/
Validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
"""validation.py
Functionality to validate results from the main part of this tool
by comparing them against the service's data fetched from the server.
"""
import os
import logging
import io
import csv
import pdb
import datetime
import numpy as np
from PIL import Image
from osgeo import ogr, gdal
import rasterio.features
import geojson
import requests
# logging levels = DEBUG, INFO, WARNING, ERROR, CRITICAL
logging.basicConfig(filename="../../output_data/logs/" \
+ datetime.datetime.now().strftime("%d.%b_%Y_%H_%M_%S") \
+ '.log', level=logging.INFO)
def write_statistics(output_path, service_number, layer_name, pixels_count, correct_pixels,
false_pos_pixels, false_neg_pixels, bbox_area_decrease):
"""Write statistics to a spreadsheet.
:param str output_path: Path to the folder where statistics file will be written
:param str service_number: Number of the service
:param str layer_name: Name of the layer
:param int pixels_count: Total number of pixels in validation
:param int correct_pixels: Number of correctly determined pixels
:param int false_pos_pixels: Number of pixels for which the tool's result
was positive but there was no data in the location
in the fetched layer
:param int false_neg_pixels: Number of pixels for which the tool's result
was negative but there was data in the location
in the fetched layer
:param float bbox_area_decrease: Size of a bounding box that encapsulates
area with data as determined by the tool;
expressed in % of the original bounding box
"""
stats_path = os.path.split(output_path)[0] + "/stats_"\
+ datetime.datetime.now().strftime("%d.%b_%Y") + ".csv"
if not os.path.isfile(stats_path):
with open(stats_path, "w") as stats_file:
headers = ["Service number", "Layer name", "Pixel count",\
"Correct", "False positives", "False negatives",\
"Area of new bounding box in %% of original"]
row = [service_number, layer_name, pixels_count, correct_pixels,\
false_pos_pixels, false_neg_pixels, bbox_area_decrease]
writer = csv.writer(stats_file)
writer.writerow(headers)
writer.writerow(row)
else:
with open(stats_path, "a") as stats_file:
row = [service_number, layer_name, pixels_count, correct_pixels,\
false_pos_pixels, false_neg_pixels, bbox_area_decrease]
writer = csv.writer(stats_file)
writer.writerow(row)
logging.info("Statistics written to {}".format(stats_path))
def area_decrease(bbox, data_bounds):
"""Write statistics to a spreadsheet
:param list bbox: Bounding box as specified in the layer metadata
:param list data_bounds: Bounding box that encapsulates
area with data as determined by the tool
:return float: Size of data_bounds expressed as a percentage
of bbox
"""
if len(data_bounds) == 0:
# No bounds means no data found -> 100% decrease
return 100
x_decr = (data_bounds[2] - data_bounds[0])/(bbox[2] - bbox[0])
y_decr = (data_bounds[3] - data_bounds[1])/(bbox[3] - bbox[1])
return round(x_decr*y_decr*100)
def test_pixel(image):
"""Search for variation in a numpy array
:param numpy array image: Array of numerical values
:return boolean: Return True if there is variation in the array
"""
uniques = np.unique(image)
if len(uniques) == 1 and uniques[0] in [0, 255]:
return False
return True
def test_for_var(image):
"""Search for variation within each part of a 3x3 grid of the input image.
If variation is found, corresponding value in the data_grid
is set to True. If not, it is set to False.
:param numpy array image: Array of numerical values
:return numpy array data_grid: Array of boolean values
"""
data_grid = np.empty([3, 3])
size_x = round(image.shape[0]/data_grid.shape[0])
size_y = round(image.shape[1]/data_grid.shape[1])
for m in range(3):
for k in range(3):
image_subset = image[m*size_x:(m+1)*size_x, k*size_y:(k+1)*size_y]
data_grid[m][k] = test_pixel(image_subset)
return data_grid
def validate_wms(url, layer_name, srs, bbox, result_array, service_version):
"""Fetch image for the corresponding layer and search for variation with
the same parameters in both the image and the result produced by the tool.
:param str url: First part of URL pointing to a particular service
:param str layer_name: Name of the layer
:param str srs: EPSG code of a coordinate system
:param list bbox: Bounding box as specified in the service metadata
:param numpy array result_array: Array of boolean values; result
of executing the main part of the tool.
:param str service_version: Number of correctly determined pixels
:return numpy array real_data: Information on variation within grid areas
in image fetched from the server.
:return numpy array our_grid: Information on variation within grid areas
in raster produced by the tool.
"""
if srs == "CRS:84":
srs = "EPSG:4326"
#TODO: properly deal with service_version == None
if service_version is None:
service_version = "1.3.0"
if service_version == "1.3.0":
req_url = "{}?VERSION={}&SERVICE=WMS&REQUEST=GetMap&LAYERS={}&STYLES=&CRS={}\
&BBOX={}&WIDTH=256&HEIGHT=256&FORMAT=image/png&EXCEPTIONS=XML"\
.format(url, service_version, layer_name, srs, bbox)
elif service_version == "1.1.1":
req_url = "{}?VERSION={}&SERVICE=WMS&REQUEST=GetMap&LAYERS={}&STYLES=&SRS={}\
&BBOX={}&WIDTH=256&HEIGHT=256&FORMAT=image/png&EXCEPTIONS=XML"\
.format(url, service_version, layer_name, srs, bbox)
else:
raise Exception("Unknown service_version {}".format(service_version))
logging.info("URL used for validation: {}".format(req_url))
image_response = requests.get(req_url)
try:
image = np.array(Image.open(io.BytesIO(image_response.content)))
except:
return None, None
real_data = test_for_var(image).astype("uint8")
our_grid = test_for_var(result_array).astype("uint8")
logging.info("WMS validation: data fetched from server:\n {}".format(real_data))
logging.info("WMS validation: our data:\n {}".format(our_grid))
return real_data, our_grid
def validate_wfs(url, layer_name, srs, bbox, result_file,\
service_version, max_features_for_validation):
"""Fetch data for the corresponding layer and generate an array
that maps the spatial extent of the data
:param str url: First part of URL pointing to a particular service
:param str layer_name: Name of the layer
:param str srs: EPSG code of a coordinate system
:param list bbox: Bounding box as specified in the service metadata
:param numpy array result_array: Array of boolean values; result
of executing the main part of the tool.
:param str service_version: Number of correctly determined pixels
:param string max_features_for_validation: If exceeded, validation is skipped
:return numpy array real_data: Information on the location of data in layer
fetched from the server.
"""
wfs_drv = ogr.GetDriverByName('WFS')
# get just the layer name (as opposed to a URL)
layer_name = layer_name.split(":")[-1]
# Speeds up querying WFS capabilities for services with alot of layers
gdal.SetConfigOption('OGR_WFS_LOAD_MULTIPLE_LAYER_DEFN', 'NO')
# Set config for paging. Works on WFS 2.0 services and WFS 1.0 and 1.1 with some other services.
gdal.SetConfigOption('OGR_WFS_PAGING_ALLOWED', 'YES')
gdal.SetConfigOption('OGR_WFS_PAGE_SIZE', '10000')
# Fix for SSL connection errors
gdal.SetConfigOption('GDAL_HTTP_UNSAFESSL', 'YES')
# Open the webservice
#TODO: use version -> if service_version is not None: ...
req_url = "{}?service=wfs&version=2.0.0&srsName={}&BBOX={}".format(url, srs, bbox)
logging.info("URL used for validation: {}".format('WFS:' + req_url))
wfs_ds = wfs_drv.Open('WFS:' + req_url)
if not wfs_ds:
logging.error("Couldn't open connection to the server.")
#raise Exception("Couldn't open connection to the server.")
return None
shapes = []
# Get a specific layer
layer = wfs_ds.GetLayerByName(layer_name)
if not layer:
logging.error("Couldn't find layer in service")
return None
feature_count = layer.GetFeatureCount()
if max_features_for_validation is not None:
if feature_count > int(max_features_for_validation):
logging.warning("Validation for layer {} skipped. there was too many features: {}"\
.format(layer.GetName(), layer.GetFeatureCount()))
return None
logging.info("Layer: {}, Features: {}".format(layer.GetName(), feature_count))
# iterate over features
logging.info("Start iteration through features.")
feat = layer.GetNextFeature()
count = 0
while feat is not None:
count += 1
if feature_count > 5000:
log_every = 1000
else:
log_every = 100
if count % log_every == 0:
logging.info("Feature: {}".format(count))
geom = feat.GetGeometryRef().GetLinearGeometry()
json_feat = geojson.loads(geom.ExportToJson())
shapes.append(json_feat)
feat = layer.GetNextFeature()
feat = None
# Close the connection
wfs_ds = None
logging.info("Iteration done. Creating validation.")
if count == 0:
logging.info("No features in the layer.")
real_data = np.zeros_like(result_file.read(1))
else:
real_data = rasterio.features.rasterize(
shapes,
out_shape=result_file.shape,
transform=result_file.transform,
dtype='uint8'
)
return real_data
def validate(url, layer_name, srs, bbox, result_path, output_path, service_type, service_version,\
max_features_for_validation, flip_features, data_bounds, service_number):
"""Fetch data for the corresponding layer and generate an array
that maps the spatial extent of the data
:param str url: First part of URL pointing to a particular service
:param str layer_name: Name of the layer
:param str srs: EPSG code of a coordinate system
:param list bbox: Bounding box as specified in the service metadata
:param str result_path: Path to raster file produced by the tool
:param str output_path: Path to location where the validation raster will be written.
:param str service_type: Type of service (WMS/WFS)
:param str service_version: Number of correctly determined pixels
:param string max_features_for_validation: If exceeded, validation is skipped
:param boolean flip_features: If set to True, coordinate order must be flipped
in the URL
:param list data_bounds: Bounding box that encapsulates
area with data as determined by the tool
:param str service_number: Type of service (WMS/WFS)
:return int: 0 if validation run OK; -1 if it failed.
"""
logging.info("validation starts at {}".format(datetime.datetime.now()))
# Open the file to be validated
file = rasterio.open(result_path)
if flip_features:
bbox = [bbox[1], bbox[0], bbox[3], bbox[2]]
# change bbox from a list into a string, remove spaces and brackets
bbox_str = ''.join(char for char in str(bbox) if char not in '[]() ')
if service_type == 'WMS':
real_data, result = validate_wms(url, layer_name, srs, bbox_str,\
file.read(1), service_version)
elif service_type == 'WFS':
real_data, result = validate_wfs(url, layer_name, srs, bbox_str, file,\
service_version, max_features_for_validation), file.read(1)
if real_data is None:
logging.warning("Validation not successful. *feeling embarassed*")
return -1
# Since result is binary, the comparison is 0 if a value was the same.
# 1 if we got false positive and -1 (i.e. 255 in uint8) if we got false negative.
comparison = result - real_data
output = rasterio.open(
output_path,
'w',
driver='GTiff',
nodata=99,
height=file.height,
width=file.width,
count=1,
dtype='uint8',
crs=file.crs,
transform=file.transform
)
output.write(comparison, 1)
output.close()
logging.info("Statistics:")
logging.info("This is the np.unique count: {}"\
.format(np.unique(comparison, return_counts=True)))
pixels_count = np.size(comparison)
correct_pixels = None
false_neg_pixels = None
false_pos_pixels = None
unique_vals = np.unique(comparison, return_counts=True)
for i in range(len(unique_vals[0])):
if unique_vals[0][i] == 0:
correct_pixels = unique_vals[1][i]
logging.info("Correct: {}%".format(round(100*correct_pixels/pixels_count)))
elif unique_vals[0][i] == 1:
false_pos_pixels = unique_vals[1][i]
logging.info("False positives: {}%".format(round(100*false_pos_pixels/pixels_count)))
# this is supposed to be -1 but since uint8 is 0 to 255 it underflows and makes it 255
elif unique_vals[0][i] == 255:
false_neg_pixels = unique_vals[1][i]
logging.info("False negatives: {}%".format(round(100*false_neg_pixels/pixels_count)))
else:
raise Exception("Unexpected values in the validation raster: {}"\
.format(unique_vals[0]))
bbox_area_decrease = area_decrease(bbox, data_bounds)
write_statistics(output_path, service_number, layer_name, pixels_count, correct_pixels,\
false_pos_pixels, false_neg_pixels, bbox_area_decrease)
return 0