Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from django.core.management.base import BaseCommand
from hs_data_services_sync import utilities
from hs_data_services import settings


class Command(BaseCommand):
    """
    Management command that reports HydroShare geo aggregations with no GeoServer layer.

    For every resource returned by ``utilities.get_list_of_public_geo_resources()``
    the command collects the resource's ``GeographicRaster`` and
    ``GeographicFeature`` entries and checks each one against the stores
    returned by ``utilities.get_geoserver_list()``. Progress and the final
    report are printed to stdout.
    """

    help = "Find HS aggregations that don't have a corresponding layer in GeoServer"

    def handle(self, *args, **options):
        """
        Run the comparison and print a report of the missing layers.

        ``args`` and ``options`` are accepted for the Django command interface
        but are not used.
        """
        # The report links to the resource pages, so swap the "hsapi" segment
        # of the configured URL for "resource".
        hydroshare_url = settings.HYDROSHARE_URL.replace("hsapi", "resource")
        resources = utilities.get_list_of_public_geo_resources()
        num_resources = len(resources)
        print(f"Found {num_resources} resources in HydroShare")

        # for every hydroshare resource, get a list of its geo aggregations
        # and compare them with the layers in geoserver
        hs_aggregations_missing_in_geoserver = []
        total_missing_layers = 0

        for res_count, res_id in enumerate(resources, start=1):
            print(
                f"{total_missing_layers} total layers missing and "
                f"{len(hs_aggregations_missing_in_geoserver)} resources so far..."
            )
            print(f"{res_count}/{num_resources} - Resource {res_id}")

            # ignore_already_registered=True keeps layers that GeoServer already
            # has in the "register" list, so the full set of geo files is returned.
            file_list = utilities.get_database_list(
                res_id, ignore_already_registered=True
            )["geoserver"]["register"]
            raster_files = [f for f in file_list if f["layer_type"] == 'GeographicRaster']
            feature_files = [f for f in file_list if f["layer_type"] == 'GeographicFeature']

            # Skip only when BOTH lists are empty; a resource holding nothing but
            # feature aggregations must still be checked.
            if not raster_files and not feature_files:
                print(f"Resource {res_id} has no Geo aggregations")
                continue

            # Each GeoServer entry is indexed as (store name, store type).
            # Build the per-type name sets once per resource.
            geoserver_list = utilities.get_geoserver_list(res_id)
            coverage_store_names = {gs[0] for gs in geoserver_list if gs[1] == 'coveragestores'}
            data_store_names = {gs[0] for gs in geoserver_list if gs[1] == 'datastores'}

            # Store names use spaces where the HydroShare layer name has slashes.
            # Rasters are listed before features, as in the printed report.
            files_missing_for_this_res = [
                raster for raster in raster_files
                if raster["layer_name"].replace("/", " ") not in coverage_store_names
            ]
            files_missing_for_this_res.extend(
                feature for feature in feature_files
                if feature["layer_name"].replace("/", " ") not in data_store_names
            )

            num_files_missing = len(files_missing_for_this_res)
            if num_files_missing > 0:
                total_missing_layers += num_files_missing
                hs_aggregations_missing_in_geoserver.append((res_id, files_missing_for_this_res))
            else:
                print(f"Resource {res_id} has all files registered in GeoServer")

        print("-" * 80)
        for res_id, missing_files in hs_aggregations_missing_in_geoserver:
            print("*" * 80)
            print(f"Resource {res_id} has the following missing files in GeoServer: ")
            for missing_file in missing_files:
                print(f"{hydroshare_url}/{missing_file['hs_path']}")
        print("-" * 80)
        print("Search complete!")
        print(f"Found {len(hs_aggregations_missing_in_geoserver)} resources with missing layers")
        print(f"Found {total_missing_layers} total layers missing in GeoServer")
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from django.core.management.base import BaseCommand
from hs_data_services_sync import utilities


class Command(BaseCommand):
    """
    Management command that reports HydroShare resources with no GeoServer workspace.

    The resource ids from ``utilities.get_list_of_public_geo_resources()`` are
    compared with the workspace names from
    ``utilities.get_geoserver_workspaces_list()``. For each resource without a
    workspace, the command prints the geo aggregations listed under
    ``["geoserver"]["register"]`` by ``utilities.get_database_list()``.
    """

    help = "Find HS resources that don't have a corresponding workspace in GeoServer"

    def handle(self, *args, **options):
        """
        Run the comparison and print the report to stdout.

        ``args`` and ``options`` are accepted for the Django command interface
        but are not used.
        """
        resources = utilities.get_list_of_public_geo_resources()
        num_resources = len(resources)
        print(f"Found {num_resources} resources in HydroShare")

        # use the geoserver rest api to get a list of workspaces
        geoserver_workspaces = utilities.get_geoserver_workspaces_list()
        num_workspaces = len(geoserver_workspaces)
        print(f"Found {num_workspaces} workspaces in GeoServer")

        # every workspace name has a leading "HS-" string that must be removed;
        # a set keeps the membership test below O(1) per resource
        workspace_resource_ids = {ws["name"][3:] for ws in geoserver_workspaces}

        hs_resources_missing_in_geoserver = [
            res_id for res_id in resources if res_id not in workspace_resource_ids
        ]
        num_missing_resources = len(hs_resources_missing_in_geoserver)
        print(f"Found {num_missing_resources} resources missing in GeoServer")

        # Now list all of the Geo aggregations in the missing resources
        # So that we can compare them with the layers in GeoServer
        print("Now listing all Geo aggregations in the missing resources")
        total_missing_layers = 0
        for count, res_id in enumerate(hs_resources_missing_in_geoserver, start=1):
            print("*" * 80)
            print(f"{count}/{num_missing_resources} - Resource {res_id}")
            database_list = utilities.get_database_list(
                res_id=res_id,
            )
            # Only resources reported as public contribute to the totals.
            if database_list['access'] == 'public':
                dbs = database_list['geoserver']['register']
                num_dbs = len(dbs)
                if num_dbs == 0:
                    print(f"Resource {res_id} has no Geo aggregations")
                else:
                    print(f"Resource {res_id} has {num_dbs} Geo aggregations")
                    for db in dbs:
                        print(f"Resource {res_id} has a {db['layer_type']}: {db['hs_path']}")
                total_missing_layers += num_dbs
            print()
        print("-" * 80)
        print(f"Found {num_missing_resources} resources missing in GeoServer")
        print(f"Found {total_missing_layers} Geo aggregations missing in GeoServer")
        print("Search complete")
27 changes: 24 additions & 3 deletions hs_data_services/hs_data_services_sync/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def update_data_services(resource_id):
return response


def get_database_list(res_id):
def get_database_list(res_id, ignore_already_registered=False):
"""
Gets a list of HydroShare databases on which web services can be published.
"""
Expand Down Expand Up @@ -113,7 +113,7 @@ def get_database_list(res_id):
layer_type = result["content_type"]
if result["content_type"] == "image/tiff" and layer_ext == "tif":
registered_list.append(layer_name.replace("/", " "))
if layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
if ignore_already_registered or layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
db_list["geoserver"]["register"].append(
{
"layer_name": layer_name,
Expand All @@ -128,7 +128,7 @@ def get_database_list(res_id):
)
if result["content_type"] == "application/x-qgis" and layer_ext == "shp":
registered_list.append(layer_name.replace("/", " "))
if layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
if ignore_already_registered or layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
# get the associated .shx, .dbf, and .prj files
extensions = [".shx", ".dbf", ".prj"]
associated_files = []
Expand Down Expand Up @@ -168,6 +168,27 @@ def get_database_list(res_id):
return db_list


def get_geoserver_workspaces_list():
    """
    Gets the list of workspaces from GeoServer.

    Requests ``<geoserver URL>/workspaces.json``, where the URL is read from
    ``settings.DATA_SERVICES["geoserver"]["URL"]``, and returns the value found
    under ``["workspaces"]["workspace"]`` in the JSON response.

    Returns an empty list when the response status is not 200 or when the
    response has no (or an empty) ``"workspaces"`` entry.
    """

    logger.info("Getting geoserver workspaces list")
    workspace_list = []

    geoserver_url = settings.DATA_SERVICES.get("geoserver", {}).get('URL')

    workspace_rest_url = f"{geoserver_url}/workspaces.json"
    response = requests.get(workspace_rest_url)

    if response.status_code != 200:
        # An empty result here makes callers treat every resource as missing,
        # so leave a trace of the failed request instead of failing silently.
        logger.warning(
            "GeoServer workspaces request to %s returned status %s",
            workspace_rest_url,
            response.status_code,
        )
        return workspace_list

    response_content = json.loads(response.content)
    # The truthiness test also covers an empty-string "workspaces" value.
    workspaces = response_content.get("workspaces")
    if workspaces:
        workspace_list = workspaces["workspace"]

    return workspace_list


def get_geoserver_list(res_id):
"""
Gets a list of data stores and coverages from a GeoServer workspace.
Expand Down