diff --git a/hs_data_services/hs_data_services_sync/management/commands/find_hs_aggs_missing_in_geoserver.py b/hs_data_services/hs_data_services_sync/management/commands/find_hs_aggs_missing_in_geoserver.py
new file mode 100644
index 0000000..03a4835
--- /dev/null
+++ b/hs_data_services/hs_data_services_sync/management/commands/find_hs_aggs_missing_in_geoserver.py
@@ -0,0 +1,69 @@
+from django.core.management.base import BaseCommand
+from hs_data_services_sync import utilities
+from hs_data_services import settings
+
+
+class Command(BaseCommand):
+    """Report HydroShare geo aggregations that have no matching GeoServer store."""
+
+    help = "Find HS aggregations that don't have a corresponding layer in GeoServer"
+
+    def handle(self, *args, **options):
+        """
+        Compare the raster and feature aggregations of every public geo
+        resource with the stores GeoServer lists for it, then print the
+        HydroShare URL of each aggregation that has no matching store.
+        """
+        hydroshare_url = settings.HYDROSHARE_URL.replace("hsapi", "resource")
+        resources = utilities.get_list_of_public_geo_resources()
+        num_resources = len(resources)
+        print(f"Found {num_resources} resources in HydroShare")
+
+        # for every hydroshare resource, get a list of its geo aggregations
+        # and compare them with the layers in geoserver
+        hs_aggregations_missing_in_geoserver = []
+        total_missing_layers = 0
+
+        for res_count, res_id in enumerate(resources, start=1):
+            print(f"{total_missing_layers} total layers missing and {len(hs_aggregations_missing_in_geoserver)} resources so far...")
+            print(f"{res_count}/{num_resources} - Resource {res_id}")
+
+            # ignore_already_registered=True bypasses the "already in GeoServer"
+            # filter, so layers that are already registered are listed as well
+            file_list = utilities.get_database_list(res_id, ignore_already_registered=True)["geoserver"]["register"]
+            raster_files = [f for f in file_list if f["layer_type"] == 'GeographicRaster']
+            feature_files = [f for f in file_list if f["layer_type"] == 'GeographicFeature']
+            # skip the resource only when it has neither rasters nor features
+            if not raster_files and not feature_files:
+                print(f"Resource {res_id} has no Geo aggregations")
+                continue
+
+            geoserver_list = utilities.get_geoserver_list(res_id)
+            # the store lists are the same for every file, so build them once
+            geoserver_rasters = [gs for gs in geoserver_list if gs[1] == 'coveragestores']
+            geoserver_features = [gs for gs in geoserver_list if gs[1] == 'datastores']
+            files_missing_for_this_res = []
+            for raster in raster_files:
+                if (raster["layer_name"].replace("/", " "), 'coveragestores') not in geoserver_rasters:
+                    files_missing_for_this_res.append(raster)
+            for feature in feature_files:
+                if (feature["layer_name"].replace("/", " "), 'datastores') not in geoserver_features:
+                    files_missing_for_this_res.append(feature)
+
+            num_files_missing = len(files_missing_for_this_res)
+            if num_files_missing > 0:
+                total_missing_layers += num_files_missing
+                hs_aggregations_missing_in_geoserver.append((res_id, files_missing_for_this_res))
+            else:
+                print(f"Resource {res_id} has all files registered in GeoServer")
+
+        print("-" * 80)
+        for res_id, files in hs_aggregations_missing_in_geoserver:
+            print("*" * 80)
+            print(f"Resource {res_id} has the following missing files in GeoServer: ")
+            for missing_file in files:
+                print(f"{hydroshare_url}/{missing_file['hs_path']}")
+        print("-" * 80)
+        print("Search complete!")
+        print(f"Found {len(hs_aggregations_missing_in_geoserver)} resources with missing layers")
+        print(f"Found {total_missing_layers} total layers missing in GeoServer")
diff --git a/hs_data_services/hs_data_services_sync/management/commands/find_hs_resources_missing_in_geoserver.py b/hs_data_services/hs_data_services_sync/management/commands/find_hs_resources_missing_in_geoserver.py
new file mode 100644
index 0000000..caa7795
--- /dev/null
+++ b/hs_data_services/hs_data_services_sync/management/commands/find_hs_resources_missing_in_geoserver.py
@@ -0,0 +1,58 @@
+from django.core.management.base import BaseCommand
+from hs_data_services_sync import utilities
+
+
+class Command(BaseCommand):
+    """List public HydroShare geo resources that have no GeoServer workspace."""
+
+    help = "Find HS resources that don't have a corresponding workspace in GeoServer"
+
+    def handle(self, *args, **options):
+        """
+        Print the public geo resources that have no GeoServer workspace,
+        followed by the geo aggregations found in each of those resources.
+        """
+        resources = utilities.get_list_of_public_geo_resources()
+        num_resources = len(resources)
+        print(f"Found {num_resources} resources in HydroShare")
+
+        # use the geoserver rest api to get a list of workspaces
+        geoserver_workspaces = utilities.get_geoserver_workspaces_list()
+        num_workspaces = len(geoserver_workspaces)
+        print(f"Found {num_workspaces} workspaces in GeoServer")
+
+        # every workspace name has a leading "HS-" string that must be removed;
+        # a set keeps the membership test below cheap for long resource lists
+        workspace_res_ids = {ws["name"][3:] for ws in geoserver_workspaces}
+
+        hs_resources_missing_in_geoserver = [
+            res_id for res_id in resources if res_id not in workspace_res_ids
+        ]
+        num_missing_resources = len(hs_resources_missing_in_geoserver)
+        print(f"Found {num_missing_resources} resources missing in GeoServer")
+
+        # Now list all of the Geo aggregations in the missing resources
+        # so that we can compare them with the layers in GeoServer
+        print("Now listing all Geo aggregations in the missing resources")
+        total_missing_layers = 0
+        for count, res_id in enumerate(hs_resources_missing_in_geoserver, start=1):
+            print("*" * 80)
+            print(f"{count}/{num_missing_resources} - Resource {res_id}")
+            database_list = utilities.get_database_list(res_id=res_id)
+            # resources that are not reported as public are skipped
+            if database_list['access'] != 'public':
+                continue
+            dbs = database_list['geoserver']['register']
+            num_dbs = len(dbs)
+            if num_dbs == 0:
+                print(f"Resource {res_id} has no Geo aggregations")
+                continue
+            print(f"Resource {res_id} has {num_dbs} Geo aggregations")
+            for db in dbs:
+                print(f"Resource {res_id} has a {db['layer_type']}: {db['hs_path']}")
+            total_missing_layers += num_dbs
+        print()
+        print("-" * 80)
+        print(f"Found {num_missing_resources} resources missing in GeoServer")
+        print(f"Found {total_missing_layers} Geo aggregations missing in GeoServer")
+        print("Search complete")
diff --git a/hs_data_services/hs_data_services_sync/utilities.py b/hs_data_services/hs_data_services_sync/utilities.py
index 7339f91..b0fbe09 100644
--- a/hs_data_services/hs_data_services_sync/utilities.py
+++ b/hs_data_services/hs_data_services_sync/utilities.py
@@ -62,7 +62,10 @@ def update_data_services(resource_id):
     return response
 
 
-def get_database_list(res_id):
+def get_database_list(res_id, ignore_already_registered=False):
     """
     Gets a list of HydroShare databases on which web services can be published.
+
+    When ignore_already_registered is True, the check that skips layers
+    already present in GeoServer is bypassed, so those layers are listed too.
     """
@@ -113,7 +116,7 @@ def get_database_list(res_id):
             layer_type = result["content_type"]
             if result["content_type"] == "image/tiff" and layer_ext == "tif":
                 registered_list.append(layer_name.replace("/", " "))
-                if layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
+                if ignore_already_registered or layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
                     db_list["geoserver"]["register"].append(
                         {
                             "layer_name": layer_name,
@@ -128,7 +131,7 @@ def get_database_list(res_id):
                     )
             if result["content_type"] == "application/x-qgis" and layer_ext == "shp":
                 registered_list.append(layer_name.replace("/", " "))
-                if layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
+                if ignore_already_registered or layer_name.replace("/", " ") not in [i[0] for i in geoserver_list]:
                     # get the associated .shx, .dbf, and .prj files
                     extensions = [".shx", ".dbf", ".prj"]
                     associated_files = []
@@ -168,6 +171,33 @@ def get_database_list(res_id):
     return db_list
 
 
+def get_geoserver_workspaces_list():
+    """
+    Gets the list of workspaces registered on the GeoServer instance.
+
+    Returns the "workspace" entries of GeoServer's workspaces.json REST
+    response, or an empty list if the request does not return HTTP 200 or
+    the response contains no workspaces.
+    """
+
+    logger.info("Getting geoserver workspaces list")
+    workspace_list = []
+
+    geoserver_url = settings.DATA_SERVICES.get("geoserver", {}).get('URL')
+
+    workspace_rest_url = f"{geoserver_url}/workspaces.json"
+    response = requests.get(workspace_rest_url)
+
+    if response.status_code == 200:
+        workspaces = json.loads(response.content).get("workspaces")
+        # a missing or empty value (such as an empty string) means there are
+        # no workspaces to report
+        if workspaces:
+            workspace_list = workspaces["workspace"]
+
+    return workspace_list
+
+
 def get_geoserver_list(res_id):
     """
     Gets a list of data stores and coverages from a GeoServer workspace.