Skip to content

Commit

Permalink
Merge pull request #179 from ecmwf/new_develop
Browse files Browse the repository at this point in the history
New develop
  • Loading branch information
mathleur authored Jul 25, 2024
2 parents ee2eae9 + a1e60e6 commit 09ffc37
Show file tree
Hide file tree
Showing 54 changed files with 149 additions and 174 deletions.
49 changes: 38 additions & 11 deletions performance/fdb_slice_many_numbers_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,50 @@
import time

import pandas as pd
import pygribjump as gj

from polytope.datacube.backends.fdb import FDBDatacube
from polytope.polytope import Polytope, Request
from polytope.shapes import All, Point, Select

time1 = time.time()
# Create a dataarray with 3 labelled axes using different index types

# config = {"class": "od", "expver": "0001", "levtype": "sfc", "type": "pf"}
options = {
"values": {"mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}},
"date": {"merge": {"with": "time", "linkers": ["T", "00"]}},
"step": {"type_change": "int"},
"number": {"type_change": "int"},
"longitude": {"cyclic": [0, 360]},
"latitude": {"reverse": {True}},
"axis_config": [
{"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]},
{"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
{
"axis_name": "date",
"transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
},
{
"axis_name": "values",
"transformations": [
{"name": "mapper", "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}
],
},
{"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]},
{"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]},
],
"compressed_axes_config": [
"longitude",
"latitude",
"levtype",
"step",
"date",
"domain",
"expver",
"param",
"class",
"stream",
"type",
"number",
],
"pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "type": "pf"},
}

config = {"class": "od", "expver": "0001", "levtype": "sfc", "type": "pf"}
fdbdatacube = FDBDatacube(config, axis_options=options)
self_API = Polytope(datacube=fdbdatacube, axis_options=options)
fdbdatacube = gj.GribJump()
self_API = Polytope(datacube=fdbdatacube, options=options)

print(time.time() - time1)

Expand All @@ -44,6 +69,8 @@
time3 = time.time()
result = self_API.retrieve(request)
time4 = time.time()
print("Polytope time")
print(self_API.time)
print(time.time() - time1)
print(time.time() - time2)
print(time4 - time3)
Expand Down
9 changes: 5 additions & 4 deletions polytope/datacube/backends/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def remap_path(self, path: DatacubePath):
return path

@staticmethod
def create(request, datacube, config={}, axis_options={}, compressed_axes_options=[], alternative_axes=[]):
def create(datacube, config={}, axis_options={}, compressed_axes_options=[], alternative_axes=[]):
# TODO: get the configs as None for pre-determined value and change them to empty dictionary inside the function
if type(datacube).__name__ == "DataArray":
from .xarray import XArrayDatacube
Expand All @@ -155,7 +155,8 @@ def create(request, datacube, config={}, axis_options={}, compressed_axes_option
if type(datacube).__name__ == "GribJump":
from .fdb import FDBDatacube

fdbdatacube = FDBDatacube(
datacube, request, config, axis_options, compressed_axes_options, alternative_axes
)
fdbdatacube = FDBDatacube(datacube, config, axis_options, compressed_axes_options, alternative_axes)
return fdbdatacube

def check_branching_axes(self, request):
pass
26 changes: 8 additions & 18 deletions polytope/datacube/backends/fdb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import operator
import time
from copy import deepcopy
from itertools import product

Expand All @@ -9,7 +8,7 @@


class FDBDatacube(Datacube):
def __init__(self, gj, request, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[]):
def __init__(self, gj, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[]):
if config is None:
config = {}

Expand All @@ -26,14 +25,11 @@ def __init__(self, gj, request, config=None, axis_options=None, compressed_axes_
self.gj = gj
if len(alternative_axes) == 0:
self.fdb_coordinates = self.gj.axes(partial_request)
self.check_branching_axes(request)
else:
self.fdb_coordinates = {}
for axis_config in alternative_axes:
self.fdb_coordinates[axis_config.axis_name] = axis_config.values

# self.check_branching_axes(request)

logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates))

self.fdb_coordinates["values"] = []
Expand Down Expand Up @@ -74,8 +70,14 @@ def check_branching_axes(self, request):
self.fdb_coordinates.pop("direction", None)
self.fdb_coordinates.pop("frequency", None)

# NOTE: verify that we also remove the axis object for axes we've removed here
axes_to_remove = set(self.complete_axes) - set(self.fdb_coordinates.keys())

# Remove the keys from self._axes
for axis_name in axes_to_remove:
self._axes.pop(axis_name, None)

def get(self, requests: TensorIndexTree):
time1 = time.time()
if len(requests.children) == 0:
return requests
fdb_requests = []
Expand Down Expand Up @@ -109,16 +111,8 @@ def get(self, requests: TensorIndexTree):
complete_uncompressed_request = (uncompressed_request, compressed_request[1])
complete_list_complete_uncompressed_requests.append(complete_uncompressed_request)
complete_fdb_decoding_info.append(fdb_requests_decoding_info[j])
print("TIME BEFORE GJ EXTRACT")
print(time.time() - time1)
time0 = time.time()
output_values = self.gj.extract(complete_list_complete_uncompressed_requests)
print("GJ EXTRACT TIME")
print(time.time() - time0)
time2 = time.time()
self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info)
print("TIME ASSIGNING GJ OUTPUT TO NODES")
print(time.time() - time2)

def get_fdb_requests(
self,
Expand Down Expand Up @@ -161,7 +155,6 @@ def get_fdb_requests(
self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path)

def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs):
time1 = time.time()
seen_indices = set()
for i, idxs_list in enumerate(current_start_idxs):
for k, sub_lat_idxs in enumerate(idxs_list):
Expand All @@ -184,9 +177,6 @@ def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idx
current_start_idxs[i].pop(k)
else:
current_start_idxs[i][k] = new_current_start_idx

print("TIME REMOVING DUPLICATES")
print(time.time() - time1)
return (fdb_node_ranges, current_start_idxs)

def nearest_lat_lon_search(self, requests):
Expand Down
4 changes: 0 additions & 4 deletions polytope/datacube/backends/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ def get(self, requests, leaf_path=None, axis_counter=0):
self.get(c, leaf_path, axis_counter + 1)
else:
key_value_path = {requests.axis.name: requests.values}
# num_indexes = len(requests.values)
print(key_value_path)
ax = requests.axis
(key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
key_value_path, leaf_path, self.unwanted_path
Expand All @@ -75,7 +73,6 @@ def get(self, requests, leaf_path=None, axis_counter=0):
else:
# We are at a leaf and need to assign value to it
leaf_path_copy = deepcopy(leaf_path)
# leaf_path_copy["values"] = tuple([leaf_path["values"][0] + i for i in range(num_indexes)])
unmapped_path = {}
self.refit_path(leaf_path_copy, unmapped_path, leaf_path)
for key in leaf_path_copy:
Expand All @@ -85,7 +82,6 @@ def get(self, requests, leaf_path=None, axis_counter=0):
unmapped_path[key] = list(unmapped_path[key])
subxarray = self.dataarray.sel(leaf_path_copy, method="nearest")
subxarray = subxarray.sel(unmapped_path)
# value = subxarray.item()
value = subxarray.values
key = subxarray.name
requests.result = (key, value)
Expand Down
8 changes: 0 additions & 8 deletions polytope/datacube/tree_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def encode_tree(tree: TensorIndexTree):

def write_encoded_tree_to_file(tree_bytes):
with open("encodedTree", "wb") as fs:
print(tree_bytes)
fs.write(tree_bytes)


Expand All @@ -39,18 +38,12 @@ def encode_child(tree: TensorIndexTree, child: TensorIndexTree, node, result_siz

if child.hidden:
# add indexes to parent and add also indexes size...
# print("LOOK NOW")
# print(len(tree.indexes))
# print(len(node.indexes))
# print(node.indexes)
node.indexes.extend(tree.indexes)
# node.size_indexes_branch.append(len(child.children))
break_tag = False
return break_tag

# need to add axis and children etc to the encoded node only if the tree node isn't hidden
else:
# new_result_size.append(len(child.values))
child_node.axis = child.axis.name
child_node.value.extend(child.values)
child_node.size_result.extend(new_result_size)
Expand All @@ -61,7 +54,6 @@ def encode_child(tree: TensorIndexTree, child: TensorIndexTree, node, result_siz
for c_ in child.children:
child_node.size_indexes_branch.append(len(c_.children))
break
# encode_child(child, c, child_node, [])

# we append the children once their branch has been completed until the leaf
if not child.hidden:
Expand Down
5 changes: 1 addition & 4 deletions polytope/engine/hullslicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,9 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]):
# Determine list of axes to compress
self.find_compressed_axes(datacube, polytopes)

# REMOVE COMPRESSED AXES WHICH ARE IN A UNION
# remove compressed axes which are in a union
self.remove_compressed_axis_in_union(polytopes)

print("LOOK NOW")
print(self.compressed_axes)

# Convert the polytope points to float type to support triangulation and interpolation
for p in polytopes:
self._unique_continuous_points(p, datacube)
Expand Down
17 changes: 4 additions & 13 deletions polytope/polytope.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __repr__(self):


class Polytope:
def __init__(self, request, datacube, engine=None, options=None):
def __init__(self, datacube, engine=None, options=None):
from .datacube import Datacube
from .engine import Engine

Expand All @@ -47,26 +47,17 @@ def __init__(self, request, datacube, engine=None, options=None):

axis_options, compressed_axes_options, config, alternative_axes = PolytopeOptions.get_polytope_options(options)

self.datacube = Datacube.create(
request, datacube, config, axis_options, compressed_axes_options, alternative_axes
)
self.datacube = Datacube.create(datacube, config, axis_options, compressed_axes_options, alternative_axes)
self.engine = engine if engine is not None else Engine.default()
self.time = 0

def slice(self, polytopes: List[ConvexPolytope]):
"""Low-level API which takes a polytope geometry object and uses it to slice the datacube"""
return self.engine.extract(self.datacube, polytopes)

def retrieve(self, request: Request, method="standard"):
"""Higher-level API which takes a request and uses it to slice the datacube"""
# self.datacube.check_branching_axes(request)
import time

time0 = time.time()
self.datacube.check_branching_axes(request)
request_tree = self.engine.extract(self.datacube, request.polytopes())
print("POLYTOPE FIND TREE BY SLICING")
print(time.time() - time0)
time1 = time.time()
self.datacube.get(request_tree)
print("FDB GET TIME")
print(time.time() - time1)
return request_tree
2 changes: 1 addition & 1 deletion tests/test_cyclic_axis_over_negative_vals.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def setup_method(self, method):
"compressed_axes_config": ["long", "level", "step", "date"],
}
self.slicer = HullSlicer()
self.API = Polytope(request={}, datacube=array, engine=self.slicer, options=options)
self.API = Polytope(datacube=array, engine=self.slicer, options=options)

# Testing different shapes

Expand Down
1 change: 0 additions & 1 deletion tests/test_cyclic_axis_slicer_not_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def setup_method(self, method):
}
self.slicer = HullSlicer()
self.API = Polytope(
request={},
datacube=array,
engine=self.slicer,
options=self.options,
Expand Down
1 change: 0 additions & 1 deletion tests/test_cyclic_axis_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def setup_method(self, method):
}
self.slicer = HullSlicer()
self.API = Polytope(
request={},
datacube=array,
engine=self.slicer,
options=self.options,
Expand Down
2 changes: 0 additions & 2 deletions tests/test_cyclic_nearest.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def test_regular_grid(self):
self.fdbdatacube = gj.GribJump()
self.slicer = HullSlicer()
self.API = Polytope(
request=request,
datacube=self.fdbdatacube,
engine=self.slicer,
options=self.options,
Expand All @@ -112,7 +111,6 @@ def test_regular_grid(self):
self.fdbdatacube = gj.GribJump()
self.slicer = HullSlicer()
self.API = Polytope(
request=request,
datacube=self.fdbdatacube,
engine=self.slicer,
options=self.options,
Expand Down
1 change: 0 additions & 1 deletion tests/test_cyclic_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def setup_method(self, method):
}
self.slicer = HullSlicer()
self.API = Polytope(
request={},
datacube=array,
engine=self.slicer,
options=options,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cyclic_snapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def setup_method(self, method):
"compressed_axes_config": ["long"],
}
self.slicer = HullSlicer()
self.API = Polytope(request={}, datacube=array, engine=self.slicer, options=options)
self.API = Polytope(datacube=array, engine=self.slicer, options=options)

# Testing different shapes

Expand Down
1 change: 0 additions & 1 deletion tests/test_datacube_axes_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def setup_method(self, method):
}
self.slicer = HullSlicer()
self.API = Polytope(
request={},
datacube=latlon_array,
engine=self.slicer,
options=self.options,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_datacube_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ def test_validate(self):
array = xr.Dataset(data_vars=dict(param=(["x", "y", "z"], dims)), coords={"x": [1], "y": [1], "z": [1]})
array = array.to_array()

datacube = Datacube.create({}, array, axis_options={})
datacube = Datacube.create({}, array, axis_options={})
datacube = Datacube.create(array, axis_options={})

datacube.validate(["x", "y", "z", "variable"])
datacube.validate(["x", "z", "y", "variable"])
Expand Down Expand Up @@ -54,8 +53,7 @@ def test_create(self):
for d, v in array.coords.variables.items():
print(v.dtype)

datacube = Datacube.create({}, array, axis_options={})
datacube = Datacube.create({}, array, axis_options={})
datacube = Datacube.create(array, axis_options={})

# Check the factory created the correct type of datacube
assert isinstance(datacube, XArrayDatacube)
Expand Down
Loading

0 comments on commit 09ffc37

Please sign in to comment.