Skip to content

Commit b551908

Browse files
authored
* initial catalog function * initial work for condensed catalog * housekeeping for testing * wrapping up first version of catalog
1 parent 45d2f30 commit b551908

8 files changed

+262
-31
lines changed

Readme.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ These are the driving design ideas and goals of the hec-dss-python project (subj
1717
|Easy transition from Jython|HEC products use an existing [Java/Jython API](https://www.hec.usace.army.mil/confluence/dssdocs/dssvueum/scripting/reading-and-writing-to-hec-dss-files). We will loosely follow that design | simplify porting from Jython|
1818
| hec_dss_native.py | native binding layer | isolate interactions with the low-level library (if performance is an issue, this ctypes layer can be replaced) |
1919
| hec_dss.py | Programmer entry point; Python API | Hides interactions with hec_dss_native; seeks to provide a simple user experience |
20+
|catalog.py|manage list of DSS objects (catalog) |create condensed catalog perspective|
2021
|Pandas_Series_Utilities.py [future](https://github.com/HydrologicEngineeringCenter/hec-dss-python/issues/8) |NumPy/pandas support |provide features such as dataframes, separate from hec-dss.py; can be developed by different/parallel developers|
21-
| Easy to get started |nothing to install, just copy python files and shared library |require minimal privileges to install|
22+
|Easy to get started |nothing to install, just copy python files and shared library |require minimal privileges to install|
2223

2324

2425
## Features

catalog.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from record_type import RecordType
2+
from dsspath import DssPath
3+
from datetime import datetime
4+
5+
class Catalog:
    """manage list of objects inside a DSS database

    The raw catalog (one pathname per record) is condensed on construction:
    time-series records that differ only in their D (date) part are merged
    into a single entry whose D part spans the first to the last date.

    Iterating a Catalog yields the condensed entries (DssPath objects);
    indexing, slicing and len() operate on the same condensed list.
    """

    # format of a single-date D part in a raw pathname, e.g. 01Jan1989
    _D_PART_FORMAT = "%d%b%Y"

    def __init__(self, items, recordTypes):
        """
        items is a list of raw DSS pathnames
        recordTypes is the parallel list of record type codes
        (converted with RecordType.RecordTypeFromInt)
        """
        self.rawCatalog = items
        self.rawRecordTypes = recordTypes
        self.timeSeriesDictNoDates = {}  # key is path without date, value is a list dates
        self.recordTypeDict = {}  # key is path w/o date, value is recordType
        self.index = 0  # cursor used only by the legacy __next__ protocol
        self.__create_condensed_catalog()

    def __create_condensed_catalog(self):
        """
        condensed catalog combines time-series records into a single condensed path
        other record types are not condensed.
        time-series records must match all parts except the D (date) part to be combined.
        """
        fmt = self._D_PART_FORMAT
        self.items = []
        for i, rawPath in enumerate(self.rawCatalog):
            recordType = RecordType.RecordTypeFromInt(self.rawRecordTypes[i])
            path = DssPath(rawPath, recordType)
            cleanPath = str(path.pathWithoutDate())
            self.recordTypeDict[cleanPath] = recordType

            if not path.isTimeSeries():
                # add NON time-series to list (nothing else needed)
                self.items.append(path)
                continue

            # if timeseries - accumulate dates within a dataset
            try:
                t = datetime.strptime(path.D, fmt)
            except ValueError:
                # D part is blank or is not a single date, so it cannot be
                # condensed; keep the record as-is instead of aborting the
                # whole catalog.
                self.items.append(path)
                continue
            self.timeSeriesDictNoDates.setdefault(cleanPath, []).append(t)

        # go through each timeSeriesDictNoDates, and sort each list of dates
        # use first and last to create the condensed path
        for cleanPath, dates in self.timeSeriesDictNoDates.items():
            dateList = sorted(dates)
            condensedDpart = dateList[0].strftime(fmt)
            if len(dateList) > 1:
                condensedDpart += "-" + dateList[-1].strftime(fmt)
            # insert condensed D part into path used as key
            p = DssPath(cleanPath, self.recordTypeDict[cleanPath])
            p.D = condensedDpart
            self.items.append(p)

    def print(self):
        """print every condensed catalog entry, one per line"""
        for ds in self.items:
            print(ds)

    def __len__(self):
        """number of condensed catalog entries"""
        return len(self.items)

    def __getitem__(self, key):
        """index or slice the condensed entries, e.g. catalog[0:5]"""
        return self.items[key]

    def __iter__(self):
        # Return an independent iterator so nested/concurrent loops over the
        # same Catalog do not share (and corrupt) one cursor.
        self.index = 0  # still reset the legacy cursor used by __next__
        return iter(self.items)

    def __next__(self):
        """legacy cursor protocol: next(catalog) walks items using self.index"""
        if self.index < len(self.items):
            result = self.items[self.index]
            self.index += 1
            return result
        raise StopIteration

dateconverter.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
class DateConverter:
44

55

6-
#
7-
# convert python datetime to DSS 24:00 style string
8-
# 2023-08-25 09:32:46.832952 -> 25Aug2023 09:23:47
9-
# 2023-08-25 00:00:00.0000000 -> 24Aug2023 24:00
10-
# 2023-08-25 00:10:00.0000000 -> 25Aug2023 00:10
116
@staticmethod
127
def dss_datetime_from_string(dt):
8+
"""
9+
convert python datetime to DSS 24:00 style string
10+
2023-08-25 09:32:46.832952 -> 25Aug2023 09:23:47
11+
2023-08-25 00:00:00.0000000 -> 24Aug2023 24:00
12+
2023-08-25 00:10:00.0000000 -> 25Aug2023 00:10
13+
"""
1314
if dt.time() == time(0, 0, 0, 0): # midnight
1415
# subtract one day
1516
dtm1 = dt - timedelta(days=1)
@@ -22,6 +23,9 @@ def dss_datetime_from_string(dt):
2223

2324
@staticmethod
2425
def date_times_from_julian_array(times_julian, time_granularity_seconds, julian_base_date):
26+
""""
27+
convert from DSS integer datetime array to python datetime array
28+
"""
2529
if times_julian is None:
2630
raise ValueError("Time Series Times array was None. Something didn't work right in DSS.")
2731

@@ -42,10 +46,11 @@ def date_times_from_julian_array(times_julian, time_granularity_seconds, julian_
4246

4347

4448

45-
dt = datetime.today()
46-
x = DateConverter.dss_datetime_from_string(dt)
47-
print(x)
48-
midnight = datetime(2023,8,3,0,0,0)
49-
x = DateConverter.dss_datetime_from_string(midnight)
50-
print(x)
49+
if __name__ == "__main__":
    # manual check: convert the current time and an exact-midnight value,
    # printing each converted string
    for sample in (datetime.today(), datetime(2023, 8, 3, 0, 0, 0)):
        print(DateConverter.dss_datetime_from_string(sample))
5156

dsspath.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from record_type import RecordType
2+
3+
class DssPath:
    """manage parts of DSS path /A/B/C/D/E/F/
    condenses D part for timeseries records
    """

    # record types that are treated as time series by isTimeSeries()
    _timeSeriesFamily = [RecordType.IrregularTimeSeries, RecordType.RegularTimeSeries, RecordType.RegularTimeSeriesProfile]

    def __init__(self, path, recType):
        """
        path is raw dss pathname
        recType is a RecordType , such as RecordType.RegularTimeSeries

        raises ValueError (an Exception subclass) when path is empty, is not
        wrapped in '/' characters, or has fewer than six parts
        """
        if not path or path[0] != '/' or path[-1] != '/':
            raise ValueError("Invalid DSS Path: '" + str(path) + "'")
        inner = path[1:-1]  # remove beginning and ending '/'
        split_parts = inner.split('/')
        if len(split_parts) < 6:
            # fail here rather than leave A..F undefined and break later in __str__
            raise ValueError("Invalid DSS Path: '" + path + "'")

        self.rawPath = inner
        # only the first six parts are used; anything beyond F is ignored
        self.A, self.B, self.C, self.D, self.E, self.F = split_parts[:6]
        self.recType = recType

    def __str__(self):
        return "/" + "/".join((self.A, self.B, self.C, self.D, self.E, self.F)) + "/"

    def pathWithoutDate(self):
        """return a new DssPath with the same parts and record type but an empty D part"""
        s = "/" + self.A + "/" + self.B + "/" + self.C + "//" + self.E + "/" + self.F + "/"
        return DssPath(s, self.recType)

    def isTimeSeries(self):
        """True when the record type is one of the time-series types"""
        return self.recType in DssPath._timeSeriesFamily

    def print(self):
        """print each path part on its own line, labelled a: through f:"""
        # parts live directly on the instance (there is no self.path attribute)
        print("a:" + self.A)
        print("b:" + self.B)
        print("c:" + self.C)
        print("d:" + self.D)
        print("e:" + self.E)
        print("f:" + self.F)
44+
45+

hec_dss.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime
33
from dateconverter import DateConverter
44
from timeseries import TimeSeries
5+
from catalog import Catalog
56
import os
67

78
class HecDss:
@@ -72,23 +73,22 @@ def put(self,ts):
7273

7374
self._native.hec_dss_tsStoreRegular(ts.pathname,startDate,startTime,ts.values,quality,False,ts.units,ts.dataType)
7475

76+
def getCatalog(self):
    """Fetch the raw paths and record types from the native layer and wrap them in a Catalog."""
    rawPaths, rawRecordTypes = self._native.hec_dss_catalog()
    result = Catalog(rawPaths, rawRecordTypes)
    return result
7579

7680
def recordCount(self):
7781
return self._native.hec_dss_record_count()
7882

7983
def setDebugLevel(self,level):
8084
return self._native.hec_dss_set_debug_level(level)
8185

82-
83-
#import pdb;pdb.set_trace()
84-
dss = HecDss("sample7.dss")
85-
print("record count = "+str(dss.recordCount()))
86-
t1 = datetime(2005, 1, 1)
87-
t2 = datetime(2005, 1 ,4)
88-
tsc = dss.get("//SACRAMENTO/PRECIP-INC//1Day/OBS/",t1,t2)
89-
tsc.print_to_console()
90-
tsc2 = dss.get("//SACRAMENTO/TEMP-MAX//1Day/OBS/",t1,t2)
91-
tsc2.print_to_console()
92-
tsc.pathname = "//SACRAMENTO/PRECIP-INC//1Day/OBS-modified/"
93-
#dss.setDebugLevel(15)
94-
dss.put(tsc)
86+
87+
if __name__ == "__main__":
    # manual smoke run: print every entry of the sample file's catalog
    sample = HecDss("sample7.dss")
    for entry in sample.getCatalog():
        print(entry)

hec_dss_native.py

+48-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import ctypes
22
from ctypes import c_char, c_double, c_int,byref, create_string_buffer
3+
import array
34

45

56
class HecDssNative:
7+
"""Wrapper for Native method calls to hecdss.dll or libhecdss.so """
8+
69
def __init__(self):
710
self.dll = ctypes.CDLL("hecdss")
811

9-
10-
11-
1212
def hec_dss_open(self,dss_filename):
1313
self.dll.hec_dss_open.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p)]
1414
self.dll.hec_dss_open.restype = ctypes.c_int
@@ -45,6 +45,8 @@ def __hec_dss_set_value(self,name,value):
4545
def hec_dss_set_debug_level(self,value):
4646
self.__hec_dss_set_value("mlvl",value)
4747

48+
49+
4850
def hec_dss_export_to_file(self,path,outputFile,startDate,startTime,endDate,endTime):
4951

5052
self.dll.hec_dss_export_to_file.argtypes = [
@@ -60,6 +62,49 @@ def hec_dss_export_to_file(self,path,outputFile,startDate,startTime,endDate,endT
6062

6163
result = self.dll.hec_dss_export_to_file(self.handle,path, outputFile, startDate, startTime, endDate, endTime)
6264

65+
def hec_dss_CONSTANT_MAX_PATH_SIZE(self):
    """Call the native hec_dss_CONSTANT_MAX_PATH_SIZE function and return its int result."""
    native_fn = self.dll.hec_dss_CONSTANT_MAX_PATH_SIZE
    native_fn.restype = ctypes.c_int  # declare the C return type before calling
    max_path_size = native_fn()
    return max_path_size
69+
70+
def hec_dss_catalog(self,filter=""):
    """
    retrieves a list of objects in a DSS database

    filter is ASCII-encoded and passed to the native call as pathFilter

    returns a list of paths, and recordTypes
    (both lists have one entry per record reported by the native call)
    """
    count = self.hec_dss_record_count()
    pathBufferSize = self.hec_dss_CONSTANT_MAX_PATH_SIZE()
    self.dll.hec_dss_catalog.argtypes = [
        ctypes.c_void_p,  # dss (assuming it's a pointer)
        ctypes.c_char_p,  # pathBuffer
        ctypes.POINTER(ctypes.c_int),  # recordTypes
        ctypes.c_char_p,  # pathFilter
        ctypes.c_int,  # count
        ctypes.c_int  # pathBufferItemSize
    ]
    self.dll.hec_dss_catalog.restype = ctypes.c_int

    # one fixed-size slot of pathBufferSize bytes per record
    c_rawCatalog = create_string_buffer(count * pathBufferSize)
    pathFilter = filter.encode("ascii")
    recordTypes = (ctypes.c_int32 * count)()

    numRecords = self.dll.hec_dss_catalog(self.handle,c_rawCatalog, recordTypes, pathFilter, count, pathBufferSize)
    # Clamp to what was actually allocated; a negative value (presumably an
    # error code - TODO confirm against the native API) yields empty lists.
    numRecords = max(0, min(numRecords, count))

    rawBytes = c_rawCatalog.raw
    pathNameList = []
    for i in range(numRecords):
        start = i * pathBufferSize
        slot = rawBytes[start:start + pathBufferSize]
        # keep only the bytes before the first NUL terminator
        pathNameList.append(slot.split(b'\x00', 1)[0].decode('ascii'))

    # slice to numRecords (not count) so the types stay parallel to the paths
    recordTypeArray = list(recordTypes[:numRecords])

    return pathNameList,recordTypeArray
106+
107+
63108
def hec_dss_tsGetSizes(self,pathname,
64109
startDate, startTime,
65110
endDate, endTime,numberValues,qualityElementSize):

record_type.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from enum import Enum
2+
3+
class RecordType(Enum):
    """Categories of DSS records, plus a mapping from numeric record-type codes."""

    Unknown = 0
    RegularTimeSeriesProfile = 1
    RegularTimeSeries = 2
    IrregularTimeSeries = 3
    PairedData = 4
    Text = 5
    Grid = 6
    Tin = 7
    LocationInfo = 8

    @staticmethod
    def RecordTypeFromInt(recType):
        """Map a numeric record-type code to a RecordType member.

        Codes that match none of the ranges or exact values below map to
        RecordType.Unknown.
        """
        if 100 <= recType < 110:
            # 102 and 107 are the profile variants within the regular range
            is_profile = recType in (102, 107)
            return RecordType.RegularTimeSeriesProfile if is_profile else RecordType.RegularTimeSeries
        if 110 <= recType < 200:
            return RecordType.IrregularTimeSeries
        if 200 <= recType < 300:
            return RecordType.PairedData
        if 300 <= recType < 400:
            return RecordType.Text
        if 400 <= recType < 450:
            return RecordType.Grid
        if recType == 450:
            return RecordType.Tin
        if recType == 20:
            return RecordType.LocationInfo
        return RecordType.Unknown

tests.py

+39-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from hec_dss import HecDss
22
from datetime import datetime
3-
3+
from catalog import Catalog
44
class Tests:
55

66
@staticmethod
@@ -16,9 +16,45 @@ def test_issue9():
1616
for path in pathnames:
1717
print(f"reading {path}")
1818
tsc = dss.get(path,t1,t2)
19-
print("len(tsc.values) = {len(tsc.values)}")
19+
print(f"len(tsc.values) = {len(tsc.values)}")
2020
assert(len(tsc.values)>1)
2121

22+
@staticmethod
def basic_tests():
    """Read the catalog and two series from sample7.dss, then write one series to a new path and read it back."""
    # local import: islice takes the first entries from any iterable, so this
    # works even when Catalog supports iteration only (no slicing)
    from itertools import islice

    dss = HecDss("sample7.dss")
    print("record count = "+str(dss.recordCount()))
    catalog = dss.getCatalog()
    # show the first five catalog entries
    for entry in islice(catalog, 5):
        print(entry)

    t1 = datetime(2005, 1, 1)
    t2 = datetime(2005, 1 ,4)
    tsc = dss.get("//SACRAMENTO/PRECIP-INC//1Day/OBS/",t1,t2)
    tsc.print_to_console()
    assert(len(tsc.values)>0)
    tsc2 = dss.get("//SACRAMENTO/TEMP-MAX//1Day/OBS/",t1,t2)
    tsc2.print_to_console()
    assert(len(tsc2.values)>0)
    # save to a new path
    tsc.pathname = "//SACRAMENTO/PRECIP-INC//1Day/OBS-modified/"
    dss.put(tsc)
    # read the copy back and check that it has the same number of values
    tsc3 = dss.get(tsc.pathname,t1,t2)
    assert(len(tsc3.values)== len(tsc.values))
42+
43+
@staticmethod
def catalog_test():
    """Build a Catalog from five TEMP-MIN paths dated 01Jan1989 through 01Jan1993 (record type 100) and print it."""
    rawPaths = [
        f"//SACRAMENTO/TEMP-MIN/01Jan{year}/1Day/OBS/"
        for year in range(1989, 1994)
    ]
    rt = [100] * len(rawPaths)
    Catalog(rawPaths, rt).print()
2255

23-
Tests.test_issue9()
2456

57+
if __name__ == "__main__":
    # Only the catalog test runs by default; the other tests are left
    # disabled here and can be re-enabled by uncommenting them:
    #   Tests.test_issue9()
    #   Tests.basic_tests()
    Tests.catalog_test()

0 commit comments

Comments
 (0)