Skip to content

Commit 1096ad0

Browse files
authored
Merge pull request #6 from ddasilva/metadata-writer
Work in progress code for Metadata Manager and NetCDF writer classes
2 parents 930628f + 095c118 commit 1096ad0

File tree

4 files changed

+262
-42
lines changed

4 files changed

+262
-42
lines changed

environment.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
name: suncet
22
channels:
33
- conda-forge
4-
# - defaults
54

65
dependencies:
76
- pip >=22.1
@@ -21,5 +20,5 @@ dependencies:
2120
- gnuradio-satellites=5.2.0
2221
- termcolor==2.4.0
2322
- pytest==7.1.3
23+
- h5netcdf==1.1.0
2424
- pykdtree==1.3.13
25-
#prefix: ~/anaconda3

suncet_processing_pipeline/make_level3.py

+118-39
Original file line numberDiff line numberDiff line change
@@ -2,66 +2,145 @@
22
This is the code to make the Level 3 data product.
33
"""
44
import argparse
5+
from pathlib import Path
6+
from pprint import pprint
57

8+
import h5netcdf
9+
import numpy as np
10+
from termcolor import cprint
611

12+
from . import config_parser
13+
from . import metadata_mgr
714

8-
class Level3:
9-
"""Class for applying the Level2 -> Level3 processing stage.
1015

11-
The key method is `run()`, which acts lik a main() method for
12-
this stage.
13-
"""
14-
def __init__(self, config):
16+
class Level3:
17+
"""Class for applying the Level2 -> Level3 processing stage."""
18+
def __init__(self, run_name, config):
1519
"""
16-
Parameters
17-
----------
18-
level2_data : dict, str -> array
19-
Level 2 data, mapping internal variable names to their values
20-
(generally numpy arrays)
21-
config : config_parser.Config
22-
SunCET Data Processing Pipeline configration object
20+
Args
21+
run_name: string, name of run we are processing
22+
config, config_parser.Config, SunCET Data Processing Pipeline
23+
configuration object
2324
"""
25+
self.run_name = run_name
26+
self.run_dir = Path('processing_runs') / run_name
2427
self.config = config
2528

29+
if not self.run_dir.exists():
30+
raise RuntimeError(f'Could not find directory {self.run_dir}')
31+
2632
def run(self):
2733
"""Main method to process the level2 -> level3 stage."""
28-
# Parse command line arguments
29-
parser = self._get_parser()
30-
args = parser.parse_args()
34+
# Load metadata
35+
metadata = metadata_mgr.MetadataManager(self.run_dir)
3136

32-
def _get_parser(self):
33-
"""Get command line ArgumentParser object with options defined.
34-
35-
Returns
36-
-------
37-
parser : argparse.ArgumentParser
38-
object which can be used to parse command line objects
37+
# Start NetCDF File
38+
nc_output_path = self.run_dir / 'level3' / 'suncet_level3.nc'
39+
nc = Level3NetCDFWriter(nc_output_path, metadata)
40+
41+
# Write some blank values
42+
nc.write_variable('carring_lat', np.zeros(100))
43+
nc.write_variable('carring_long', np.ones(100))
44+
nc.close()
45+
46+
47+
class Level3NetCDFWriter:
48+
"""Class for writing Level3 NetCDF Output."""
49+
def __init__(self, output_path, metadata):
50+
self._output_path = output_path
51+
self._metadata = metadata
52+
self._nc_file = h5netcdf.File(self._output_path, 'w')
53+
54+
def write_variable(self, internal_name, variable_value):
55+
"""Write a variable and its associated metadata to the file.
56+
57+
This function is passed the internal name of the variable, and uses
58+
the metadata manager to look up the NetCDF4 name and associated
59+
attributes.
60+
61+
Args
62+
internal_name: Internal name of variable (within code)
63+
variable_value: Value for the variable in the file
3964
"""
40-
parser = argparse.ArgumentParser()
41-
parser.add_argument('-v', '--verbose', help='Print more debugging output')
65+
variable_name = self._metadata.get_netcdf4_variable_name(internal_name)
66+
67+
# Write variable data
68+
print(f'Writing internal variable ', end='')
69+
cprint(internal_name, 'yellow', end='')
70+
print(f' NetCDF variable ', end='')
71+
cprint(variable_name, 'yellow')
4272

43-
return parser
73+
# TODO: this is broken
74+
self._nc_file.dimensions[variable_name + '_dim'] = variable_value.shape
75+
76+
nc_variable = self._nc_file.create_variable(
77+
name=variable_name,
78+
dimensions=(variable_name + '_dim',),
79+
dtype=variable_value.dtype
80+
)
81+
82+
nc_variable[:] = variable_value
4483

84+
# Write variable attributes
85+
attrs = self._metadata.get_netcdf4_attrs(internal_name)
4586

87+
print('attributes:')
88+
pprint(attrs)
89+
90+
for key, value in attrs.items():
91+
nc_variable.attrs[key] = value
92+
93+
print()
94+
95+
def close(self):
96+
"""Close the NetCDF file, commiting all changes."""
97+
self._nc_file.close()
98+
99+
46100
def final_shdr_compositing_fix(level2_data, config):
47101
"""Fix any lingaring SHDR Compositing Issues.
48102
49-
Parameters
50-
----------
51-
level2_data : dict, str -> array
52-
Level 2 data, mapping internal variable names to their values
53-
(generally numpy arrays)
54-
config : config_parser.Config
55-
SunCET Data Processing Pipeline configration object
103+
Args
104+
level2_data : dict, str -> array
105+
Level 2 data, mapping internal variable names to their values
106+
(generally numpy arrays)
107+
config : config_parser.Config
108+
SunCET Data Processing Pipeline configuration object
56109
57-
Returns
58-
-------
59-
level2_data_fixed : dict, str -> array
60-
Copy of level2 data with the fix applied.
110+
Returns
111+
level2_data_fixed : dict, str -> array
112+
Copy of level2 data with the fix applied.
61113
"""
62114
raise NotImplementedError()
115+
116+
117+
def _get_parser():
118+
"""Get command line ArgumentParser object with options defined.
119+
120+
Returns
121+
object which can be used to parse command line objects
122+
"""
123+
parser = argparse.ArgumentParser()
124+
parser.add_argument('-n', '--run-name', type=str, required=True,
125+
help='String name of the run')
126+
parser.add_argument('-v', '--verbose', help='Print more debugging output')
63127

128+
return parser
64129

65-
if __name__ == '__main__':
66-
level3 = Level3()
130+
131+
def main():
132+
"""Main method when running this script directly."""
133+
args = _get_parser().parse_args()
134+
135+
# Load config
136+
config_filename = Path('processing_runs') / args.run_name / 'config.ini'
137+
config = config_parser.ConfigParser()
138+
config.read(config_filename)
139+
140+
# Call run() method on Level3 class
141+
level3 = Level3(args.run_name, config)
67142
level3.run()
143+
144+
145+
if __name__ == '__main__':
146+
main()
+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import pandas as pd
2+
from pathlib import Path
3+
4+
5+
class MetadataManager:
6+
"""Class for interacting with SunCET Metadata files.
7+
8+
This expect the metadata is downloaded into the run directory. To do that,
9+
see: setup_minimum_required_folders_files.py
10+
"""
11+
def __init__(self, run_dir):
12+
"""Initialize a metadata manager from a run directory, which
13+
is expected to have the required files.
14+
15+
Args
16+
run_dir: Path to run directory
17+
"""
18+
# Set paths and check they exist
19+
self._metadata_path = Path(run_dir) / 'suncet_metadata_definition.csv'
20+
self._metadata_ver_path = Path(run_dir) / 'suncet_metadata_definition_version.csv'
21+
22+
if not self._metadata_path.exists():
23+
raise FileNotFoundError(
24+
f"Error: could not find metadata at {self._metadata_path}"
25+
)
26+
27+
if not self._metadata_ver_path.exists():
28+
raise FileNotFoundError(
29+
f"Error: could not find metadata version at {self._metadata_path}"
30+
)
31+
32+
# Load metadata CSV using Pandas
33+
print(f'Reading metadata from {self._metadata_path}')
34+
self._metadata_df = pd.read_csv(self._metadata_path)
35+
self._metadata_df = _clean_metadata_comments(self._metadata_df)
36+
37+
# Load metadata version (just read string from text file)
38+
with open(self._metadata_ver_path) as fh:
39+
self._metadata_ver = fh.read().strip()
40+
41+
print(f'Found metadata version "{self._metadata_ver}"')
42+
43+
# Convert metadata df to dictionary mapping internal name to dictionary
44+
# of columns -> values
45+
self._metadata_dict = _get_metadata_dict(self._metadata_df)
46+
47+
def get_netcdf4_variable_name(self, internal_name):
48+
"""Get name of variable for writing to a NetCDF4 file
49+
50+
Args
51+
internal_name: Internal name of variable (within code)
52+
Returns
53+
what that internal name should be called in a NetCDF4 file
54+
"""
55+
# Ensure variable is in the metadata dictionary
56+
if internal_name not in self._metadata_dict:
57+
raise RuntimeError(
58+
f"Could not find metadata for variable with internal name '{internal_name}'"
59+
)
60+
61+
# Get the variable name, raising Exception if its not filled out in the
62+
# table
63+
var_name = self._metadata_dict[internal_name]['netCDF variable name']
64+
65+
if not var_name:
66+
raise RuntimeError(
67+
'Needed NetCDF variable name for internal name "{internal_name}", but missing'
68+
)
69+
70+
# Return good result
71+
return var_name
72+
73+
def get_netcdf4_attrs(self, internal_name):
74+
"""Get dictionary of static NetCDF4 attributes for a given variable.
75+
76+
Args
77+
internal_name: Internal name of variable (within code)
78+
Returns
79+
dictionary of attribute keys to values
80+
"""
81+
# Ensure variable is in the metadata dictionary
82+
if internal_name not in self._metadata_dict:
83+
raise RuntimeError(
84+
f"Could not find metadata for variable with internal name "
85+
f"'{internal_name}'."
86+
)
87+
# Load variable dict and return subset of keys that are relevant
88+
var_dict = self._metadata_dict[internal_name]
89+
90+
return {
91+
"units": var_dict["units (human)"]
92+
}
93+
94+
95+
def _get_metadata_dict(metadata_df):
96+
"""Convert metadata dataframe to dictinoary mapping internal name
97+
to dictionary of cols to values.
98+
99+
Args
100+
metadata_df: Metadata dataframe as loaded from file with comments
101+
cleaned
102+
Returns
103+
dictionary mapping internal names to dictionaries holding the
104+
row information.
105+
"""
106+
metadata_dict = {}
107+
108+
for _, row in metadata_df.iterrows():
109+
cur_dict = {col: row[col] for col in metadata_df.columns}
110+
cur_key = row['Internal Variable Name']
111+
112+
metadata_dict[cur_key] = cur_dict
113+
114+
return metadata_dict
115+
116+
117+
def _clean_metadata_comments(metadata_df):
118+
"""Remove comment rows from the metadata Data Frame.
119+
120+
A comment has the word "COMMENT" in the first column
121+
122+
Args
123+
dataframe as loaded directly from CSV file
124+
Returns
125+
dataframe with comment row dropped
126+
"""
127+
collected_rows = []
128+
first_row = metadata_df.columns[0]
129+
130+
for _, row in metadata_df.iterrows():
131+
if 'COMMENT' not in row[first_row].upper():
132+
collected_rows.append(row)
133+
134+
return pd.DataFrame(collected_rows)
135+
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import tempfile
23
from .. import config_parser, make_level3
34

45

@@ -7,6 +8,12 @@ def test_Level3_object_instantiates():
78
os.path.dirname(__file__), '..', 'config_files',
89
'config_default.ini'
910
)
11+
1012
config = config_parser.Config(default_config)
11-
make_level3.Level3(config)
13+
temp_dir = tempfile.TemporaryDirectory()
1214

15+
try:
16+
make_level3.Level3(temp_dir.name, config)
17+
finally:
18+
temp_dir.cleanup()
19+

0 commit comments

Comments
 (0)