Skip to content

Commit

Permalink
allow indicating flat sensitivity value
Browse files Browse the repository at this point in the history
  • Loading branch information
carueda committed May 23, 2023
1 parent 1f9712d commit bc09f51
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 22 deletions.
1 change: 1 addition & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ main-cloud-chumash-basic-test max_segments="60" date="20230101":
#!/usr/bin/env bash
export DATE={{date}}
export S3_JSON_BUCKET_PREFIX="s3://pacific-sound-metadata/ch01"
export SENSITIVITY_FLAT_VALUE=176
export MAX_SEGMENTS={{max_segments}}
export CLOUD_TMP_DIR="cloud_tmp_chumash"
export REMOVE_DOWNLOADED_FILES=no
Expand Down
14 changes: 12 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def parse_arguments():
description = "Pypam based processing of Pacific Sound data."
description = "PyPAM based processing of Pacific Sound data."
example = """
Examples:
src/main.py --json-base-dir=tests/json \\
Expand Down Expand Up @@ -68,7 +68,16 @@ def parse_arguments():
type=str,
default=None,
metavar="file",
help="URI of sensitivity NetCDF for calibration of result. By default, a flat value of 178 is applied.",
help="URI of sensitivity NetCDF for calibration of result. "
"Has precedence over --sensitivity-flat-value.",
)

parser.add_argument(
"--sensitivity-flat-value",
type=float,
default=None,
metavar="value",
help="Flat sensitivity value to be used for calibration.",
)

parser.add_argument(
Expand Down Expand Up @@ -146,6 +155,7 @@ def main(opts):
output_dir=opts.output_dir,
gen_csv=opts.gen_csv,
sensitivity_uri=opts.sensitivity_uri,
sensitivity_flat_value=opts.sensitivity_flat_value,
save_segment_result=opts.save_segment_result,
save_extracted_wav=opts.save_extracted_wav,
num_cpus=opts.cpus,
Expand Down
25 changes: 19 additions & 6 deletions src/main_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Script for cloud based processing. By this, we basically mean the ability
# to get input files (json and wav) from S3 and write output files to S3.
#
# Environment variables:
# Inputs to the program are to be passed via environment variables:
# DATE: (Required)
# The date to process. Format: "YYYYMMDD".
# S3_JSON_BUCKET_PREFIX: (Optional)
Expand All @@ -14,12 +14,17 @@
# Typically this is to be provided but it is optional to facilitate testing.
# SENSITIVITY_NETCDF_URI: (Optional)
# URI of sensitivity NetCDF file that should be used to calibrate the result.
# If omitted, a flat -178 value is used.
# NOTE: it is a TODO to retrieve this information using PyHydrophone.
# SENSITIVITY_FLAT_VALUE: (Optional)
# Flat sensitivity value to be used for calibration
# if SENSITIVITY_NETCDF_URI is not given.
#
# Mainly for testing purposes, also these environment variables are considered:
# *Note*:
# TODO retrieve sensitivity information using PyHydrophone when none
# of the `SENSITIVITY_*` environment variables above are given.
#
# Mainly for testing purposes, also these environment variables are inspected:
# CLOUD_TMP_DIR: (Optional)
# Workspace for downloads and for generated files to be uploaded.
# Local workspace for downloads and for generated files to be uploaded.
# By default, "cloud_tmp".
# MAX_SEGMENTS: (Optional)
# 0, the default, means no restriction, that is, all segments for each day
Expand Down Expand Up @@ -54,9 +59,16 @@ def main():
# The bucket to write the output to
output_bucket = os.getenv("S3_OUTPUT_BUCKET")

# URI of sensitivity NetCDF file
# URI of sensitivity NetCDF file to be used for calibration
sensitivity_uri = os.getenv("SENSITIVITY_NETCDF_URI")

# Flat sensitivity value to be used for calibration
sensitivity_flat_value = (
float(os.getenv("SENSITIVITY_FLAT_VALUE"))
if os.getenv("SENSITIVITY_FLAT_VALUE") is not None
else None
)

# Convenience for testing (0 means no restriction)
max_segments = int(os.getenv("MAX_SEGMENTS", "0"))

Expand Down Expand Up @@ -107,6 +119,7 @@ def main():
output_dir=generated_dir,
gen_csv=False,
sensitivity_uri=sensitivity_uri,
sensitivity_flat_value=sensitivity_flat_value,
max_segments=max_segments,
subset_to=(10, 100_000),
)
Expand Down
12 changes: 10 additions & 2 deletions src/process_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
output_dir: str,
gen_csv: bool,
sensitivity_uri: Optional[str] = None,
sensitivity_flat_value: Optional[float] = None,
save_segment_result: bool = False,
save_extracted_wav: bool = False,
num_cpus: int = 1,
Expand All @@ -36,6 +37,7 @@ def __init__(
:param output_dir:
:param gen_csv:
:param sensitivity_uri:
:param sensitivity_flat_value:
:param save_segment_result:
:param save_extracted_wav:
:param num_cpus:
Expand All @@ -54,18 +56,23 @@ def __init__(
self.subset_to = subset_to

self.sensitivity_da: Optional[xr.DataArray] = None
self.sensitivity_flat_value: Optional[float] = sensitivity_flat_value

if sensitivity_uri is not None:
s_local_filename = file_helper.get_local_sensitivity_filename(sensitivity_uri)
if s_local_filename is not None:
sensitivity_ds = xr.open_dataset(s_local_filename)
debug(f"Loaded sensitivity from {s_local_filename=}")
info(f"Will use loaded sensitivity from {s_local_filename=}")
self.sensitivity_da = sensitivity_ds.sensitivity
debug(f"{self.sensitivity_da=}")
else:
error(
f"Unable to resolve sensitivity_uri: '{sensitivity_uri}'. Ignoring it."
)

if self.sensitivity_da is None and self.sensitivity_flat_value is not None:
info(f"Will use given flat sensitivity value: {sensitivity_flat_value}")

# obtained once upon first segment to be processed
self.pypam_support: Optional[PypamSupport] = None

Expand Down Expand Up @@ -108,7 +115,8 @@ def process_day(self, year: int, month: int, day: int) -> Optional[str]:

info("Aggregating results ...")
aggregated_result = self.pypam_support.get_aggregated_milli_psd(
self.sensitivity_da
sensitivity_da=self.sensitivity_da,
sensitivity_flat_value=self.sensitivity_flat_value,
)
basename = f"{self.output_dir}/milli_psd_{year:04}{month:02}{day:02}"
nc_filename = f"{basename}.nc"
Expand Down
30 changes: 18 additions & 12 deletions src/pypam_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

from src.misc_helper import brief_list, debug, info

# Approximate "flat" sensitivity of the hydrophone
# TODO allow passing this in as a parameter
APPROX_FLAT_SENSITIVITY = -178


class PypamSupport:
def __init__(
Expand Down Expand Up @@ -53,14 +49,20 @@ def add_segment(self, data: np.ndarray, iso_minute: str):
self.num_secs_per_minute.append(num_secs)

def get_aggregated_milli_psd(
self, sensitivity_da: Optional[xr.DataArray] = None
self,
sensitivity_da: Optional[xr.DataArray] = None,
sensitivity_flat_value: Optional[float] = None,
) -> xr.DataArray:
"""
Gets the resulting hybrid millidecade bands.
Calibration is done if either `sensitivity_da` or `sensitivity_flat_value` is given.
`sensitivity_da` has priority over `sensitivity_flat_value`.
No calibration is done if neither is given.
:param sensitivity_da:
If given, it's used to calibrate the result.
Otherwise, the result is calibrated using APPROX_FLAT_SENSITIVITY.
If given, it will be used to calibrate the result.
:param sensitivity_flat_value:
If given, and sensitivity_da not given, it will be used to calibrate the result.
:return:
"""
# Convert the spectra to a datarray
Expand All @@ -72,7 +74,7 @@ def get_aggregated_milli_psd(

psd_da = self.spectra_to_bands(psd_da)
debug(f" {psd_da.frequency_bins=}")
psd_da = apply_sensitivity(psd_da, sensitivity_da)
psd_da = apply_sensitivity(psd_da, sensitivity_da, sensitivity_flat_value)

# just need single precision:
psd_da = psd_da.astype(np.float32)
Expand Down Expand Up @@ -183,16 +185,20 @@ def print_array(name: str, arr: np.ndarray):


def apply_sensitivity(
psd_da: xr.DataArray, sensitivity_da: Optional[xr.DataArray]
psd_da: xr.DataArray,
sensitivity_da: Optional[xr.DataArray],
sensitivity_flat_value: Optional[float] = None,
) -> xr.DataArray:
psd_da = cast(xr.DataArray, 10 * np.log10(psd_da))

if sensitivity_da is not None:
freq_subset = sensitivity_da.interp(frequency=psd_da.frequency_bins)
info(f" Applying sensitivity({len(freq_subset.values)})={freq_subset}")
psd_da -= freq_subset.values
else:
info(f" applying APPROX_FLAT_SENSITIVITY={APPROX_FLAT_SENSITIVITY}")
psd_da -= APPROX_FLAT_SENSITIVITY
elif sensitivity_flat_value is not None:
info(f" applying {sensitivity_flat_value=}")
psd_da -= sensitivity_flat_value

return psd_da


Expand Down

0 comments on commit bc09f51

Please sign in to comment.