diff --git a/bin/inference/pycbc_inference_plot_posterior b/bin/inference/pycbc_inference_plot_posterior index 7da59e1c19b..f8e63893e46 100644 --- a/bin/inference/pycbc_inference_plot_posterior +++ b/bin/inference/pycbc_inference_plot_posterior @@ -143,7 +143,7 @@ if opts.plot_prior is not None: for func, param in fp.attrs['remapped_params']: try: remapped_params[param] = prior_samples[func] - except (NameError, TypeError): + except (NameError, TypeError, AttributeError): continue prior_samples = FieldArray.from_kwargs(**remapped_params) for param in fp.attrs['static_params']: @@ -184,7 +184,7 @@ if opts.plot_injection_parameters: for p in parameters: try: vals = injections[p] - except (NameError, TypeError): + except (NameError, TypeError, AttributeError): # injection doesn't have this parameter, skip logging.warn("Could not find injection parameter {}".format(p)) continue diff --git a/bin/plotting/pycbc_plot_range b/bin/plotting/pycbc_plot_range index f2a0cd82c6f..dbf853350de 100644 --- a/bin/plotting/pycbc_plot_range +++ b/bin/plotting/pycbc_plot_range @@ -64,7 +64,6 @@ for psd_file in args.psd_files: pylab.errorbar((start+end)/2, ranges[wf_key], xerr=(end-start)/2, ecolor=pycbc.results.ifo_color(ifo), label=label, fmt='none') - pylab.legend(loc="best", fontsize='small') if len(args.approximant) == 1: diff --git a/bin/pycbc_inspiral b/bin/pycbc_inspiral index a8d414f1f6c..8006bc43549 100644 --- a/bin/pycbc_inspiral +++ b/bin/pycbc_inspiral @@ -23,6 +23,7 @@ import pycbc import pycbc.version from pycbc import vetoes, psd, waveform, strain, scheme, fft, DYN_RANGE_FAC, events from pycbc.vetoes.sgchisq import SingleDetSGChisq +from pycbc.vetoes.basischisq import SingleDetHMChisq, SingleDetCombinedChisq from pycbc.filter import MatchedFilterControl, make_frequency_series, qtransform from pycbc.types import TimeSeries, FrequencySeries, zeros, float32, complex64 import pycbc.version @@ -207,6 +208,8 @@ fft.insert_fft_option_group(parser) pycbc.opt.insert_optimization_option_group(parser) pycbc.inject.insert_injfilterrejector_option_group(parser) SingleDetSGChisq.insert_option_group(parser) +SingleDetHMChisq.insert_option_group(parser) +SingleDetCombinedChisq.insert_option_group(parser) opt = parser.parse_args() # Check that the values returned for the options make sense @@ -267,6 +270,8 @@ with ctx: 'psd_var_val' : float32, } out_types.update(SingleDetSGChisq.returns) + out_types.update(SingleDetHMChisq.returns) + out_types.update(SingleDetCombinedChisq.returns) out_vals = {key: None for key in out_types} names = sorted(out_vals.keys()) @@ -371,6 +376,29 @@ with ctx: sg_chisq = SingleDetSGChisq.from_cli(opt, bank, opt.chisq_bins) + hm_chisq = SingleDetHMChisq.from_cli(opt) + + combined_chisq = SingleDetCombinedChisq.from_cli(opt) + + # create scratch space for modes + if hm_chisq.do or combined_chisq.do: + approxs = numpy.unique(bank.table.approximant) + modesmem_cache = {} + for apprx in approxs: + # check that we can + try: + modes = waveform.hm_approximants[apprx] + except KeyError: + raise ValueError("cannot generate individual modes for " + "approximant {}".format(apprx)) + modemem = {mode: template_mem.copy() for mode in modes} + modesmem_cache[apprx] = modemem + # whiten data + wh_segments = [seg * seg.psd**0.5 for seg in segments] + else: + wh_segments = [None]*len(segments) + modesmem_cache = {} + ntemplates = len(bank) nfilters = 0 @@ -390,6 +418,14 @@ with ctx: t_num += tnum_start tmplt_generated = False + # Turn on individual modes generation if calculating HM Chisq + if 
hm_chisq.do: + apprx = bank.table.approximant[t_num] + bank.modes_out = modesmem_cache[apprx] + bank.generate_modes = bank.modes_out.keys() + else: + bank.generate_modes = None + for s_num, stilde in enumerate(segments): # Filter check checks the 'inj_filter_rejector' options to # determine whether @@ -439,6 +475,18 @@ with ctx: out_vals['chisq_dof'], idx+stilde.analyze.start) + out_vals['hm_chisq'], out_vals['hm_chisq_dof'] = \ + hm_chisq.values(template, wh_segments[s_num], stilde.psd, + norm*snrv, + idx+stilde.analyze.start, + data_whitening='whitened') + + out_vals['combined_chisq'], out_vals['combined_chisq_dof'] = \ + combined_chisq.values(template, wh_segments[s_num], stilde.psd, + norm*snrv, + idx+stilde.analyze.start, + data_whitening='whitened') + out_vals['cont_chisq'] = \ autochisq.values(snr, idx+stilde.analyze.start, template, stilde.psd, norm, stilde=stilde, diff --git a/pycbc/conversions.py b/pycbc/conversions.py index 0688f1386c6..c08436c3221 100644 --- a/pycbc/conversions.py +++ b/pycbc/conversions.py @@ -1215,10 +1215,10 @@ def get_final_from_initial(mass1, mass2, spin1x=0., spin1y=0., spin1z=0., final_mass = numpy.zeros(mass1.shape) final_spin = numpy.zeros(mass1.shape) for ii in range(final_mass.size): - m1 = mass1[ii] - m2 = mass2[ii] - spin1 = [spin1x[ii], spin1y[ii], spin1z[ii]] - spin2 = [spin2x[ii], spin2y[ii], spin2z[ii]] + m1 = numpy.float(mass1[ii]) + m2 = numpy.float(mass2[ii]) + spin1 = list(map(float, [spin1x[ii], spin1y[ii], spin1z[ii]])) + spin2 = list(map(float, [spin2x[ii], spin2y[ii], spin2z[ii]])) _, fm, fs = lalsim.SimIMREOBFinalMassSpin(m1, m2, spin1, spin2, getattr(lalsim, approximant)) final_mass[ii] = fm * (m1 + m2) diff --git a/pycbc/events/events.py b/pycbc/events/events.py new file mode 100644 index 00000000000..11efcc6bfa3 --- /dev/null +++ b/pycbc/events/events.py @@ -0,0 +1,779 @@ +# Copyright (C) 2012 Alex Nitz +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# self.option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""This modules defines functions for clustering and thresholding timeseries to +produces event triggers +""" +from __future__ import absolute_import +import lal, numpy, copy, os.path + +from pycbc import WEAVE_FLAGS +from pycbc.types import Array +from pycbc.types import convert_to_process_params_dict +from pycbc.scheme import schemed +from pycbc.detector import Detector + +from . import coinc + +@schemed("pycbc.events.threshold_") +def threshold(series, value): + """Return list of values and indices values over threshold in series. 
+ """ + return None, None + +@schemed("pycbc.events.threshold_") +def threshold_only(series, value): + """Return list of values and indices whose values in series are + larger (in absolute value) than value + """ + return None, None + +#FIXME: This should be under schemed, but I don't understand that yet! +def threshold_real_numpy(series, value): + arr = series.data + locs = numpy.where(arr > value)[0] + vals = arr[locs] + return locs, vals + +@schemed("pycbc.events.threshold_") +def threshold_and_cluster(series, threshold, window): + """Return list of values and indices values over threshold in series. + """ + return + +@schemed("pycbc.events.threshold_") +def _threshold_cluster_factory(series): + return + +class ThresholdCluster(object): + """Create a threshold and cluster engine + + Parameters + ----------- + series : complex64 + Input pycbc.types.Array (or subclass); it will be searched for + points above threshold that are then clustered + """ + def __new__(cls, *args, **kwargs): + real_cls = _threshold_cluster_factory(*args, **kwargs) + return real_cls(*args, **kwargs) # pylint:disable=not-callable + +# The class below should serve as the parent for all schemed classes. +# The intention is that this class serves simply as the location for +# all documentation of the class and its methods, though that is not +# yet implemented. Perhaps something along the lines of: +# +# http://stackoverflow.com/questions/2025562/inherit-docstrings-in-python-class-inheritance +# +# will work? Is there a better way? +class _BaseThresholdCluster(object): + def threshold_and_cluster(self, threshold, window): + """ + Threshold and cluster the memory specified at instantiation with the + threshold specified at creation and the window size specified at creation. + + Parameters + ----------- + threshold : float32 + The minimum absolute value of the series given at object initialization + to return when thresholding and clustering. + window : uint32 + The size (in number of samples) of the window over which to cluster + + Returns: + -------- + event_vals : complex64 + Numpy array, complex values of the clustered events + event_locs : uint32 + Numpy array, indices into series of location of events + """ + pass + + +def findchirp_cluster_over_window(times, values, window_length): + """ Reduce the events by clustering over a window using + the FindChirp clustering algorithm + + Parameters + ----------- + indices: Array + The list of indices of the SNR values + snr: Array + The list of SNR value + window_size: int + The size of the window in integer samples. Must be positive. 
+ + Returns + ------- + indices: Array + The reduced list of indices of the SNR values + """ + assert window_length > 0, 'Clustering window length is not positive' + + from weave import inline + indices = numpy.zeros(len(times), dtype=int) + tlen = len(times) # pylint:disable=unused-variable + k = numpy.zeros(1, dtype=int) + absvalues = abs(values) # pylint:disable=unused-variable + times = times.astype(int) + code = """ + int j = 0; + int curr_ind = 0; + for (int i=0; i < tlen; i++){ + if ((times[i] - times[curr_ind]) > window_length){ + j += 1; + indices[j] = i; + curr_ind = i; + } + else if (absvalues[i] > absvalues[curr_ind]){ + indices[j] = i; + curr_ind = i; + } + } + k[0] = j; + """ + inline(code, ['times', 'absvalues', 'window_length', 'indices', 'tlen', 'k'], + extra_compile_args=[WEAVE_FLAGS]) + return indices[0:k[0]+1] + +def cluster_reduce(idx, snr, window_size): + """ Reduce the events by clustering over a window + + Parameters + ----------- + indices: Array + The list of indices of the SNR values + snr: Array + The list of SNR value + window_size: int + The size of the window in integer samples. + + Returns + ------- + indices: Array + The list of indices of the SNR values + snr: Array + The list of SNR values + """ + ind = findchirp_cluster_over_window(idx, snr, window_size) + return idx.take(ind), snr.take(ind) + +def newsnr(snr, reduced_x2, q=6., n=2.): + """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and + reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for + definition. Previous implementation in glue/ligolw/lsctables.py + """ + newsnr = numpy.array(snr, ndmin=1, dtype=numpy.float64) + reduced_x2 = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) + + # newsnr is only different from snr if reduced chisq > 1 + ind = numpy.where(reduced_x2 > 1.)[0] + newsnr[ind] *= ( 0.5 * (1. + reduced_x2[ind] ** (q/n)) ) ** (-1./q) + + # If snr input is float, return a float. Otherwise return numpy array. + if hasattr(snr, '__len__'): + return newsnr + else: + return newsnr[0] + +def newsnr_sgveto(snr, bchisq, sgchisq): + """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" + # Test function + nsnr = newsnr(snr, bchisq) + nsnr = numpy.array(nsnr, ndmin=1) + sgchisq = numpy.array(sgchisq, ndmin=1) + t = numpy.array(sgchisq > 4, ndmin=1) + if len(t) > 0: + nsnr[t] = nsnr[t] / (sgchisq[t] / 4.0) ** 0.5 + + # If snr input is float, return a float. Otherwise return numpy array. + if hasattr(snr, '__len__'): + return nsnr + else: + return nsnr[0] + +def effsnr(snr, reduced_x2, fac=250.): + """Calculate the effective SNR statistic. See (S5y1 paper) for definition. + Previous implementation in glue/ligolw/lsctables.py + """ + snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) + rchisq = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) + effsnr = snr / (1 + snr ** 2 / fac) ** 0.25 / rchisq ** 0.25 + + # If snr input is float, return a float. Otherwise return numpy array. 
+ if hasattr(snr, '__len__'): + return effsnr + else: + return effsnr[0] + +class EventManager(object): + def __init__(self, opt, column, column_types, **kwds): + self.opt = opt + self.global_params = kwds + + self.event_dtype = [ ('template_id', int) ] + for column, coltype in zip (column, column_types): + self.event_dtype.append( (column, coltype) ) + + self.events = numpy.array([], dtype=self.event_dtype) + self.template_params = [] + self.template_index = -1 + self.template_events = numpy.array([], dtype=self.event_dtype) + self.write_performance = False + + @classmethod + def from_multi_ifo_interface(cls, opt, ifo, column, column_types, **kwds): + """ + To use this for a single ifo from the multi ifo interface requires + some small fixing of the opt structure. This does that. As we edit the + opt structure the process_params table will not be correct. + """ + opt = copy.deepcopy(opt) + opt_dict = vars(opt) + for arg, value in opt_dict.items(): + if isinstance(value, dict): + setattr(opt, arg, getattr(opt, arg)[ifo]) + return cls(opt, column, column_types, **kwds) + + def chisq_threshold(self, value, num_bins, delta=0): + remove = [] + for i, event in enumerate(self.events): + xi = event['chisq'] / (event['chisq_dof'] / 2 + 1 + delta * event['snr'].conj() * event['snr']) + if xi > value: + remove.append(i) + self.events = numpy.delete(self.events, remove) + + def newsnr_threshold(self, threshold): + """ Remove events with newsnr smaller than given threshold + """ + if not self.opt.chisq_bins: + raise RuntimeError('Chi-square test must be enabled in order to use newsnr threshold') + + remove = [i for i, e in enumerate(self.events) if \ + newsnr(abs(e['snr']), e['chisq'] / e['chisq_dof']) < threshold] + self.events = numpy.delete(self.events, remove) + + def keep_near_injection(self, window, injections): + from pycbc.events.veto import indices_within_times + if len(self.events) == 0: + return + + inj_time = numpy.array(injections.end_times()) + gpstime = self.events['time_index'].astype(numpy.float64) + gpstime = gpstime / self.opt.sample_rate + self.opt.gps_start_time + i = indices_within_times(gpstime, inj_time - window, inj_time + window) + self.events = self.events[i] + + def keep_loudest_in_interval(self, window, num_keep): + if len(self.events) == 0: + return + + e = self.events + stat = newsnr(abs(e['snr']), e['chisq'] / e['chisq_dof']) + time = e['time_index'] + + wtime = (time / window).astype(numpy.int32) + bins = numpy.unique(wtime) + + keep = [] + for b in bins: + bloc = numpy.where((wtime == b))[0] + bloudest = stat[bloc].argsort()[-num_keep:] + keep.append(bloc[bloudest]) + keep = numpy.concatenate(keep) + self.events = e[keep] + + def maximize_over_bank(self, tcolumn, column, window): + if len(self.events) == 0: + return + + self.events = numpy.sort(self.events, order=tcolumn) + cvec = self.events[column] + tvec = self.events[tcolumn] + + indices = [] +# mint = tvec.min() +# maxt = tvec.max() +# edges = numpy.arange(mint, maxt, window) + +# # Get the location of each time bin +# bins = numpy.searchsorted(tvec, edges) +# bins = numpy.append(bins, len(tvec)) +# for i in range(len(bins)-1): +# kmin = bins[i] +# kmax = bins[i+1] +# if kmin == kmax: +# continue +# event_idx = numpy.argmax(cvec[kmin:kmax]) + kmin +# indices.append(event_idx) + + # This algorithm is confusing, but it is what lalapps_inspiral does + # REMOVE ME!!!!!!!!!!! 
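+        # Convert each trigger's sample index to a GPS time, split it into
+        # integer seconds and nanoseconds, and bin the nanoseconds into
+        # fixed-width windows; only the loudest event (largest |cvec|)
+        # within each (second, window) pair is kept.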
+ gps = tvec.astype(numpy.float64) / self.opt.sample_rate + self.opt.gps_start_time + gps_sec = numpy.floor(gps) + gps_nsec = (gps - gps_sec) * 1e9 + + wnsec = int(window * 1e9 / self.opt.sample_rate) + win = gps_nsec.astype(int) / wnsec + + indices.append(0) + for i in xrange(len(tvec)): + if gps_sec[i] == gps_sec[indices[-1]] and win[i] == win[indices[-1]]: + if abs(cvec[i]) > abs(cvec[indices[-1]]): + indices[-1] = i + else: + indices.append(i) + + self.events = numpy.take(self.events, indices) + + def add_template_events(self, columns, vectors): + """ Add a vector indexed """ + # initialize with zeros - since vectors can be None, look for the + # first one that isn't + new_events = None + for v in vectors: + if v is not None: + new_events = numpy.zeros(len(v), dtype=self.event_dtype) + break + # they shouldn't all be None + assert new_events is not None + new_events['template_id'] = self.template_index + for c, v in zip(columns, vectors): + if v is not None: + if isinstance(v, Array): + new_events[c] = v.numpy() + else: + new_events[c] = v + self.template_events = numpy.append(self.template_events, new_events) + + def cluster_template_events(self, tcolumn, column, window_size): + """ Cluster the internal events over the named column + """ + cvec = self.template_events[column] + tvec = self.template_events[tcolumn] + indices = findchirp_cluster_over_window(tvec, cvec, window_size) + self.template_events = numpy.take(self.template_events, indices) + + def new_template(self, **kwds): + self.template_params.append(kwds) + self.template_index += 1 + + def add_template_params(self, **kwds): + self.template_params[-1].update(kwds) + + def finalize_template_events(self): + self.events = numpy.append(self.events, self.template_events) + self.template_events = numpy.array([], dtype=self.event_dtype) + + def make_output_dir(self, outname): + path = os.path.dirname(outname) + if path != '': + if not os.path.exists(path) and path is not None: + os.makedirs(path) + + def save_performance(self, ncores, nfilters, ntemplates, run_time, + setup_time): + """ + Calls variables from pycbc_inspiral to be used in a timing calculation + """ + self.run_time = run_time + self.setup_time = setup_time + self.ncores = ncores + self.nfilters = nfilters + self.ntemplates = ntemplates + self.write_performance = True + + def write_events(self, outname): + """ Write the found events to a sngl inspiral table + """ + self.make_output_dir(outname) + + if '.hdf' in outname: + self.write_to_hdf(outname) + else: + raise ValueError('Cannot write to this format') + + def write_to_hdf(self, outname): + class fw(object): + def __init__(self, name, prefix): + import h5py + self.f = h5py.File(name, 'w') + self.prefix = prefix + + def __setitem__(self, name, data): + col = self.prefix + '/' + name + self.f.create_dataset(col, data=data, + compression='gzip', + compression_opts=9, + shuffle=True) + + self.events.sort(order='template_id') + th = numpy.array([p['tmplt'].template_hash for p in self.template_params]) + tid = self.events['template_id'] + f = fw(outname, self.opt.channel_name[0:2]) + + if len(self.events): + f['snr'] = abs(self.events['snr']) + try: + # Precessing + f['u_vals'] = self.events['u_vals'] + f['coa_phase'] = self.events['coa_phase'] + f['hplus_cross_corr'] = self.events['hplus_cross_corr'] + except Exception: + # Not precessing + f['coa_phase'] = numpy.angle(self.events['snr']) + f['chisq'] = self.events['chisq'] + f['bank_chisq'] = self.events['bank_chisq'] + f['bank_chisq_dof'] = 
self.events['bank_chisq_dof'] + f['cont_chisq'] = self.events['cont_chisq'] + f['end_time'] = self.events['time_index'] / float(self.opt.sample_rate) + self.opt.gps_start_time + try: + # Precessing + template_sigmasq_plus = numpy.array([t['sigmasq_plus'] for t in self.template_params], dtype=numpy.float32) + f['sigmasq_plus'] = template_sigmasq_plus[tid] + template_sigmasq_cross = numpy.array([t['sigmasq_cross'] for t in self.template_params], dtype=numpy.float32) + f['sigmasq_cross'] = template_sigmasq_cross[tid] + # FIXME: I want to put something here, but I haven't yet + # figured out what it should be. I think we would also + # need information from the plus and cross correlation + # (both real and imaginary(?)) to get this. + f['sigmasq'] = template_sigmasq_plus[tid] + except Exception: + # Not precessing + template_sigmasq = numpy.array([t['sigmasq'] for t in self.template_params], dtype=numpy.float32) + f['sigmasq'] = template_sigmasq[tid] + + template_durations = [p['tmplt'].template_duration for p in self.template_params] + f['template_duration'] = numpy.array(template_durations, dtype=numpy.float32)[tid] + + # FIXME: Can we get this value from the autochisq instance? + cont_dof = self.opt.autochi_number_points + if self.opt.autochi_onesided is None: + cont_dof = cont_dof * 2 + if self.opt.autochi_two_phase: + cont_dof = cont_dof * 2 + if self.opt.autochi_max_valued_dof: + cont_dof = self.opt.autochi_max_valued_dof + f['cont_chisq_dof'] = numpy.repeat(cont_dof, len(self.events)) + + if 'chisq_dof' in self.events.dtype.names: + f['chisq_dof'] = self.events['chisq_dof'] / 2 + 1 + else: + f['chisq_dof'] = numpy.zeros(len(self.events)) + + f['template_hash'] = th[tid] + + if 'sg_chisq' in self.events.dtype.names: + f['sg_chisq'] = self.events['sg_chisq'] + + if 'hm_chisq' in self.events.dtype.names: + f['hm_chisq'] = self.events['hm_chisq'] + f['hm_chisq_dof'] = self.events['hm_chisq_dof'] + + if 'combined_chisq' in self.events.dtype.names: + f['combined_chisq'] = self.events['combined_chisq'] + f['combined_chisq_dof'] = self.events['combined_chisq_dof'] + + if self.opt.psdvar_short_segment is not None: + f['psd_var_val'] = self.events['psd_var_val'] + + if self.opt.trig_start_time: + f['search/start_time'] = numpy.array([self.opt.trig_start_time]) + search_start_time = float(self.opt.trig_start_time) + else: + f['search/start_time'] = numpy.array([self.opt.gps_start_time + self.opt.segment_start_pad]) + search_start_time = float(self.opt.gps_start_time + self.opt.segment_start_pad) + if self.opt.trig_end_time: + f['search/end_time'] = numpy.array([self.opt.trig_end_time]) + search_end_time = float(self.opt.trig_end_time) + else: + f['search/end_time'] = numpy.array([self.opt.gps_end_time - self.opt.segment_end_pad]) + search_end_time = float(self.opt.gps_end_time - self.opt.segment_end_pad) + + if self.write_performance: + self.analysis_time = search_end_time - search_start_time + time_ratio = numpy.array([float(self.analysis_time) / float(self.run_time)]) + temps_per_core = float(self.ntemplates) / float(self.ncores) + filters_per_core = float(self.nfilters) / float(self.ncores) + f['search/templates_per_core'] = \ + numpy.array([float(temps_per_core) * float(time_ratio)]) + f['search/filter_rate_per_core'] = \ + numpy.array([filters_per_core / float(self.run_time)]) + f['search/setup_time_fraction'] = \ + numpy.array([float(self.setup_time) / float(self.run_time)]) + f['search/run_time'] = numpy.array([float(self.run_time)]) + + if 'q_trans' in self.global_params: + qtrans = 
self.global_params['q_trans'] + for key in qtrans: + if key == 'qtiles': + for seg in qtrans[key]: + for q in qtrans[key][seg]: + f['qtransform/%s/%s/%s' % (key,seg,q)]=qtrans[key][seg][q] + elif key == 'qplanes': + for seg in qtrans[key]: + f['qtransform/%s/%s' % (key,seg)]=qtrans[key][seg] + + if 'gating_info' in self.global_params: + gating_info = self.global_params['gating_info'] + for gate_type in ['file', 'auto']: + if gate_type in gating_info: + f['gating/' + gate_type + '/time'] = \ + numpy.array([float(g[0]) for g in gating_info[gate_type]]) + f['gating/' + gate_type + '/width'] = \ + numpy.array([g[1] for g in gating_info[gate_type]]) + f['gating/' + gate_type + '/pad'] = \ + numpy.array([g[2] for g in gating_info[gate_type]]) + +class EventManagerMultiDet(EventManager): + def __init__(self, opt, ifos, column, column_types, psd=None, **kwargs): + self.opt = opt + self.ifos = ifos + self.global_params = kwargs + if psd is not None: + self.global_params['psd'] = psd[ifos[0]] + + # The events array does not like holding the ifo as string, + # so create a mapping dict and hold as an int + self.ifo_dict = {} + self.ifo_reverse = {} + for i, ifo in enumerate(ifos): + self.ifo_dict[ifo] = i + self.ifo_reverse[i] = ifo + + self.event_dtype = [ ('template_id', int), ('event_id', int) ] + for column, coltype in zip (column, column_types): + self.event_dtype.append( (column, coltype) ) + + self.events = numpy.array([], dtype=self.event_dtype) + self.event_id_map = {} + self.event_index = 0 + self.template_params = [] + self.template_index = -1 + self.template_event_dict = {} + self.coinc_list = [] + self.write_performance = False + for ifo in ifos: + self.template_event_dict[ifo] = numpy.array([], + dtype=self.event_dtype) + + def add_template_events_to_ifo(self, ifo, columns, vectors): + """ Add a vector indexed """ + # Just call through to the standard function + self.template_events = self.template_event_dict[ifo] + self.add_template_events(columns, vectors) + self.template_event_dict[ifo] = self.template_events + self.template_events = None + + def cluster_template_events_single_ifo(self, tcolumn, column, window_size, + ifo): + """ Cluster the internal events over the named column + """ + # Just call through to the standard function + self.template_events = self.template_event_dict[ifo] + self.cluster_template_events(tcolumn, column, window_size) + self.template_event_dict[ifo] = self.template_events + self.template_events = None + + def finalize_template_events(self, perform_coincidence=True, + coinc_window=0.0): + # Set ids + for ifo in self.ifos: + num_events = len(self.template_event_dict[ifo]) + new_event_ids = numpy.arange(self.event_index, + self.event_index+num_events) + self.template_event_dict[ifo]['event_id'] = new_event_ids + self.event_index = self.event_index+num_events + + if perform_coincidence: + if not len(self.ifos) == 2: + err_msg = "Coincidence currently only supported for 2 ifos." + raise ValueError(err_msg) + ifo1 = self.ifos[0] + ifo2 = self.ifos[1] + end_times1 = self.template_event_dict[ifo1]['time_index'] /\ + float(self.opt.sample_rate[ifo1]) + self.opt.gps_start_time[ifo1] + end_times2 = self.template_event_dict[ifo2]['time_index'] /\ + float(self.opt.sample_rate[ifo2]) + self.opt.gps_start_time[ifo2] + light_travel_time = Detector(ifo1).light_travel_time_to_detector(\ + Detector(ifo2)) + coinc_window = coinc_window + light_travel_time + # FIXME: Remove!!! 
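+            # Hard-coded 2 s coincidence window for testing; this overrides
+            # the light-travel-time window computed above and should be
+            # removed before production use.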
+ coinc_window = 2.0 + if len(end_times1) and len(end_times2): + idx_list1, idx_list2, _ = \ + coinc.time_coincidence(end_times1, end_times2, + coinc_window) + if len(idx_list1): + for idx1, idx2 in zip(idx_list1, idx_list2): + event1 = self.template_event_dict[ifo1][idx1] + event2 = self.template_event_dict[ifo2][idx2] + self.coinc_list.append((event1, event2)) + for ifo in self.ifos: + self.events = numpy.append(self.events, + self.template_event_dict[ifo]) + self.template_event_dict[ifo] = numpy.array([], + dtype=self.event_dtype) + + def write_events(self, outname): + """ Write the found events to a sngl inspiral table + """ + self.make_output_dir(outname) + + if '.hdf' in outname: + self.write_to_hdf(outname) + else: + raise ValueError('Cannot write to this format') + + def write_to_hdf(self, outname): + class fw(object): + def __init__(self, name): + import h5py + self.f = h5py.File(name, 'w') + + def __setitem__(self, name, data): + col = self.prefix + '/' + name + self.f.create_dataset(col, data=data, + compression='gzip', + compression_opts=9, + shuffle=True) + + self.events.sort(order='template_id') + th = numpy.array([p['tmplt'].template_hash for p in \ + self.template_params]) + tid = self.events['template_id'] + f = fw(outname) + for ifo in self.ifos: + f.prefix = ifo + ifo_events = numpy.array([e for e in self.events if \ + e['ifo'] == self.ifo_dict[ifo]], dtype=self.event_dtype) + if len(ifo_events): + ifo_str = ifo.lower()[0] if ifo != 'H1' else ifo.lower() + f['snr_%s' % ifo_str] = abs(ifo_events['snr']) + try: + # Precessing + f['u_vals'] = ifo_events['u_vals'] + f['coa_phase'] = ifo_events['coa_phase'] + f['hplus_cross_corr'] = ifo_events['hplus_cross_corr'] + except Exception: + f['coa_phase'] = numpy.angle(ifo_events['snr']) + f['chisq'] = ifo_events['chisq'] + f['bank_chisq'] = ifo_events['bank_chisq'] + f['bank_chisq_dof'] = ifo_events['bank_chisq_dof'] + f['cont_chisq'] = ifo_events['cont_chisq'] + f['end_time'] = ifo_events['time_index'] / \ + float(self.opt.sample_rate[ifo_str]) + \ + self.opt.gps_start_time[ifo_str] + try: + # Precessing + template_sigmasq_plus = numpy.array([t['sigmasq_plus'] for t \ + in self.template_params], dtype=numpy.float32) + f['sigmasq_plus'] = template_sigmasq_plus[tid] + template_sigmasq_cross = numpy.array([t['sigmasq_cross'] for t \ + in self.template_params], dtype=numpy.float32) + f['sigmasq_cross'] = template_sigmasq_cross[tid] + # FIXME: I want to put something here, but I haven't yet + # figured out what it should be. I think we would also + # need information from the plus and cross correlation + # (both real and imaginary(?)) to get this. + f['sigmasq'] = template_sigmasq_plus[tid] + except Exception: + # Not precessing + template_sigmasq = numpy.array([t['sigmasq'][ifo] for t in \ + self.template_params], dtype=numpy.float32) + f['sigmasq'] = template_sigmasq[tid] + + template_durations = [p['tmplt'].template_duration for p in \ + self.template_params] + f['template_duration'] = numpy.array(template_durations, \ + dtype=numpy.float32)[tid] + + # FIXME: Can we get this value from the autochisq instance? 
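+                # Reconstruct the continuous (auto) chi-squared DOF from the
+                # command-line options: one DOF per sample point, doubled if
+                # the test is two-sided rather than one-sided.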
+ cont_dof = self.opt.autochi_number_points + if self.opt.autochi_onesided is None: + cont_dof = cont_dof * 2 + #if self.opt.autochi_two_phase: + # cont_dof = cont_dof * 2 + #if self.opt.autochi_max_valued_dof: + # cont_dof = self.opt.autochi_max_valued_dof + f['cont_chisq_dof'] = numpy.repeat(cont_dof, len(ifo_events)) + + if 'chisq_dof' in ifo_events.dtype.names: + f['chisq_dof'] = ifo_events['chisq_dof'] / 2 + 1 + else: + f['chisq_dof'] = numpy.zeros(len(ifo_events)) + + f['template_hash'] = th[tid] + + if self.opt.psdvar_short_segment is not None: + f['psd_var_val'] = ifo_events['psd_var_val'] + + if self.opt.trig_start_time: + f['search/start_time'] = numpy.array([\ + self.opt.trig_start_time[ifo]], dtype=numpy.int32) + search_start_time = float(self.opt.trig_start_time[ifo]) + else: + f['search/start_time'] = numpy.array([\ + self.opt.gps_start_time[ifo] + \ + self.opt.segment_start_pad[ifo]], dtype=numpy.int32) + search_start_time = float(self.opt.gps_start_time[ifo] + \ + self.opt.segment_start_pad[ifo]) + if self.opt.trig_end_time: + f['search/end_time'] = numpy.array([\ + self.opt.trig_end_time[ifo]], dtype=numpy.int32) + search_end_time = float(self.opt.trig_end_time[ifo]) + else: + f['search/end_time'] = numpy.array([self.opt.gps_end_time[ifo] \ + - self.opt.segment_end_pad[ifo]], dtype=numpy.int32) + search_end_time = float(self.opt.gps_end_time[ifo] - \ + self.opt.segment_end_pad[ifo]) + + if self.write_performance: + self.analysis_time = search_end_time - search_start_time + time_ratio = numpy.array([float(self.analysis_time) / float(self.run_time)]) + temps_per_core = float(self.ntemplates) / float(self.ncores) + filters_per_core = float(self.nfilters) / float(self.ncores) + f['search/templates_per_core'] = \ + numpy.array([float(temps_per_core) * float(time_ratio)]) + f['search/filter_rate_per_core'] = \ + numpy.array([filters_per_core / float(self.run_time)]) + f['search/setup_time_fraction'] = \ + numpy.array([float(self.setup_time) / float(self.run_time)]) + + if 'gating_info' in self.global_params: + gating_info = self.global_params['gating_info'] + for gate_type in ['file', 'auto']: + if gate_type in gating_info: + f['gating/' + gate_type + '/time'] = \ + numpy.array([float(g[0]) for g in \ + gating_info[gate_type]]) + f['gating/' + gate_type + '/width'] = \ + numpy.array([g[1] for g in gating_info[gate_type]]) + f['gating/' + gate_type + '/pad'] = \ + numpy.array([g[2] for g in gating_info[gate_type]]) + +__all__ = ['threshold_and_cluster', 'newsnr', 'effsnr', 'newsnr_sgveto', + 'findchirp_cluster_over_window', + 'threshold', 'cluster_reduce', 'ThresholdCluster', + 'threshold_real_numpy', 'threshold_only', + 'EventManager', 'EventManagerMultiDet'] diff --git a/pycbc/inference/io/base_hdf.py b/pycbc/inference/io/base_hdf.py index bd3cf26864d..8fd0969f15e 100644 --- a/pycbc/inference/io/base_hdf.py +++ b/pycbc/inference/io/base_hdf.py @@ -46,6 +46,7 @@ from pycbc.inject import InjectionSet from pycbc.io import (dump_state, load_state) from pycbc.workflow import WorkflowConfigParser +from pycbc.types import FrequencySeries def format_attr(val): @@ -913,3 +914,44 @@ def read_config_file(self, return_cp=True, index=-1): cp.read_file(cf) return cp return cf + + def read_data(self): + """Loads the data stored in the file as a FrequencySeries. + + Only works for models that store data as a frequency series in + ``data/DET/stilde``. A ``KeyError`` will be raised if the model used + did not store data in that path. 
+ + Returns + ------- + dict : + Dictionary of detector name -> FrequencySeries. + """ + data = {} + fmt = 'data/{}/stilde' + for det in self['data'].keys(): + group = self[fmt.format(det)] + data[det] = FrequencySeries( + group[()], delta_f=group.attrs['delta_f'], + epoch=group.attrs['epoch']) + return data + + def read_psds(self): + """Loads the PSDs stored in the file as a FrequencySeries. + + Only works for models that store PSDs in + ``data/DET/psds/0``. A ``KeyError`` will be raised if the model used + did not store PSDs in that path. + + Returns + ------- + dict : + Dictionary of detector name -> FrequencySeries. + """ + psds = {} + fmt = 'data/{}/psds/0' + for det in self['data'].keys(): + group = self[fmt.format(det)] + psds[det] = FrequencySeries( + group[()], delta_f=group.attrs['delta_f']) + return psds diff --git a/pycbc/inference/models/base_data.py b/pycbc/inference/models/base_data.py index a9e51cf1868..46df3540375 100644 --- a/pycbc/inference/models/base_data.py +++ b/pycbc/inference/models/base_data.py @@ -81,10 +81,11 @@ class BaseDataModel(BaseModel): See ``BaseModel`` for additional attributes and properties. """ def __init__(self, variable_params, data, recalibration=None, gates=None, - injection_file=None, **kwargs): + injection_file=None, no_save_data=False, **kwargs): self._data = None self.data = data self.recalibration = recalibration + self.no_save_data = no_save_data self.gates = gates self.injection_file = injection_file super(BaseDataModel, self).__init__(variable_params, **kwargs) @@ -163,7 +164,8 @@ def write_metadata(self, fp): The inference file to write to. """ super(BaseDataModel, self).write_metadata(fp) - fp.write_stilde(self.data) + if not self.no_save_data: + fp.write_stilde(self.data) # save injection parameters if self.injection_file is not None: fp.write_injections(self.injection_file) diff --git a/pycbc/inference/models/gaussian_noise.py b/pycbc/inference/models/gaussian_noise.py index 60c5863f6aa..ced08e2a757 100644 --- a/pycbc/inference/models/gaussian_noise.py +++ b/pycbc/inference/models/gaussian_noise.py @@ -115,12 +115,16 @@ class BaseGaussianNoise(BaseDataModel): def __init__(self, variable_params, data, low_frequency_cutoff, psds=None, high_frequency_cutoff=None, normalize=False, static_params=None, ignore_failed_waveforms=False, + no_save_data=False, **kwargs): # set up the boiler-plate attributes super(BaseGaussianNoise, self).__init__(variable_params, data, static_params=static_params, + no_save_data=no_save_data, **kwargs) self.ignore_failed_waveforms = ignore_failed_waveforms + self.no_save_data = no_save_data + # check if low frequency cutoff has been provided for every IFO with # data for ifo in self.data: @@ -432,7 +436,7 @@ def write_metadata(self, fp): for det, data in self.data.items(): key = '{}_analysis_segment'.format(det) fp.attrs[key] = [float(data.start_time), float(data.end_time)] - if self._psds is not None: + if self._psds is not None and not self.no_save_data: fp.write_psd(self._psds) # write the times used for psd estimation (if they were provided) for det in self.psd_segments: @@ -458,7 +462,8 @@ def write_metadata(self, fp): self._f_upper[det] @classmethod - def from_config(cls, cp, data_section='data', **kwargs): + def from_config(cls, cp, data_section='data', data=None, psds=None, + **kwargs): r"""Initializes an instance of this class from the given config file. 
In addition to ``[model]``, a ``data_section`` (default ``[data]``) @@ -526,8 +531,11 @@ def from_config(cls, cp, data_section='data', **kwargs): args['normalize'] = True if cp.has_option('model', 'ignore-failed-waveforms'): args['ignore_failed_waveforms'] = True + if cp.has_option('model', 'no-save-data'): + args['no_save_data'] = True # get any other keyword arguments provided in the model section - ignore_args = ['name', 'normalize', 'ignore-failed-waveforms'] + ignore_args = ['name', 'normalize', + 'ignore-failed-waveforms', 'no-save-data'] for option in cp.options("model"): if option in ("low-frequency-cutoff", "high-frequency-cutoff"): ignore_args.append(option) @@ -549,20 +557,22 @@ def from_config(cls, cp, data_section='data', **kwargs): # load the data opts = data_opts_from_config(cp, data_section, args['low_frequency_cutoff']) - strain_dict, psd_strain_dict = data_from_cli(opts, **data_args) - # convert to frequency domain and get psds - stilde_dict, psds = fd_data_from_strain_dict(opts, strain_dict, - psd_strain_dict) - # save the psd data segments if the psd was estimated from data - if opts.psd_estimation is not None: - _tdict = psd_strain_dict or strain_dict - for det in psds: - psds[det].psd_segment = (_tdict[det].start_time, - _tdict[det].end_time) - # gate overwhitened if desired - if opts.gate_overwhitened and opts.gate is not None: - stilde_dict = gate_overwhitened_data(stilde_dict, psds, opts.gate) - args.update({'data': stilde_dict, 'psds': psds}) + if data is None or psds is None: + strain_dict, psd_strain_dict = data_from_cli(opts, **data_args) + # convert to frequency domain and get psds + stilde_dict, psds = fd_data_from_strain_dict(opts, strain_dict, + psd_strain_dict) + # save the psd data segments if the psd was estimated from data + if opts.psd_estimation is not None: + _tdict = psd_strain_dict or strain_dict + for det in psds: + psds[det].psd_segment = (_tdict[det].start_time, + _tdict[det].end_time) + # gate overwhitened if desired + if opts.gate_overwhitened and opts.gate is not None: + stilde_dict = gate_overwhitened_data(stilde_dict, psds, opts.gate) + data = stilde_dict + args.update({'data': data, 'psds': psds}) # any extra args args.update(cls.extra_args_from_config(cp, "model", skip_args=ignore_args)) diff --git a/pycbc/inference/models/relbin.py b/pycbc/inference/models/relbin.py index 3af6c90e70c..74c1270336a 100644 --- a/pycbc/inference/models/relbin.py +++ b/pycbc/inference/models/relbin.py @@ -36,6 +36,7 @@ from pycbc.types import Array from .gaussian_noise import BaseGaussianNoise +from .relbin_cpu import likelihood_parts, likelihood_parts_v def setup_bins(f_full, f_lo, f_hi, chi=1.0, eps=0.5, gammas=None): @@ -137,7 +138,7 @@ class Relative(BaseGaussianNoise): epsilon : float, optional Tuning parameter used in calculating the frequency bins. Lower values will result in higher resolution and more bins. - vary_polarization: boolean, optional + earth_rotation: boolean, optional Default is False. If True, then vary the fp/fc polarization values as a function of frequency bin, using a predetermined PN approximation for the time offsets. 
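For orientation, the binned likelihood that the new `likelihood_parts` kernels evaluate reduces to a short accumulation over frequency bins. Below is a minimal numpy sketch, assuming per-bin summary data a0, a1, b0, b1 (as computed in `summary_data`, with a1 and b1 already divided by the bin widths) and the waveform ratio r = h/h0 evaluated at the bin edges; `binned_inner_products` is a hypothetical name, not part of this patch:

import numpy

def binned_inner_products(r, a0, a1, b0, b1):
    # piecewise-linear expansion of the waveform ratio in each bin:
    # r(f) ~= r0 + r1 * (f - f_left)
    r0 = r[:-1]
    r1 = numpy.diff(r)
    # <d|h> ~= sum over bins of a0 * r0 + a1 * r1
    hd = numpy.sum(a0 * r0 + a1 * r1)
    # <h|h> ~= sum over bins of b0 * |r0|^2 + 2 * b1 * Re(r1 * conj(r0))
    hh = numpy.sum(b0 * numpy.abs(r0) ** 2
                   + 2.0 * b1 * (r1 * numpy.conj(r0)).real)
    return hd, hh

The Cython kernels in relbin_cpu.pyx fuse this loop with the antenna projection and time shift so the ratio never needs to be stored as an intermediate array.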
@@ -155,7 +156,7 @@ def __init__( fiducial_params=None, gammas=None, epsilon=0.5, - vary_polarization=False, + earth_rotation=False, **kwargs ): super(Relative, self).__init__( @@ -179,7 +180,8 @@ def __init__( self.comp_psds = {ifo: p.numpy() for ifo, p in self.psds.items()} # store fiducial waveform params - self.fid_params = fiducial_params + self.fid_params = self.static_params.copy() + self.fid_params.update(fiducial_params) for ifo in data: # store data and frequencies @@ -209,36 +211,32 @@ def __init__( ) # prune low frequency samples to avoid waveform errors - nbelow = sum(self.f[ifo] < f_lo) - fpoints = Array(self.f[ifo].astype(numpy.float64))[nbelow:] - approx = self.static_params["approximant"] - fid_hp, fid_hc = get_fd_waveform_sequence( - approximant=approx, sample_points=fpoints, **self.fid_params - ) + fpoints = Array(self.f[ifo].astype(numpy.float64)) + fpoints = fpoints[self.kmin[ifo]:self.kmax[ifo]+1] + fid_hp, fid_hc = get_fd_waveform_sequence(sample_points=fpoints, + **self.fid_params) + # check for zeros at high frequencies - numzeros = list(fid_hp[::-1] != 0j).index(True) - n_above_fhi = (len(self.f[ifo]) - 1) - self.kmax[ifo] # make sure only nonzero samples are included in bins - if numzeros > n_above_fhi: - nremove = numzeros - n_above_fhi - new_kmax = self.kmax[ifo] - nremove + numzeros = list(fid_hp[::-1] != 0j).index(True) + if numzeros > 0: + new_kmax = self.kmax[ifo] - numzeros f_hi = new_kmax * self.df[ifo] logging.info( "WARNING! Fiducial waveform terminates below " "high-frequency-cutoff, final bin frequency " - "will be %s Hz", - f_hi, - ) + "will be %s Hz", f_hi) # make copy of fiducial wfs, adding back in low frequencies - hp0 = numpy.concatenate([[0j] * nbelow, fid_hp.copy()]) - hc0 = numpy.concatenate([[0j] * nbelow, fid_hc.copy()]) + fid_hp.resize(len(self.f[ifo])) + fid_hc.resize(len(self.f[ifo])) + hp0 = numpy.roll(fid_hp, self.kmin[ifo]) + hc0 = numpy.roll(fid_hc, self.kmin[ifo]) + fp, fc = self.det[ifo].antenna_pattern( - self.fid_params["ra"], - self.fid_params["dec"], - self.fid_params["polarization"], - self.fid_params["tc"], - ) + self.fid_params["ra"], self.fid_params["dec"], + self.fid_params["polarization"], self.fid_params["tc"]) + tshift = numpy.exp(-2.0j * numpy.pi * self.f[ifo] * self.ta[ifo]) self.h00[ifo] = numpy.array(hp0 * fp + hc0 * fc) * tshift @@ -277,8 +275,8 @@ def __init__( self.sdat[ifo] = self.summary_data(ifo) # Calculate the times to evaluate fp/fc - if vary_polarization is not False: - logging.info("Enabling frequency-dependent polarization") + if earth_rotation is not False: + logging.info("Enabling frequency-dependent earth rotation") from pycbc.waveform.spa_tmplt import spa_length_in_time times = spa_length_in_time( @@ -288,8 +286,27 @@ def __init__( f_lower=self.fedges[ifo], ) self.antenna_time[ifo] = self.fid_params["tc"] - times + self.lik = likelihood_parts_v else: self.antenna_time[ifo] = self.fid_params["tc"] + self.lik = likelihood_parts + + # determine the unique ifo layouts + self.edge_unique = [] + self.ifo_map = {} + for ifo in self.fedges: + if len(self.edge_unique) == 0: + self.ifo_map[ifo] = 0 + self.edge_unique.append(Array(self.fedges[ifo])) + else: + for i, edge in enumerate(self.edge_unique): + if numpy.array_equal(edge, self.fedges[ifo]): + self.ifo_map[ifo] = i + break + else: + self.ifo_map[ifo] = len(self.edge_unique) + self.edge_unique.append(Array(self.fedges[ifo])) + logging.info("%s unique ifo layouts", len(self.edge_unique)) def summary_data(self, ifo): """Compute summary data bin 
coefficients encoding linear @@ -305,39 +322,47 @@ def summary_data(self, ifo): hd = numpy.conjugate(self.comp_data[ifo]) * self.h00[ifo] hd /= self.comp_psds[ifo] hh = (numpy.absolute(self.h00[ifo]) ** 2.0) / self.comp_psds[ifo] + # constant terms - a0 = numpy.array( - [ + a0 = numpy.array([ 4.0 * self.df[ifo] * numpy.sum(hd[l:h]) for l, h in self.bins[ifo] - ] - ) - b0 = numpy.array( - [ + ]) + b0 = numpy.array([ 4.0 * self.df[ifo] * numpy.sum(hh[l:h]) for l, h in self.bins[ifo] - ] - ) + ]) + # linear terms bin_lefts = [fl for fl, fh in self.fbins[ifo]] - a1 = numpy.array( - [ - 4.0 - * self.df[ifo] + a1 = numpy.array([ + 4.0 * self.df[ifo] * numpy.sum(hd[l:h] * (self.f[ifo][l:h] - bl)) for (l, h), bl in zip(self.bins[ifo], bin_lefts) - ] - ) - b1 = numpy.array( - [ - 4.0 - * self.df[ifo] + ]) + b1 = numpy.array([ + 4.0 * self.df[ifo] * numpy.sum(hh[l:h] * (self.f[ifo][l:h] - bl)) for (l, h), bl in zip(self.bins[ifo], bin_lefts) - ] - ) + ]) + + freqs = self.fedges[ifo] + df = (freqs[1:] - freqs[:-1]) + a1 /= df + b1 /= df return {"a0": a0, "a1": a1, "b0": b0, "b1": b1} + def get_waveforms(self, params): + """ Get the waveform polarizations for each ifo + """ + wfs = [] + for edge in self.edge_unique: + hp, hc = get_fd_waveform_sequence(sample_points=edge, **params) + hp = hp.numpy() + hc = hc.numpy() + wfs.append((hp, hc)) + return {ifo: wfs[self.ifo_map[ifo]] for ifo in self.data} + def _loglr(self): r"""Computes the log likelihood ratio, @@ -357,40 +382,34 @@ def _loglr(self): # get model params p = self.current_params.copy() p.update(self.static_params) + wfs = self.get_waveforms(p) hh = 0.0 hd = 0j for ifo in self.data: - # get detector antenna pattern - fp, fc = self.det[ifo].antenna_pattern( - p["ra"], p["dec"], p["polarization"], self.antenna_time[ifo] - ) - # get timeshift relative to end of data - dt = self.det[ifo].time_delay_from_earth_center( - p["ra"], p["dec"], p["tc"] - ) - dtc = p["tc"] + dt - self.end_time[ifo] - tshift = numpy.exp(-2.0j * numpy.pi * self.fedges[ifo] * dtc) - # generate template and calculate waveform ratio - hp, hc = get_fd_waveform_sequence( - sample_points=Array(self.fedges[ifo]), **p - ) - htilde = numpy.array(fp * hp + fc * hc) * tshift - r = (htilde / self.h00_sparse[ifo]).astype(numpy.complex128) - r0 = r[:-1] - r1 = (r[1:] - r[:-1]) / ( - self.fedges[ifo][1:] - self.fedges[ifo][:-1] - ) - # is sum over bins of A0r0 + A1r1 - hd += numpy.sum( - self.sdat[ifo]["a0"] * r0 + self.sdat[ifo]["a1"] * r1 - ) - # is sum over bins of B0|r0|^2 + 2B1Re(r1r0*) - hh += numpy.sum( - self.sdat[ifo]["b0"] * numpy.absolute(r0) ** 2.0 - + 2.0 * self.sdat[ifo]["b1"] * (r1 * numpy.conjugate(r0)).real - ) + det = self.det[ifo] + freqs = self.fedges[ifo] + sdat = self.sdat[ifo] + hp, hc = wfs[ifo] + h00 = self.h00_sparse[ifo] + end_time = self.end_time[ifo] + times = self.antenna_time[ifo] + + # project waveform to detector frame + fp, fc = det.antenna_pattern(p["ra"], p["dec"], + p["polarization"], times) + dt = det.time_delay_from_earth_center(p["ra"], p["dec"], times) + dtc = p["tc"] + dt - end_time + + hdp, hhp = self.lik(freqs, fp, fc, dtc, + hp, hc, h00, + sdat['a0'], sdat['a1'], + sdat['b0'], sdat['b1']) + + hd += hdp + hh += hhp + hd = abs(hd) llr = numpy.log(special.i0e(hd)) + hd - 0.5 * hh return float(llr) diff --git a/pycbc/inference/models/relbin_cpu.pyx b/pycbc/inference/models/relbin_cpu.pyx new file mode 100644 index 00000000000..2038226ea76 --- /dev/null +++ b/pycbc/inference/models/relbin_cpu.pyx @@ -0,0 +1,67 @@ +""" Optimized inner loop functions 
for the relative likelihood model +""" +cdef extern from "complex.h": + double complex exp(double complex) + double norm(double complex) + double complex conj(double complex) + double real(double complex) + +cimport cython + +cpdef likelihood_parts(double [::1] freqs, + double fp, + double fc, + double dtc, + double complex[::1] hp, + double complex[::1] hc, + double complex[::1] h00, + double complex[::1] a0, + double complex[::1] a1, + double [::1] b0, + double [::1] b1, + ) : + cdef size_t i + cdef double complex hd=0, r0, r0n, r1 + cdef double hh=0 + + N = freqs.shape[0] + for i in range(N): + r0n = (exp(-2.0j * 3.141592653 * dtc * freqs[i]) + * (fp * hp[i] + fc * hc[i])) / h00[i] + r1 = r0n - r0 + + if i > 0: + hd += a0[i-1] * r0 + a1[i-1] * r1 + hh += b0[i-1] * norm(r0) + 2.0 * b1[i-1] * real(r1 * conj(r0)) + + r0 = r0n + return hd, hh + +cpdef likelihood_parts_v(double [::1] freqs, + double[::1] fp, + double[::1] fc, + double[::1] dtc, + double complex[::1] hp, + double complex[::1] hc, + double complex[::1] h00, + double complex[::1] a0, + double complex[::1] a1, + double [::1] b0, + double [::1] b1, + ) : + cdef size_t i + cdef double complex hd=0, r0, r0n, r1 + cdef double hh=0 + + N = freqs.shape[0] + for i in range(N): + r0n = (exp(-2.0j * 3.141592653 * dtc[i] * freqs[i]) + * (fp[i] * hp[i] + fc[i] * hc[i])) / h00[i] + r1 = r0n - r0 + + if i > 0: + hd += a0[i-1] * r0 + a1[i-1] * r1 + hh += b0[i-1] * norm(r0) + 2.0 * b1[i-1] * real(r1 * conj(r0)) + + r0 = r0n + return hd, hh diff --git a/pycbc/inference/models/single_template.py b/pycbc/inference/models/single_template.py index 8782a62ea06..fc5e90ebe16 100644 --- a/pycbc/inference/models/single_template.py +++ b/pycbc/inference/models/single_template.py @@ -81,7 +81,8 @@ def __init__(self, variable_params, data, low_frequency_cutoff, #polarization array to marginalize over if num_samples given self.pflag = 0 if polarization_samples is not None: - self.polarization = numpy.linspace(0, 2*numpy.pi, polarization_samples) + self.polarization = numpy.linspace(0, 2*numpy.pi, + int(polarization_samples)) self.pflag = 1 # Calculate high sample rate SNR time series @@ -146,4 +147,4 @@ def _loglr(self): if self.pflag == 0: return float(vloglr) else: - return float(logsumexp(vloglr)) + return float(logsumexp(vloglr)) - numpy.log(len(vloglr)) diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index 4b983b4325e..1e334930513 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -637,6 +637,14 @@ def snr(self): def sgchisq(self): return self.get_column('sg_chisq') + @property + def hmchisq(self): + return np.array(self.trigs['hm_chisq'])[self.mask] + + @property + def combined_chisq(self): + return np.array(self.trigs['combined_chisq'])[self.mask] + @property def u_vals(self): return self.get_column('u_vals') @@ -650,6 +658,16 @@ def rchisq(self): def psd_var_val(self): return self.get_column('psd_var_val') + @property + def rhmchisq(self): + return np.array(self.trigs['hm_chisq'])[self.mask] \ + / np.array(self.trigs['hm_chisq_dof'])[self.mask] + + @property + def rcombined_chisq(self): + return np.array(self.trigs['combined_chisq'])[self.mask] \ + / np.array(self.trigs['combined_chisq_dof'])[self.mask] + @property def newsnr(self): return ranking.newsnr(self.snr, self.rchisq) @@ -671,6 +689,23 @@ def newsnr_sgveto_psdvar_threshold(self): def get_ranking(self, rank_name, **kwargs): return ranking.get_sngls_ranking_from_trigs(self, rank_name, **kwargs) + @property + def newsnr_hmchisq(self): + return events.newsnr_sgveto(self.snr, self.rhmchisq) + 
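+    # The hm/combined rankings below reuse the sine-Gaussian reweighting:
+    # newsnr first scales the SNR by (0.5 * (1 + rchisq**3))**(-1/6) when
+    # the reduced chi^2 exceeds 1 (arXiv:1208.3491), then newsnr_sgveto
+    # divides by sqrt(sg_chisq / 4) when the sine-Gaussian chi^2 exceeds 4.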
+ @property + def newsnr_hmchisq_sgveto(self): + return events.newsnr_sgveto(self.snr, self.rhmchisq, self.sgchisq) + + @property + def newsnr_combined_chisq(self): + return events.newsnr_sgveto(self.snr, self.rcombined_chisq) + + @property + def newsnr_combined_chisq_sgveto(self): + return events.newsnr_sgveto(self.snr, self.rcombined_chisq, + self.sgchisq) + def get_column(self, cname): # Fiducial value that seems to work, not extensively tuned. MFRAC = 0.3 @@ -1118,6 +1153,7 @@ def __getitem__(self, col): chisq_choices = ['traditional', 'cont', 'bank', 'max_cont_trad', 'sg', + 'hm', 'combined', 'max_bank_cont', 'max_bank_trad', 'max_bank_cont_trad'] def get_chisq_from_file_choice(hdfile, chisq_choice): @@ -1141,6 +1177,14 @@ def get_chisq_from_file_choice(hdfile, chisq_choice): bank_chisq /= bank_chisq_dof if chisq_choice == 'sg': chisq = f['sg_chisq'][:] + elif chisq_choice == 'hm': + chisq = f['hm_chisq'][:] + dof = f['hm_chisq_dof'][:] + chisq /= dof + elif chisq_choice == 'combined': + chisq = f['combined_chisq'][:] + dof = f['combined_chisq_dof'][:] + chisq /= dof elif chisq_choice == 'traditional': chisq = trad_chisq elif chisq_choice == 'cont': diff --git a/pycbc/io/record.py b/pycbc/io/record.py index 6c7de929569..993d330fb4e 100644 --- a/pycbc/io/record.py +++ b/pycbc/io/record.py @@ -810,13 +810,17 @@ def __copy_attributes__(self, other, default=None): [setattr(other, attr, copy.deepcopy(getattr(self, attr, default))) \ for attr in self.__persistent_attributes__] - def __getattribute__(self, attr): + def __getattribute__(self, attr, no_fallback=False): """Allows fields to be accessed as attributes. """ # first try to get the attribute try: return numpy.ndarray.__getattribute__(self, attr) except AttributeError as e: + # don't try getitem, which might get back here + if no_fallback: + raise(e) + # might be a field, try to retrive it using getitem if attr in self.fields: return self.__getitem__(attr) @@ -884,25 +888,48 @@ def __getitem__(self, item): # # arg isn't a simple argument of row, so we'll have to eval it # - # get the function library - item_dict = dict(_numpy_function_lib.items()) - item_dict.update(self._functionlib) - # get the set of fields & attributes we will need - itemvars = get_vars_from_arg(item) - # pull out any other needed attributes - itemvars = get_fields_from_arg(item) - d = {attr: getattr(self, attr) - for attr in set(dir(self)).intersection(itemvars)} - item_dict.update(d) - # pull out the fields: note, by getting the parent fields, we - # also get the sub fields name - item_dict.update({fn: self.__getbaseitem__(fn) \ - for fn in set(self.fieldnames).intersection(itemvars)}) - # add any aliases - item_dict.update({alias: item_dict[name] - for alias,name in self.aliases.items() - if name in item_dict}) - return eval(item, {"__builtins__": None}, item_dict) + if not hasattr(self, '_code_cache'): + self._code_cache = {} + + if item not in self._code_cache: + code = compile(item, '', 'eval') + + # get the function library + item_dict = dict(_numpy_function_lib.items()) + item_dict.update(self._functionlib) + + # parse to get possible fields + itemvars_raw = get_fields_from_arg(item) + + itemvars = [] + for it in itemvars_raw: + try: + float(it) + is_num = True + except ValueError: + is_num = False + + if not is_num: + itemvars.append(it) + + self._code_cache[item] = (code, itemvars, item_dict) + + code, itemvars, item_dict = self._code_cache[item] + for it in itemvars: + if it in self.fieldnames: + # pull out the fields: note, by getting the parent fields 
+ # we also get the sub fields name + item_dict[it] = self.__getbaseitem__(it) + elif (it in self.__dict__) or (it in self._virtualfields): + # pull out any needed attributes + item_dict[it] = self.__getattribute__(it, no_fallback=True) + else: + # add any aliases + aliases = self.aliases + if it in aliases: + item_dict[it] = self.__getbaseitem__(aliases[it]) + + return eval(code, {"__builtins__": None}, item_dict) def __contains__(self, field): """Returns True if the given field name is in self's fields.""" diff --git a/pycbc/psd/analytical.py b/pycbc/psd/analytical.py old mode 100755 new mode 100644 diff --git a/pycbc/psd/estimate.py b/pycbc/psd/estimate.py index de26ea06081..9d5bbd993f5 100644 --- a/pycbc/psd/estimate.py +++ b/pycbc/psd/estimate.py @@ -313,3 +313,22 @@ def bandlimited_interpolate(series, delta_f): return interpolated_series + +def invert_psd(psd): + """Inverts the given PSD. + + Parameters + ---------- + psd : FrequencySeries + The PSD to invert. + + Returns + ------- + FrequencySeries + The inverse PSD. + """ + pdata = psd.copy().numpy() + # set 0s to infinity so they will be 0 in the inverse + zidx = pdata == 0. + pdata[zidx] = numpy.inf + return FrequencySeries(1./pdata, delta_f=psd.delta_f, epoch=psd.epoch) diff --git a/pycbc/results/legacy_grb.py b/pycbc/results/legacy_grb.py old mode 100755 new mode 100644 diff --git a/pycbc/strain/strain.py b/pycbc/strain/strain.py index e5912fe23b5..ff95d7bd9fc 100644 --- a/pycbc/strain/strain.py +++ b/pycbc/strain/strain.py @@ -263,6 +263,13 @@ def from_cli(opt, dyn_range_fac=1, precision='single', seed=opt.fake_strain_seed, sample_rate=fake_rate, low_frequency_cutoff=fake_flow) + if not strain.sample_rate_close(fake_rate): + err_msg = "Actual sample rate of generated data does not match " + err_msg += "that expected. Possible causes of this:\n" + err_msg += "The desired duration is not a multiple of delta_t. " + err_msg += "e.g. If using LISA with delta_t = 15 the duration " + err_msg += "must be a multiple of 15 seconds." 
+ raise ValueError(err_msg) if not opt.channel_name and (opt.injection_file \ or opt.sgburst_injection_file): @@ -460,7 +467,7 @@ def insert_strain_option_group(parser, gps_times=True): data_reading_group.add_argument("--taper-data", help="Taper ends of data to zero using the supplied length as a " "window (integer seconds)", type=int, default=0) - data_reading_group.add_argument("--sample-rate", type=int, + data_reading_group.add_argument("--sample-rate", type=float, help="The sample rate to use for h(t) generation (integer Hz)") data_reading_group.add_argument("--channel-name", type=str, help="The channel containing the gravitational strain data") @@ -637,7 +644,8 @@ def insert_strain_option_group_multi_ifo(parser, gps_times=True): type=int, default=0, metavar='IFO:LENGTH', help="Taper ends of data to zero using the " "supplied length as a window (integer seconds)") - data_reading_group_multi.add_argument("--sample-rate", type=int, nargs='+', + data_reading_group_multi.add_argument("--sample-rate", type=float, + nargs='+', action=MultiDetOptionAction, metavar='IFO:RATE', help="The sample rate to use for h(t) generation " " (integer Hz).") diff --git a/pycbc/types/frequencyseries.py b/pycbc/types/frequencyseries.py index dc0419ad3d7..955ba82ddf5 100644 --- a/pycbc/types/frequencyseries.py +++ b/pycbc/types/frequencyseries.py @@ -561,6 +561,19 @@ def match(self, other, psd=None, low_frequency_cutoff=low_frequency_cutoff, high_frequency_cutoff=high_frequency_cutoff) + def plot(self, **kwds): + """ Basic plot of this frequency series + """ + from matplotlib import pyplot + + if self.kind == 'real': + plot = pyplot.plot(self.sample_frequencies, self, **kwds) + return plot + elif self.kind == 'complex': + plot1 = pyplot.plot(self.sample_frequencies, self.real(), **kwds) + plot2 = pyplot.plot(self.sample_frequencies, self.imag(), **kwds) + return plot1, plot2 + def load_frequencyseries(path, group=None): """ Load a FrequencySeries from a .hdf, .txt or .npy file. The diff --git a/pycbc/types/optparse.py b/pycbc/types/optparse.py index c8299bcfcc2..1ff934c827e 100644 --- a/pycbc/types/optparse.py +++ b/pycbc/types/optparse.py @@ -43,7 +43,7 @@ class MultiDetOptionAction(argparse.Action): def __init__(self, option_strings, dest, - nargs=None, + nargs='+', const=None, default=None, type=None, diff --git a/pycbc/types/timeseries.py b/pycbc/types/timeseries.py index f38a3665bfc..0502ebfa41c 100644 --- a/pycbc/types/timeseries.py +++ b/pycbc/types/timeseries.py @@ -1016,6 +1016,19 @@ def detrend(self, type='linear'): from scipy.signal import detrend return self._return(detrend(self.numpy(), type=type)) + def plot(self, **kwds): + """ Basic plot of this time series + """ + from matplotlib import pyplot + + if self.kind == 'real': + plot = pyplot.plot(self.sample_times, self, **kwds) + return plot + elif self.kind == 'complex': + plot1 = pyplot.plot(self.sample_times, self.real(), **kwds) + plot2 = pyplot.plot(self.sample_times, self.imag(), **kwds) + return plot1, plot2 + def load_timeseries(path, group=None): """ Load a TimeSeries from a .hdf, .txt or .npy file. 
The
diff --git a/pycbc/vetoes/basischisq.py b/pycbc/vetoes/basischisq.py
new file mode 100644
index 00000000000..9281985cdee
--- /dev/null
+++ b/pycbc/vetoes/basischisq.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2018 Collin Capano
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+import numpy
+from pycbc import types
+from pycbc.types import FrequencySeries
+from pycbc.waveform import apply_fseries_time_shift
+from pycbc import filter
+from pycbc import psd as pypsd
+from pycbc.vetoes.chisq import SingleDetPowerChisq
+
+
+def construct_waveform_basis(waveforms, invasd=None, low_frequency_cutoff=None,
+                             normalize=True):
+    """Constructs a basis out of the given list of waveforms.
+
+    This uses a QR decomposition to construct the basis.
+
+    Parameters
+    ----------
+    waveforms : list of FrequencySeries
+        The waveforms to orthogonalize.
+    invasd : FrequencySeries, optional
+        Whiten the waveforms using the given inverse ASD before
+        orthogonalizing.
+    low_frequency_cutoff : float, optional
+        Only orthogonalize above the given frequency. If provided, the
+        returned basis will be zero at frequencies below this.
+    normalize : bool, optional
+        Normalize the bases before returning. Default is True.
+
+    Returns
+    -------
+    list of FrequencySeries
+        The orthogonalized basis elements.
+    """
+    df = waveforms[0].delta_f
+    if low_frequency_cutoff is not None:
+        kmin = int(low_frequency_cutoff / df)
+    else:
+        kmin = 0
+    # exclude any waveforms that are zero in the integration region
+    waveforms = [h for h in waveforms if (h.numpy()[kmin:] != 0.).any()]
+    nwfs = len(waveforms)
+    # get the maximum length of all of the waveforms, to ensure we get a big
+    # enough array
+    wf_lens = [len(h) for h in waveforms]
+    # sanity checks
+    if any([h.delta_f != df for h in waveforms]):
+        raise ValueError("delta_f mismatch between waveforms")
+    if invasd is not None:
+        if invasd.delta_f != df:
+            raise ValueError("delta_f mismatch with inverse ASD")
+        wf_lens = [min(len(invasd), l) for l in wf_lens]
+    # only create a basis if we have more than one waveform
+    if nwfs > 1:
+        # create the waveform array to orthogonalize
+        wf_array = numpy.zeros((max(wf_lens), nwfs), dtype=waveforms[0].dtype)
+        for ii,(h,kmax) in enumerate(zip(waveforms, wf_lens)):
+            wf_array[:kmax,ii] = h.numpy()[:kmax]
+            # whiten
+            if invasd is not None:
+                wf_array[:kmax,ii] *= invasd.numpy()[:kmax]
+            wf_array[:kmin,ii] *= 0.
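+            # zeroing below kmin ensures the QR step below only mixes
+            # content in the integration band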
+        # get bases as a list of FrequencySeries
+        q, _ = numpy.linalg.qr(wf_array)
+        bases = [FrequencySeries(ek, delta_f=df, epoch=h.epoch)
+                 for ek,h in zip(q.T, waveforms)]
+    # otherwise, just return a copy of the input
+    else:
+        bases = [waveforms[0].copy()]
+    if normalize:
+        for ek in bases:
+            ek /= filter.sigma(ek, low_frequency_cutoff=low_frequency_cutoff)
+    return bases
+
+
+def basis_chisq(whstilde, cplx_snr, trigger_times, sigma, basis, aks,
+                low_frequency_cutoff=None, high_frequency_cutoff=None,
+                corrmem=None):
+    """Calculates the basis chi squared for the given trigger SNRs/times.
+
+    Parameters
+    ----------
+    whstilde : FrequencySeries
+        The **whitened** data.
+    cplx_snr : array of complex float
+        The complex SNR of the full template with the data at each trigger.
+    trigger_times : array of float
+        A list of the trigger times. Must be the same length as ``cplx_snr``.
+    sigma : float
+        The normalization used in the calculation of the complex SNR; i.e.,
+        the overlap of the full template with itself.
+    basis : list of FrequencySeries
+        The list of basis waveforms to use. These are assumed to be whitened
+        and orthonormal.
+    aks : list of complex floats
+        The template's coefficients for each basis.
+    low_frequency_cutoff : float, optional
+        The starting frequency to use for the overlap calculations. If None,
+        will start at the beginning of the basis/data.
+    high_frequency_cutoff : float, optional
+        The ending frequency to use for the overlap calculations. If None,
+        will go to the end of the basis.
+    corrmem : list of FrequencySeries, optional
+        List of FrequencySeries to write the correlations between the bases
+        and data to. Must be the same length as ``basis``. If None, they will
+        be created.
+
+    Returns
+    -------
+    array of float
+        The basis chi squared for each trigger time.
+    """
+    # compute the correlations between the basis and the data
+    if corrmem is None:
+        corrmem = [types.FrequencySeries(types.zeros(len(ek), dtype=ek.dtype),
+                                         delta_f=ek.delta_f)
+                   for ek in basis]
+    for ek,corr in zip(basis, corrmem):
+        if corr.delta_f != whstilde.delta_f:
+            raise ValueError("data has a different delta_f than the basis")
+        N = (min(len(whstilde), len(ek)) - 1) * 2
+        kmin, kmax = filter.get_cutoff_indices(low_frequency_cutoff,
+                                               high_frequency_cutoff,
+                                               whstilde.delta_f, N)
+        filter.correlate(ek[kmin:kmax], whstilde[kmin:kmax], corr[kmin:kmax])
+        # set the epoch of the corr mem to be the same as the data, so we get
+        # the right time shifts
+        corr.kmin = kmin
+        corr.kmax = kmax
+        corr *= 4. * whstilde.delta_f
+    # cycle over the triggers, shifting the correlation vectors appropriately
+    chisq = numpy.zeros(len(cplx_snr),
+                        dtype=types.real_same_precision_as(whstilde))
+    for ii in range(len(cplx_snr)):
+        rho = cplx_snr[ii]
+        trigtime = trigger_times[ii]
+        for corr,ak in zip(corrmem, aks):
+            # rotate the correlation to the appropriate time
+            # note: we have to shift backward because the template is
+            # conjugated in the corr vector
+            dt = -float(trigtime - whstilde.start_time)
+            shifted = apply_fseries_time_shift(corr, dt, kmin=corr.kmin)
+            ek_s = shifted[corr.kmin:corr.kmax].sum()
+            chisq[ii] += abs(ek_s - rho*ak/sigma)**2.
+    return chisq
+
+
+def basis_chisq_timeseries(whstilde, cplx_snr, sigma, basis, aks,
+                           low_frequency_cutoff=None,
+                           high_frequency_cutoff=None):
+    """Calculates a time series of basis chi squared.
+
+    Parameters
+    ----------
+    whstilde : FrequencySeries
+        The **whitened** data.
+    cplx_snr : complex TimeSeries
+        A time series of the full template's complex SNR.
+    sigma : float
+        The normalization used in the calculation of the complex SNR; i.e.,
+        the overlap of the full template with itself.
+    basis : list of FrequencySeries
+        The list of basis waveforms to use. These are assumed to be whitened
+        and orthonormal.
+    aks : list of complex floats
+        The template's coefficients for each basis.
+    low_frequency_cutoff : float, optional
+        The starting frequency to use for the overlap calculations. If None,
+        will start at the beginning of the basis/data.
+    high_frequency_cutoff : float, optional
+        The ending frequency to use for the overlap calculations. If None,
+        will go to the end of the basis.
+
+    Returns
+    -------
+    TimeSeries
+        A time series of the basis chi squared.
+    """
+    chisq = None
+    for ek,ak in zip(basis, aks):
+        akhat = ak / sigma
+        # get the overlap of the basis with the data
+        ek_s = filter.matched_filter(ek, whstilde,
+                   low_frequency_cutoff=low_frequency_cutoff,
+                   high_frequency_cutoff=high_frequency_cutoff)
+        diff = abs(ek_s - cplx_snr*akhat)**2.
+        if chisq is None:
+            chisq = diff
+        else:
+            chisq += diff
+    return chisq
+
+
+class SingleDetBasisChisq(object):
+    """Class that handles precomputation and memory management for running
+    a basis chisq.
+    """
+    _basis_cache = 'cached_basis'
+
+    def __init__(self, snr_threshold=None):
+        self.basis = None
+        self.coeffs = None
+        self.ndim = None
+        self.dof = None
+        self.snr_threshold = snr_threshold
+        self.do = snr_threshold is not None
+        self._invasd = {}
+        self._corrmem = {}
+
+    def get_basis(self, template, psd=None):
+        """Retrieve basis and coefficients from the given template.
+        """
+        # we'll use the ID of the PSD to cache things
+        if psd is None:
+            key = None
+        else:
+            key = id(psd)
+        return getattr(template, self._basis_cache)[key]
+
+    def update_basis(self, template, psd=None):
+        """Updates the current basis/coefficients using the given template.
+        """
+        basis, coeffs = self.get_basis(template, psd=psd)
+        self.basis = basis
+        self.coeffs = coeffs
+        self.ndim = len(basis)
+        self.dof = 2*self.ndim - 2
+        return basis, coeffs
+
+    def return_timeseries(self, ntrigs, dlen):
+        """Determines if a chi squared time series should be calculated.
+
+        Parameters
+        ----------
+        ntrigs : int
+            The number of triggers that an SNR is needed for.
+        dlen : int
+            The length of the data (in the frequency domain).
+
+        Returns
+        -------
+        bool
+            True if a full time series should be calculated; False if it's
+            better to do a series of point estimates.
+        """
+        # if an FFT is done, then we need to do ~N log N operations for each
+        # basis
+        n_fft = dlen * numpy.log2(dlen) * self.ndim
+        # if point estimates are done, then, for each trigger, we need to do
+        # ~N*ndim operations to get the overlap of the basis with the data
+        # + ~N operations to rotate the data in time for each trigger
+        n_point = ntrigs*(dlen * self.ndim + dlen)
+        # prefer the full time series when the FFT cost is smaller
+        return n_fft < n_point
+
+    def invasd(self, psd):
+        """Gets the inverse ASD from the given PSD.
+        """
+        if psd is None:
+            return None
+        key = id(psd)
+        try:
+            invasd = self._invasd[key]
+        except KeyError:
+            # means PSD has changed, clear the dict and re-calculate
+            self._invasd.clear()
+            invasd = pypsd.invert_psd(psd)**0.5
+            self._invasd[key] = invasd
+        return invasd
+
+    def corrmem(self, stilde):
+        """Creates/retrieves vectors for writing correlations.
+
+        Vectors are cached based on the length of stilde.
+        """
+        N = len(stilde)
+        try:
+            corrmem = self._corrmem[N]
+        except KeyError:
+            # clear the dictionary for a new list
+            self._corrmem.clear()
+            corrmem = []
+        # add/subtract any more needed vectors
+        ncorrs = len(corrmem)
+        if ncorrs < self.ndim:
+            corrmem += [types.FrequencySeries(
+                types.zeros(N, dtype=stilde.dtype),
+                delta_f=stilde.delta_f)
+                for _ in range(self.ndim-ncorrs)]
+            self._corrmem[N] = corrmem
+        elif self.ndim < ncorrs:
+            corrmem = corrmem[:self.ndim]
+            self._corrmem[N] = corrmem
+        return corrmem
+
+    def values(self, template, stilde, psd, trigger_snrs, trigger_idx,
+               data_whitening=None):
+        """Calculates the basis chisq for the given trigger points.
+        """
+        if not self.do:
+            return None, None
+        if self.snr_threshold is not None:
+            keep = abs(trigger_snrs) >= self.snr_threshold
+            trigger_snrs = trigger_snrs[keep]
+            trigger_idx = trigger_idx[keep]
+
+        basis, coeffs = self.update_basis(template, psd)
+        if data_whitening is None:
+            # need to whiten the data
+            whstilde = stilde * self.invasd(psd)
+        elif data_whitening == 'whitened':
+            # don't need to do anything
+            whstilde = stilde
+        elif data_whitening == 'overwhitened':
+            whstilde = stilde * psd**0.5
+        else:
+            raise ValueError("unrecognized data_whitening argument {}".format(
+                data_whitening))
+
+        trigger_times = trigger_idx * stilde.delta_t + stilde.start_time
+        sigma = template.sigmasq(psd)**0.5
+
+        chisq = basis_chisq(whstilde, trigger_snrs, trigger_times, sigma,
+                            basis, coeffs,
+                            low_frequency_cutoff=template.f_lower,
+                            corrmem=self.corrmem(stilde))
+        # add back 1s for triggers that were below threshold
+        if self.snr_threshold is not None and len(keep) != len(chisq):
+            out = numpy.ones(len(keep), dtype=chisq.dtype)
+            out[keep] = chisq
+            chisq = out
+        return chisq, numpy.repeat(self.dof, len(chisq))
+
+
+class SingleDetHMChisq(SingleDetBasisChisq):
+    """Class that handles precomputation and memory management for running
+    the higher-mode chisq.
+    """
+    _basis_cache = 'cached_hmbasis'
+    returns = {'hm_chisq': numpy.float32, 'hm_chisq_dof': int}
+
+    def construct_waveform_basis(self, template, psd):
+        """Constructs a basis from the template's modes."""
+        invasd = self.invasd(psd)
+        waveforms = list(template.modes.values())
+        return construct_waveform_basis(waveforms, invasd, template.f_lower)
+
+    def update_basis(self, template, psd=None):
+        """Calculates/retrieves the basis for the given template.
+        """
+        if psd is not None:
+            key = id(psd)
+        else:
+            key = None
+        try:
+            # first try to return a cached basis
+            return super(SingleDetHMChisq, self).update_basis(template, psd)
+        except AttributeError:
+            # template does not have the attribute: means that a basis hasn't
+            # been calculated, so create the attribute and calculate
+            setattr(template, self._basis_cache, {})
+        except KeyError:
+            # means the PSD has changed; delete the old basis and create a
+            # new one
+            getattr(template, self._basis_cache).clear()
+        # Generate the mode basis
+        basis = self.construct_waveform_basis(template, psd)
+        # whiten the template
+        invasd = self.invasd(psd)
+        kmin = int(template.f_lower / template.delta_f)
+        kmax = min(len(invasd), len(template))
+        whtemplate = template.copy()
+        whtemplate[kmin:kmax] *= invasd[kmin:kmax]
+        whtemplate[:kmin] *= 0.
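+        # the coefficients below are the projections of the whitened
+        # template onto each (orthonormal) basis element; together with the
+        # basis, they fully characterize the template in the chisq subspace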
+        # calculate the expected values
+        coeffs = [numpy.complex(filter.overlap_cplx(ek, whtemplate,
+                      low_frequency_cutoff=template.f_lower,
+                      normalized=False))
+                  for ek in basis]
+
+        getattr(template, self._basis_cache)[key] = (basis, coeffs)
+        return super(SingleDetHMChisq, self).update_basis(template, psd=psd)
+
+    @staticmethod
+    def insert_option_group(parser):
+        """Adds the options needed to set up the HM Chisq."""
+        group = parser.add_argument_group("HM Chisq")
+        group.add_argument("--hmchisq-snr-threshold", type=float, default=None,
+                           help="SNR threshold to use for applying the HM "
+                                "chisq. If not provided, the HM Chisq test "
+                                "will not be performed.")
+
+    @classmethod
+    def from_cli(cls, opts):
+        """Initializes the HM Chisq using the given options."""
+        return cls(snr_threshold=opts.hmchisq_snr_threshold)
+
+
+class SingleDetCombinedChisq(SingleDetHMChisq):
+    """Class that handles precomputation and memory management for running
+    the higher-mode chisq combined with the power chisq.
+    """
+    _basis_cache = 'cached_cbasis'
+    returns = {'combined_chisq': numpy.float32, 'combined_chisq_dof': int}
+
+    def __init__(self, power_chisq_bins=0, snr_threshold=None):
+        super(SingleDetCombinedChisq, self).__init__(
+            snr_threshold=snr_threshold)
+        # use an instance of SingleDetPowerChisq in order to get cached power
+        # chisq bins
+        self._powerchisq = SingleDetPowerChisq(power_chisq_bins,
+                                               snr_threshold)
+
+    def construct_waveform_basis(self, template, psd):
+        """Constructs a basis from the power chisq bins and the template's
+        modes.
+        """
+        # get the power chisq bins
+        pchisq_bins = self._powerchisq.cached_chisq_bins(template, psd)
+        # construct the waveforms
+        waveforms = []
+        for ii,kstart in enumerate(pchisq_bins[:-1]):
+            kend = pchisq_bins[ii+1]
+            h = template.copy()
+            h[:kstart] *= 0.
+            h[kend:] *= 0.
+            waveforms.append(h)
+        # get the modes, excluding any that are 0 in the integration region
+        kmin = pchisq_bins[0]
+        modes = [h for h in template.modes.values()
+                 if (h.numpy()[kmin:] != 0.).any()]
+        # only use modes if we have more than one
+        if len(modes) > 1:
+            waveforms += modes
+        invasd = self.invasd(psd)
+        return construct_waveform_basis(waveforms, invasd, template.f_lower)
+
+    @staticmethod
+    def insert_option_group(parser):
+        """Adds the options needed to set up the Combined Chisq."""
+        group = parser.add_argument_group("Combined Chisq")
+        group.add_argument("--combined-chisq-snr-threshold", type=float,
+                           default=None,
+                           help="SNR threshold to use for applying the "
+                                "combined chisq. If not provided, the "
+                                "combined chisq test will not be performed.")
+
+    @classmethod
+    def from_cli(cls, opts):
+        """Initializes the Combined Chisq using the given options."""
+        return cls(power_chisq_bins=opts.chisq_bins,
+                   snr_threshold=opts.combined_chisq_snr_threshold)
+
diff --git a/pycbc/waveform/bank.py b/pycbc/waveform/bank.py
index 48e23d4a287..88e4afa6b64 100644
--- a/pycbc/waveform/bank.py
+++ b/pycbc/waveform/bank.py
@@ -659,6 +659,7 @@ def __init__(self, filename, filter_length, delta_f, dtype,
                  enable_compressed_waveforms=True,
                  low_frequency_cutoff=None,
                  waveform_decompression_method=None,
+                 generate_modes=None, modes_out=None,
                  **kwds):
         self.out = out
         self.dtype = dtype
@@ -671,6 +672,8 @@ def __init__(self, filename, filter_length, delta_f, dtype,
         self.max_template_length = max_template_length
         self.enable_compressed_waveforms = enable_compressed_waveforms
         self.waveform_decompression_method = waveform_decompression_method
+        self.generate_modes = generate_modes
+        self.modes_out = modes_out
         super(FilterBank, self).__init__(filename, approximant=approximant,
             parameters=parameters, **kwds)
@@ -749,7 +752,8 @@ def generate_with_delta_f_and_max_freq(self, t_num, max_freq, delta_f,
         htilde = pycbc.waveform.get_waveform_filter(
             cached_mem, self.table[t_num], approximant=approximant,
             f_lower=low_frequency_cutoff, f_final=max_freq, delta_f=delta_f,
-            distance=1./DYN_RANGE_FAC, delta_t=1./(2.*max_freq))
+            distance=1./DYN_RANGE_FAC, delta_t=1./(2.*max_freq),
+            generate_modes=self.generate_modes, modes_out=self.modes_out)
         return htilde
 
     def __getitem__(self, index):
@@ -759,6 +763,15 @@ def __getitem__(self, index):
         else:
             tempout = self.out
 
+        if self.generate_modes is not None:
+            modes_out = self.modes_out
+            if modes_out is None:
+                modes_out = {mode: tempout.copy()
+                             for mode in self.generate_modes}
+        else:
+            # ensure modes_out is always defined for the call below
+            modes_out = None
+
         approximant = self.approximant(index)
         f_end = self.end_frequency(index)
         if f_end is None or f_end >= (self.filter_length * self.delta_f):
@@ -785,6 +798,7 @@ def __getitem__(self, index):
             tempout[0:self.filter_length], self.table[index],
             approximant=approximant, f_lower=f_low, f_final=f_end,
             delta_f=self.delta_f, delta_t=self.delta_t, distance=distance,
+            generate_modes=self.generate_modes, modes_out=modes_out,
             **self.extra_args)
 
         # If available, record the total duration (which may
@@ -798,10 +812,11 @@ def __getitem__(self, index):
 
             self.table[index].template_duration = template_duration
 
-        htilde = htilde.astype(self.dtype)
+        # FIXME
+        htilde._data = htilde._data.astype(self.dtype)
         htilde.f_lower = f_low
         htilde.min_f_lower = self.min_f_lower
-        htilde.end_idx = int(f_end / htilde.delta_f)
+        htilde.end_idx = int(f_end / self.delta_f)
         htilde.params = self.table[index]
         htilde.chirp_length = template_duration
         htilde.length_in_time = ttotal
diff --git a/pycbc/waveform/phenomhm.py b/pycbc/waveform/phenomhm.py
new file mode 100644
index 00000000000..c9cafb528a8
--- /dev/null
+++ b/pycbc/waveform/phenomhm.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2018 Collin Capano, Sebastian Khan
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+"""Temporary module for calling PhenomHM mode-by-mode, directly. This should
+be removed once a standard method has been established in lalsimulation.
+"""
+
+import numpy
+import lal
+import lalsimulation as lalsim
+from pycbc.types import FrequencySeries
+
+all_modes = [(2,2), (2,1), (3,3), (3,2), (4,4), (4,3)]
+n_modes = len(all_modes)
+
+def get_phenomhm(**input_params):
+    """Generates a PhenomHM waveform. A ``modes`` list and ``phi_ref`` may
+    optionally be provided.
+
+    Parameters
+    ----------
+    modes : list of tuples
+        List of tuples giving (l,m) modes to generate. If not provided, or
+        None, will use all available modes.
+    phi_ref : float
+        Reference phase. Default is 0.
+    \**kwargs :
+        The rest of the keyword args should be the same as given to
+        ``get_fd_waveform``.
+    """
+    m1 = float(input_params['mass1'] * lal.MSUN_SI)
+    m2 = float(input_params['mass2'] * lal.MSUN_SI)
+    spin1x = float(input_params['spin1x'])
+    spin1y = float(input_params['spin1y'])
+    spin1z = float(input_params['spin1z'])
+    spin2x = float(input_params['spin2x'])
+    spin2y = float(input_params['spin2y'])
+    spin2z = float(input_params['spin2z'])
+    f_ref = float(input_params['f_ref'])
+    f_lower = float(input_params['f_lower'])
+    f_final = float(input_params['f_final'])
+    delta_f = float(input_params['delta_f'])
+    distance = float(input_params['distance'] * 1e6 * lal.PC_SI)
+    inclination = float(input_params['inclination'])
+    # check for precessing spins (not supported)
+    if numpy.nonzero([spin1x, spin1y, spin2x, spin2y])[0].any():
+        raise ValueError("non-zero x/y spins provided, but this is an "
+                         "aligned-spin approximant")
+    try:
+        phi_ref = float(input_params['phi_ref'])
+    except KeyError:
+        phi_ref = 0.
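+    # pull out the requested (l,m) modes; if none were given, all of the
+    # available modes will be generated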
+ try: + modes = input_params['modes'] + if not isinstance(modes, list): + modes = [modes] + modes = list(set(modes)) + except KeyError: + modes = None + # create the modes + if modes is not None: + ma = lalsim.SimInspiralCreateModeArray() + for l,m in modes: + lalsim.SimInspiralModeArrayActivateMode(ma, int(l), int(m)) + params = lal.CreateDict() + lalsim.SimInspiralWaveformParamsInsertModeArray(params, ma) + else: + params = None + modes = all_modes + # generate the hlms + freqs = lal.CreateREAL8Sequence(2) + freqs.data = [f_lower, f_final] + hlms = lalsim.SimIMRPhenomHMGethlmModes(freqs, m1, m2, spin1z, spin2z, + phi_ref, delta_f, f_ref, params) + # convert the mode frequency series to pycbc FrequencySeries + hplus = None + hcross = None + for l,m in modes: + l = int(l) + m = int(m) + h = lalsim.SphHarmFrequencySeriesGetMode(hlms, l, m) + ylm = lal.SpinWeightedSphericalHarmonic(inclination, 0., -2, l, m) + ylnm = numpy.conj(lal.SpinWeightedSphericalHarmonic( + inclination, 0., -2, l, -m)) + yplus = 0.5 * (ylm + (-1)**l * ylnm) + ycross = 0.5j * (ylm - (-1)**l * ylnm) + hp = FrequencySeries(yplus * h.data.data, delta_f=h.deltaF) + hc = FrequencySeries(ycross * h.data.data, delta_f=h.deltaF) + if hplus is None: + hplus = hp + else: + hplus += hp + if hcross is None: + hcross = hc + else: + hcross += hc + amp0 = lalsim.SimPhenomUtilsFDamp0((m1+m2)/lal.MSUN_SI, distance) + return amp0*hplus, amp0*hcross + + +phenomhm_approximants = {"IMRPhenomHM_modes": get_phenomhm} diff --git a/pycbc/waveform/utils.py b/pycbc/waveform/utils.py index 53a143b1e53..164aef6fa5b 100644 --- a/pycbc/waveform/utils.py +++ b/pycbc/waveform/utils.py @@ -481,6 +481,8 @@ def fd_taper(out, start, end, beta=8, side='left'): window = Array(signal.get_window(('kaiser', beta), winlen)) kmin = int(start / out.delta_f) kmax = kmin + winlen//2 + if kmin == kmax: + raise ValueError("zero window length provided") if side == 'left': out[kmin:kmax] *= window[:winlen//2] out[:kmin] *= 0. diff --git a/pycbc/waveform/waveform.py b/pycbc/waveform/waveform.py index aa90471dd4a..1e8ffd7fd71 100644 --- a/pycbc/waveform/waveform.py +++ b/pycbc/waveform/waveform.py @@ -42,7 +42,7 @@ spa_tmplt_precondition, spa_amplitude_factor, \ spa_length_in_time from six.moves import range as xrange - +from . 
import phenomhm class NoWaveformError(Exception): """This should be raised if generating a waveform would just result in all @@ -600,11 +600,11 @@ def get_fd_waveform_from_td(**params): """ # determine the duration to use - full_duration = duration = get_waveform_filter_length_in_time(**params) + full_duration = duration = max(0.1, get_waveform_filter_length_in_time(**params)) nparams = params.copy() while full_duration < duration * 1.5: - full_duration = get_waveform_filter_length_in_time(**nparams) + full_duration = max(0, get_waveform_filter_length_in_time(**nparams)) nparams['f_lower'] -= 1 if 'f_fref' not in nparams: @@ -948,6 +948,12 @@ def get_hm_length_in_time(lor_approx, maxm_default, **kwargs): cpu_td=cpu_td, filter_time_lengths=_filter_time_lengths) +# Approximants with higher modes +hm_approximants = {"IMRPhenomHM_modes": phenomhm.all_modes} + +cpu_fd.update(phenomhm.phenomhm_approximants) + +# We can do interpolation for waveforms that have a time length for apx in copy.copy(_filter_time_lengths): fd_apx = list(cpu_fd.keys()) td_apx = list(cpu_td.keys()) @@ -981,6 +987,42 @@ def get_waveform_filter(out, template=None, **kwargs): input_params = props(template, **kwargs) + try: + generate_modes = input_params['generate_modes'] + except KeyError: + generate_modes = None + if generate_modes is not None: + # check that we can + if input_params['approximant'] not in hm_approximants: + raise ValueError("cannot generate individual modes for approximant" + " {}".format(input_params['approximant'])) + modes = {} + try: + modes_out = input_params['modes_out'] + except KeyError: + # memory space for modes doesn't exist, so create it + modes_out = {lm: out.copy() for lm in generate_modes} + # copy the kwargs, but with generate modes disabled + mode_kwargs = kwargs.copy() + mode_kwargs.pop('generate_modes') + # zero-out 'out' for summing up the modes + out.clear() + for mode in generate_modes: + mode_kwargs['modes'] = [mode] + hlm = modes_out[mode] + hlm.clear() + hp = get_waveform_filter(hlm, template=template, **mode_kwargs) + modes[mode] = FrequencySeries(hlm, delta_f=hp.delta_f, copy=False) + kmax = min(n, len(hlm)) + # create the full waveform + out[:kmax] += hlm[:kmax] + htilde = FrequencySeries(out, delta_f=hp.delta_f, copy=False) + htilde.length_in_time = hp.length_in_time + htilde.chirp_length = hp.chirp_length + # attach the modes to the full waveform + htilde.modes = modes + return htilde + if input_params['approximant'] in filter_approximants(_scheme.mgr.state): wav_gen = filter_wav[type(_scheme.mgr.state)] htilde = wav_gen[input_params['approximant']](out=out, **input_params) @@ -1207,4 +1249,5 @@ def get_waveform_filter_length_in_time(approximant, template=None, **kwargs): "print_sgburst_approximants", "sgburst_approximants", "td_waveform_to_fd_waveform", "get_two_pol_waveform_filter", "NoWaveformError", "FailedWaveformError", "get_td_waveform_from_fd", + 'hm_approximants', 'cpu_fd', 'cpu_td', 'fd_sequence', '_filter_time_lengths'] diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py index a915b13ea7d..f0f079e18f2 100644 --- a/pycbc/workflow/minifollowups.py +++ b/pycbc/workflow/minifollowups.py @@ -210,6 +210,8 @@ def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file, if statfiles: statfiles = statfiles.find_output_with_ifo(curr_ifo) node.add_input_list_opt('--statistic-files', statfiles) + if tags: + node.add_list_opt('--tags', tags) node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file') 
node.new_output_file_opt(workflow.analysis_time, '.dax.map', '--output-map') @@ -307,6 +309,8 @@ def setup_injection_minifollowups(workflow, injection_file, inj_xml_file, node.add_input_opt('--inspiral-segments', insp_segs) node.add_opt('--inspiral-data-read-name', insp_data_name) node.add_opt('--inspiral-data-analyzed-name', insp_anal_name) + if tags: + node.add_list_opt('--tags', tags) node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file', tags=tags) node.new_output_file_opt(workflow.analysis_time, '.dax.map', '--output-map', tags=tags) node.new_output_file_opt(workflow.analysis_time, '.tc.txt', '--transformation-catalog', tags=tags) diff --git a/setup.py b/setup.py index 8fa743b6976..2443d2943b2 100755 --- a/setup.py +++ b/setup.py @@ -271,6 +271,13 @@ def run(self): extra_link_args=cython_link_args, compiler_directives={'embedsignature': True}) ext.append(e) +e = Extension("pycbc.inference.models.relbin_cpu", + ["pycbc/inference/models/relbin_cpu.pyx"], + language='c++', + extra_compile_args=cython_compile_args, + extra_link_args=cython_link_args, + compiler_directives={'embedsignature': True}) +ext.append(e) setup ( @@ -294,6 +301,7 @@ def run(self): 'pycbc.results': find_files('pycbc/results'), 'pycbc.tmpltbank': find_files('pycbc/tmpltbank')}, ext_modules = ext, + python_requires='>=2.7', classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', diff --git a/test/test_infmodel.py b/test/test_infmodel.py new file mode 100644 index 00000000000..c405f021ef0 --- /dev/null +++ b/test/test_infmodel.py @@ -0,0 +1,132 @@ +# Copyright (C) 2021 Alex Nitz +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+"""
+These are the unittests for pycbc.inference.models
+"""
+import unittest
+import copy
+from utils import simple_exit
+from pycbc.catalog import Merger
+from pycbc.psd import interpolate, inverse_spectrum_truncation
+from pycbc.frame import read_frame
+from pycbc.filter import highpass, resample_to_delta_t
+from astropy.utils.data import download_file
+from pycbc.inference import models, sampler
+from pycbc.distributions import Uniform, JointDistribution, SinAngle
+
+class TestModels(unittest.TestCase):
+    def setUp(self):
+        ###### Get data for the reference analysis of GW170817
+        m = Merger("GW170817")
+        ifos = ['H1', 'V1', 'L1']
+        self.psds = {}
+        self.data = {}
+
+        for ifo in ifos:
+            print("Processing {} data".format(ifo))
+
+            # Download the gravitational wave data for GW170817
+            url = "https://dcc.ligo.org/public/0146/P1700349/001/"
+            url += "{}-{}1_LOSC_CLN_4_V1-1187007040-2048.gwf"
+            fname = download_file(url.format(ifo[0], ifo[0]), cache=True)
+            ts = read_frame(fname, "{}:LOSC-STRAIN".format(ifo),
+                            start_time=int(m.time - 260),
+                            end_time=int(m.time + 40))
+            ts = highpass(ts, 15.0)
+            ts = resample_to_delta_t(ts, 1.0/2048)
+            ts = ts.time_slice(m.time-112, m.time + 16)
+            self.data[ifo] = ts.to_frequencyseries()
+
+            psd = interpolate(ts.psd(4), ts.delta_f)
+            psd = inverse_spectrum_truncation(psd, int(4 * psd.sample_rate),
+                                              trunc_method='hann',
+                                              low_frequency_cutoff=20.0)
+            self.psds[ifo] = psd
+
+        self.static = {'mass1':1.3757,
+                       'mass2':1.3757,
+                       'f_lower':20.0,
+                       'approximant':"TaylorF2",
+                       'polarization':0,
+                       'ra': 3.44615914,
+                       'dec': -0.40808407,
+                       'tc': 1187008882.42840,
+                       }
+        self.variable = ('distance',
+                         'inclination',
+                        )
+        self.flow = {'H1':25, 'L1':25, 'V1':25}
+        inclination_prior = SinAngle(inclination=None)
+        distance_prior = Uniform(distance=(10, 100))
+        tc_prior = Uniform(tc=(m.time-0.1, m.time+0.1))
+        self.prior = JointDistribution(self.variable, inclination_prior,
+                                       distance_prior)
+
+        ###### Expected answers
+        # Answer taken from the marginalized gaussian model
+        self.q1 = {'distance':42.0, 'inclination':2.5}
+        self.a1 = 541.8235746138382
+
+    def test_marginalized_gaussian_noise(self):
+        model = models.MarginalizedPhaseGaussianNoise(
+            self.variable, copy.deepcopy(self.data),
+            low_frequency_cutoff=self.flow,
+            psds=self.psds,
+            static_params=self.static,
+            prior=self.prior,
+        )
+        model.update(**self.q1)
+        self.assertAlmostEqual(self.a1, model.loglr, delta=1e-3)
+
+    def test_relative(self):
+        model = models.Relative(self.variable, copy.deepcopy(self.data),
+                                low_frequency_cutoff=self.flow,
+                                psds = self.psds,
+                                static_params = self.static,
+                                prior = self.prior,
+                                fiducial_params = {'mass1':1.3756},
+                                epsilon = .1,
+                                )
+        model.update(**self.q1)
+        self.assertAlmostEqual(self.a1, model.loglr, delta=0.002)
+
+    def test_single(self):
+        model = models.MarginalizedPhaseGaussianNoise(
+            self.variable, copy.deepcopy(self.data),
+            low_frequency_cutoff=self.flow,
+            psds = self.psds,
+            static_params = self.static,
+            prior = self.prior,
+        )
+        model.update(**self.q1)
+        self.assertAlmostEqual(self.a1, model.loglr, delta=0.02)
+
+suite = unittest.TestSuite()
+suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestModels))
+
+if __name__ == '__main__':
+    from astropy.utils import iers
+    iers.conf.auto_download = False
+    results = unittest.TextTestRunner(verbosity=2).run(suite)
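+    # simple_exit translates the test results into the process exit code so
+    # wrapping scripts can detect failures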
simple_exit(results) diff --git a/test/test_waveform.py b/test/test_waveform.py old mode 100755 new mode 100644