diff --git a/bdsg/include/bdsg/overlays/overlay_helper.hpp b/bdsg/include/bdsg/overlays/overlay_helper.hpp index 8485bfa5..caeec671 100644 --- a/bdsg/include/bdsg/overlays/overlay_helper.hpp +++ b/bdsg/include/bdsg/overlays/overlay_helper.hpp @@ -66,10 +66,11 @@ template class OverlayHelper { public: // Handle non-const base graph - T* apply(V* input_graph) { + template + T* apply(V* input_graph, Params&&... params) { auto mutable_overlaid = dynamic_cast(input_graph); if (mutable_overlaid == nullptr) { - overlay = make_unique(input_graph); + overlay = make_unique(input_graph, std::forward(params)...); mutable_overlaid = dynamic_cast(overlay.get()); assert(mutable_overlaid != nullptr); } @@ -78,10 +79,11 @@ class OverlayHelper { } // Handle const base graph - const T* apply(const V* input_graph) { + template + const T* apply(const V* input_graph, Params&&... params) { overlaid = dynamic_cast(input_graph); if (overlaid == nullptr) { - overlay = make_unique(input_graph); + overlay = make_unique(input_graph, std::forward(params)...); overlaid = dynamic_cast(overlay.get()); assert(overlaid != nullptr); } @@ -100,6 +102,7 @@ class OverlayHelper { /// Implementation of overlay helper functionality for when multiple overlays need to be stacked. // There must be a way to generalize with variadic templates // (I had trouble chaining the output of the nested overlays together and getting the types right when trying) +// TODO: Add support for passing overlay constructor arguments through. template class PairOverlayHelper { public: diff --git a/bdsg/include/bdsg/overlays/reference_path_overlay.hpp b/bdsg/include/bdsg/overlays/reference_path_overlay.hpp index 1164e59c..a073c616 100644 --- a/bdsg/include/bdsg/overlays/reference_path_overlay.hpp +++ b/bdsg/include/bdsg/overlays/reference_path_overlay.hpp @@ -9,6 +9,7 @@ #define BDSG_REFERENCE_PATH_OVERLAY_HPP_INCLUDED #include +#include #include #include @@ -30,7 +31,9 @@ class ReferencePathOverlay : public PathPositionHandleGraph { public: - ReferencePathOverlay(const PathHandleGraph* graph); + /// Create a ReferencePathOverlay. For paths with names in + /// extra_path_names, index them as if they were reference paths. + ReferencePathOverlay(const PathHandleGraph* graph, const std::unordered_set& extra_path_names = {}); ReferencePathOverlay() = default; ~ReferencePathOverlay() = default; diff --git a/bdsg/src/reference_path_overlay.cpp b/bdsg/src/reference_path_overlay.cpp index 8c69da41..f4ba79fd 100644 --- a/bdsg/src/reference_path_overlay.cpp +++ b/bdsg/src/reference_path_overlay.cpp @@ -6,25 +6,52 @@ #include #include +#include namespace bdsg { using namespace std; using namespace handlegraph; -ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph) : graph(graph) { +ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph, const std::unordered_set& extra_path_names) : graph(graph) { - // init the base hash table and gather path handles - uint64_t max_path_handle = 0; + // Get step counts for all paths we want to process, once. + std::unordered_map cached_step_counts; + graph->for_each_path_matching({PathSense::REFERENCE, PathSense::GENERIC}, {}, {}, [&](const path_handle_t& path) { + // Find and measure all the reference and generic paths. + // TODO: Kick out generic paths? + cached_step_counts[path] = graph->get_step_count(path); + }); + for (auto& path_name : extra_path_names) { + if (graph->has_path(path_name)) { + // The graph actually has this path. + path_handle_t path = graph->get_path_handle(path_name); + auto found = cached_step_counts.find(path); + if (found == cached_step_counts.end()) { + // And it's not already reference sense. + // Count steps and remember it + cached_step_counts.emplace_hint(found, path, graph->get_step_count(path)); + } + } + } + + // Now use the cache as a source of truth and make a vector of the paths. std::vector path_handles; - graph->for_each_path_handle([&](const path_handle_t& path) { + // We also track the numerically max path handle + uint64_t max_path_handle = 0; + for (auto& handle_and_length : cached_step_counts) { + const path_handle_t& path = handle_and_length.first; path_handles.push_back(path); + + // Each of the paths needs a PathRecord reference_paths.insert(pair(path, PathRecord())); + // And needs to be maxed into the max handles. max_path_handle = std::max(max_path_handle, handlegraph::as_integer(path)); - }); + } + // sort in descending order by length to limit parallel scheduling makespan std::sort(path_handles.begin(), path_handles.end(), [&](path_handle_t a, path_handle_t b) { - return graph->get_step_count(a) > graph->get_step_count(b); + return cached_step_counts.at(a) > cached_step_counts.at(b); }); std::vector> num_steps(graph->max_node_id() + 1); @@ -35,7 +62,7 @@ ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph) : graph auto& path_record = reference_paths.at(path); // init the step vectors - size_t path_size = graph->get_step_count(path); + size_t path_size = cached_step_counts.at(path); path_record.steps.resize(path_size); // record the steps and the path length