Skip to content

Commit

Permalink
Allow hinting paths to reference-index
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Oct 25, 2024
1 parent 33e2fc2 commit fd5c186
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 12 deletions.
11 changes: 7 additions & 4 deletions bdsg/include/bdsg/overlays/overlay_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,11 @@ template<typename T, typename U, typename V>
class OverlayHelper {
public:
// Handle non-const base graph
T* apply(V* input_graph) {
template <typename ...Params>
T* apply(V* input_graph, Params&&... params) {
auto mutable_overlaid = dynamic_cast<T*>(input_graph);
if (mutable_overlaid == nullptr) {
overlay = make_unique<U>(input_graph);
overlay = make_unique<U>(input_graph, std::forward<Params>(params)...);
mutable_overlaid = dynamic_cast<T*>(overlay.get());
assert(mutable_overlaid != nullptr);
}
Expand All @@ -78,10 +79,11 @@ class OverlayHelper {
}

// Handle const base graph
const T* apply(const V* input_graph) {
template <typename ...Params>
const T* apply(const V* input_graph, Params&&... params) {
overlaid = dynamic_cast<const T*>(input_graph);
if (overlaid == nullptr) {
overlay = make_unique<U>(input_graph);
overlay = make_unique<U>(input_graph, std::forward<Params>(params)...);
overlaid = dynamic_cast<T*>(overlay.get());
assert(overlaid != nullptr);
}
Expand All @@ -100,6 +102,7 @@ class OverlayHelper {
/// Implementation of overlay helper functionality for when multiple overlays need to be stacked.
// There must be a way to generalize with variadic templates
// (I had trouble chaining the output of the nested overlays together and getting the types right when trying)
// TODO: Add support for passing overlay constructor arguments through.
template<typename T1, typename U1, typename V1, typename T2, typename U2, typename V2>
class PairOverlayHelper {
public:
Expand Down
5 changes: 4 additions & 1 deletion bdsg/include/bdsg/overlays/reference_path_overlay.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define BDSG_REFERENCE_PATH_OVERLAY_HPP_INCLUDED

#include <unordered_map>
#include <unordered_set>

#include <handlegraph/path_position_handle_graph.hpp>
#include <sdsl/bit_vectors.hpp>
Expand All @@ -30,7 +31,9 @@ class ReferencePathOverlay : public PathPositionHandleGraph {

public:

ReferencePathOverlay(const PathHandleGraph* graph);
/// Create a ReferencePathOverlay. For paths with names in
/// extra_path_names, index them as if they were reference paths.
ReferencePathOverlay(const PathHandleGraph* graph, const std::unordered_set<std::string>& extra_path_names = {});
ReferencePathOverlay() = default;
~ReferencePathOverlay() = default;

Expand Down
41 changes: 34 additions & 7 deletions bdsg/src/reference_path_overlay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,52 @@
#include <omp.h>

#include <handlegraph/util.hpp>
#include <handlegraph/types.hpp>

namespace bdsg {

using namespace std;
using namespace handlegraph;

ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph) : graph(graph) {
ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph, const std::unordered_set<std::string>& extra_path_names) : graph(graph) {

// init the base hash table and gather path handles
uint64_t max_path_handle = 0;
// Get step counts for all paths we want to process, once.
std::unordered_map<path_handle_t, size_t> cached_step_counts;
graph->for_each_path_matching({PathSense::REFERENCE, PathSense::GENERIC}, {}, {}, [&](const path_handle_t& path) {
// Find and measure all the reference and generic paths.
// TODO: Kick out generic paths?
cached_step_counts[path] = graph->get_step_count(path);
});
for (auto& path_name : extra_path_names) {
if (graph->has_path(path_name)) {
// The graph actually has this path.
path_handle_t path = graph->get_path_handle(path_name);
auto found = cached_step_counts.find(path);
if (found == cached_step_counts.end()) {
// And it isn't already being indexed as a reference or generic path.
// Count its steps and remember it.
cached_step_counts.emplace_hint(found, path, graph->get_step_count(path));
}
}
}

// Now use the cache as a source of truth and make a vector of the paths.
std::vector<path_handle_t> path_handles;
graph->for_each_path_handle([&](const path_handle_t& path) {
// We also track the numerically largest path handle
uint64_t max_path_handle = 0;
for (auto& handle_and_length : cached_step_counts) {
const path_handle_t& path = handle_and_length.first;
path_handles.push_back(path);

// Each of the paths needs a PathRecord
reference_paths.insert(pair<path_handle_t, PathRecord>(path, PathRecord()));
// And its handle needs to be folded into the running maximum path handle.
max_path_handle = std::max<uint64_t>(max_path_handle, handlegraph::as_integer(path));
});
}

// sort in descending order by length to limit parallel scheduling makespan
std::sort(path_handles.begin(), path_handles.end(), [&](path_handle_t a, path_handle_t b) {
return graph->get_step_count(a) > graph->get_step_count(b);
return cached_step_counts.at(a) > cached_step_counts.at(b);
});

std::vector<std::atomic<size_t>> num_steps(graph->max_node_id() + 1);
Expand All @@ -35,7 +62,7 @@ ReferencePathOverlay::ReferencePathOverlay(const PathHandleGraph* graph) : graph
auto& path_record = reference_paths.at(path);

// init the step vectors
size_t path_size = graph->get_step_count(path);
size_t path_size = cached_step_counts.at(path);
path_record.steps.resize(path_size);

// record the steps and the path length
Expand Down

0 comments on commit fd5c186

Please sign in to comment.