Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zipcode support #195

Merged
merged 21 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
05380ad
Add regular snarl check
Jan 25, 2023
90cc9e9
Fix bug in finding connected comonent in snarl tree
xchang1 Jan 25, 2023
b335c68
Add finding a chain and distances from its rank in the parent snarl
xchang1 Feb 1, 2023
a539334
Get actual root handle for root snarls
xchang1 Feb 5, 2023
4ced6f7
Fix getting distances in root snarl
xchang1 Feb 6, 2023
dd60057
Merge remote-tracking branch 'origin/preloading' into bugfix
xchang1 Feb 10, 2023
be9e28e
Merge commit 'dd600577131cc3e6e7e14d0c470bb8851c15b629' into lr-giraffe
adamnovak Mar 16, 2023
4045f6f
Merge remote-tracking branch 'origin/master' into lr-giraffe
adamnovak Mar 16, 2023
89fc1ca
Get children of a snarl more efficiently
xchang1 Mar 20, 2023
838e86d
Merge remote-tracking branch 'origin/master' into HEAD
xchang1 Apr 20, 2023
29d4dda
Merge remote-tracking branch 'upstream/master' into HEAD
adamnovak Apr 27, 2023
b809cb3
Merge remote-tracking branch 'origin/zip-clustering' into HEAD
adamnovak Jun 2, 2023
6edcbc9
Merge remote-tracking branch 'origin/master' into HEAD
Jun 20, 2023
095ea01
Add dag checker for snarls
xchang1 Jun 20, 2023
f5e50be
Use dijkstra for big snarls with zipcodes
xchang1 Jun 27, 2023
ba14f9f
Fix checking dags
Jun 28, 2023
8ebcfd3
Find snarls by rank by going through snarls, so that the ranks can be…
xchang1 Jul 1, 2023
97e42e0
Fix bug finding distances in a root snarl
xchang1 Jul 24, 2023
363588e
Fix is_loopin_chain to consider trivial chains
xchang1 Aug 31, 2023
a107621
Make max_prefix_sum accessible
xchang1 Feb 16, 2024
eb4a9a3
Merge remote-tracking branch 'origin/zip-clustering' into HEAD
xchang1 Feb 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions bdsg/include/bdsg/snarl_distance_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab
//Distance limit is the distance after which we give up if we're doing a traversal.
size_t distance_in_parent(const net_handle_t& parent, const net_handle_t& child1, const net_handle_t& child2, const HandleGraph* graph=nullptr, size_t distance_limit = std::numeric_limits<size_t>::max()) const;

//Like distance_in_parent, but for distances in a snarl, given ranks and orientations instead of handles.
//You should use distance_in_parent unless you're sure the ranks are correct - this shouldn't
//be exposed to the public interface but I needed it
size_t distance_in_snarl(const net_handle_t& parent, const size_t& rank1, const bool& right_side1,
const size_t& rank2, const bool& right_side2, const HandleGraph* graph=nullptr,
size_t distance_limit = std::numeric_limits<size_t>::max()) const;

///Find the maximum distance between two children in the parent.
///This is the same as distance_in_parent for everything except children of chains
size_t max_distance_in_parent(const net_handle_t& parent, const net_handle_t& child1, const net_handle_t& child2, const HandleGraph* graph=nullptr, size_t distance_limit = std::numeric_limits<size_t>::max()) const;
Expand All @@ -257,6 +264,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab
bool is_externally_start_start_connected(const net_handle_t net) const {return is_externally_start_start_connected(snarl_tree_records->at(get_record_offset(net)));}
bool is_externally_end_end_connected(const net_handle_t net) const {return is_externally_end_end_connected(snarl_tree_records->at(get_record_offset(net)));}


///For two net handles, get a net handle lowest common ancestor.
///If the lowest common ancestor is the root, then the two handles may be in
///different connected components. In this case, return false.
Expand Down Expand Up @@ -325,21 +333,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab

///Get the prefix sum value for a node in a chain.
///Fails if the parent of net is not a chain
size_t get_prefix_sum_value(const net_handle_t net) const;
size_t get_prefix_sum_value(const net_handle_t& net) const;

///Get the prefix sum value for a node in a chain.
///Get the maximum prefix sum value for a node in a chain.
///Fails if the parent of net is not a chain
size_t get_forward_loop_value(const net_handle_t net) const;
size_t get_max_prefix_sum_value(const net_handle_t& net) const;

///Get the prefix sum value for a node in a chain.
///Get the forward loop value for a node in a chain.
///Fails if the parent of net is not a chain
size_t get_forward_loop_value(const net_handle_t& net) const;

///Get the reverse loop value for a node in a chain.
///Fails if the parent of net is not a chain
size_t get_reverse_loop_value(const net_handle_t net) const;
size_t get_reverse_loop_value(const net_handle_t& net) const;

//If get_end is true, then get the second component of the last node in a looping chain.
//If the chain loops, then the first and last node are the same.
//If it is also a multicomponent chain, then it is in two different components.
//If get_end is true, then get the larger of the two components.
size_t get_chain_component(const net_handle_t net, bool get_end = false) const;
size_t get_chain_component(const net_handle_t& net, bool get_end = false) const;



Expand All @@ -363,11 +375,26 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab
///Returns true if the given net handle refers to (a traversal of) a snarl.
bool is_snarl(const net_handle_t& net) const;

///Return true if the given snarl is a DAG and false otherwise
///Also returns true if the given net_handle_t is not a snarl
bool is_dag(const net_handle_t& snarl) const;

///Given a snarl, return the number of non-dag edges it contains.
///Returns 0 for a dag.
size_t non_dag_edge_count(const net_handle_t& snarl, const HandleGraph* graph) const;

///Returns true if the given net handle refers to (a traversal of) a simple snarl
///A simple snarl is a bubble where each child node can only reach the boundary nodes,
///and each side of a node reaches a different boundary node
///There may also be an edge connecting the two boundary nodes but no additional
///edges are allowed
bool is_simple_snarl(const net_handle_t& net) const;

///Returns true if the given net handle refers to (a traversal of) a regular snarl
///A regular snarl is the same as a simple snarl, except that the children may be
///nested chains, rather than being restricted to nodes
bool is_regular_snarl(const net_handle_t& net) const;

///Returns true if the given net handle refers to (a traversal of) a chain.
bool is_chain(const net_handle_t& net) const;
///Returns true if the given net handle refers to (a traversal of) a chain that is not start-end connected
Expand Down Expand Up @@ -471,6 +498,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab
///number of nodes in the top-level snarl
size_t connected_component_count() const;

///Get the child of a snarl from its rank. This shouldn't be exposed to the public interface but I need it.
///Please don't use it.
///For rank 0 or 1, returns the sentinel facing in. Otherwise returns the child as a chain going START_END.
net_handle_t get_snarl_child_from_rank(const net_handle_t& snarl, const size_t& rank) const;

protected:
///Internal implementation for for_each_child.
bool for_each_child_impl(const net_handle_t& traversal, const std::function<bool(const net_handle_t&)>& iteratee) const;
Expand Down Expand Up @@ -1165,6 +1197,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab

size_t get_node_count() const;

//Get the offset of the list of children
size_t get_child_record_pointer() const;

bool for_each_child(const std::function<bool(const net_handle_t&)>& iteratee) const;
Expand Down Expand Up @@ -1223,6 +1256,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab
size_t get_node_length(size_t rank = std::numeric_limits<size_t>::max()) const;
bool get_node_is_reversed(size_t rank = std::numeric_limits<size_t>::max()) const;


net_handle_t get_child_from_rank(const size_t& rank) const;
bool for_each_child(const std::function<bool(const net_handle_t&)>& iteratee) const;

};
Expand Down
Loading
Loading