Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions khmer/_oxli/graphs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,15 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:

void consume_seqfile_and_tag[SeqIO](const string &,
unsigned int,
unsigned long long)
unsigned long long)

# Ugly workaround. For some reason, Cython doesn't like *just this*
# templated overload -- it chooses whichever was defined last, breaking
# resolution for either strings or FastxParserPtr. So, we rename it on
# the Cython side and give it a real name substitution for code gen.
void consume_seqfile_and_tag_readparser "consume_seqfile_and_tag" [SeqIO](shared_ptr[CpReadParser[SeqIO]],
unsigned int,
unsigned long long)
unsigned long long)

void consume_sequence_and_tag(const string &,
unsigned long long &)
Expand All @@ -160,7 +160,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:
unsigned int &,
unsigned long long &) except +oxli_raise_py_error

uintptr_t trim_on_stoptags(string)
uintptr_t trim_on_stoptags(string)

unsigned int traverse_from_kmer(CpKmer,
uint32_t,
Expand All @@ -177,7 +177,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:
void load_stop_tags(string, bool) except +oxli_raise_py_error
void extract_unique_paths(string, uint32_t, float, vector[string])
void calc_connected_graph_size(CpKmer, uint64_t&, KmerSet&,
const uint64_t, bool)
const uint64_t, bool)
uint32_t kmer_degree(HashIntoType, HashIntoType)
uint32_t kmer_degree(const char *)
void find_high_degree_nodes(const char *, set[HashIntoType] &) const
Expand Down
117 changes: 76 additions & 41 deletions khmer/_oxli/graphs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ cdef class Hashtable:
return deref(self._ht_this).get_count(<HashIntoType> kmer)
else:
self._kmer_type_error(kmer)


def ksize(self):
"""k-mer size"""
Expand Down Expand Up @@ -211,18 +211,6 @@ cdef class Hashtable:
max_count))
return posns

def consume_seqfile_with_reads_parser(self, read_parser):
"""Count all k-mers from read_parser."""
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0

cdef CPyReadParser_Object* parser = <CPyReadParser_Object*>read_parser

deref(self._ht_this).consume_seqfile[CpFastxReader](parser.parser,
total_reads,
n_consumed)
return total_reads, n_consumed
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cut and pasted further down (now named `consume_seqfile_with_parser`).


def consume_seqfile(self, file_name):
"""Count all k-mers from file_name."""
cdef unsigned long long n_consumed = 0
Expand Down Expand Up @@ -273,6 +261,53 @@ cdef class Hashtable:
n_consumed)
return total_reads, n_consumed

def consume_seqfile_with_parser(self, object read_parser):
"""Count all k-mers from read_parser.

Returns the tuple (total_reads, n_consumed), i.e. the two counters
that are handed to the underlying consume_seqfile call.
"""
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0
# Unchecked cast: the Python object is reinterpreted as a
# CPyReadParser_Object so its wrapped `parser` member can be passed on.
# NOTE(review): presumably read_parser must be a khmer.ReadParser -- confirm.
cdef CPyReadParser_Object* parser = <CPyReadParser_Object*>read_parser

deref(self._ht_this).consume_seqfile[CpFastxReader](
parser.parser, total_reads, n_consumed
)
return total_reads, n_consumed

def consume_seqfile_with_mask_with_parser(self, object read_parser,
Hashtable mask, int threshold=0):
"""Count k-mers from read_parser via consume_seqfile_with_mask.

The raw table pointer of `mask` and the `threshold` value are forwarded
unchanged to the underlying call.
NOTE(review): presumably k-mers are filtered by their count in `mask`
relative to `threshold` -- confirm against the C++ implementation.

Returns the tuple (total_reads, n_consumed).
"""
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0
# Unchecked cast of the Python object to the parser wrapper struct.
cdef CPyReadParser_Object* parser = <CPyReadParser_Object*>read_parser
# Borrowed raw pointer; `mask` keeps ownership of the table.
cdef CpHashtable * cmask = mask._ht_this.get()
deref(self._ht_this).consume_seqfile_with_mask[CpFastxReader](
parser.parser, cmask, threshold, total_reads, n_consumed
)
return total_reads, n_consumed

def consume_seqfile_banding_with_parser(self, object read_parser, num_bands,
band):
"""Count k-mers from read_parser via consume_seqfile_banding.

`num_bands` and `band` are forwarded unchanged to the underlying call.
NOTE(review): presumably only k-mers falling in the given band are
counted -- confirm against the C++ implementation.

Returns the tuple (total_reads, n_consumed).
"""
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0
# Unchecked cast of the Python object to the parser wrapper struct.
cdef CPyReadParser_Object* parser = <CPyReadParser_Object*>read_parser
deref(self._ht_this).consume_seqfile_banding[CpFastxReader](
parser.parser, num_bands, band, total_reads, n_consumed
)
return total_reads, n_consumed

def consume_seqfile_banding_with_mask_with_parser(self, object read_parser,
num_bands, band,
Hashtable mask,
int threshold=0):
"""Count k-mers from read_parser via consume_seqfile_banding_with_mask.

`num_bands`, `band`, the raw table pointer of `mask` and `threshold`
are all forwarded unchanged to the underlying call.
NOTE(review): presumably this combines band selection with filtering
against `mask` -- confirm against the C++ implementation.

Returns the tuple (total_reads, n_consumed).
"""
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0
# Unchecked cast of the Python object to the parser wrapper struct.
cdef CPyReadParser_Object* parser = <CPyReadParser_Object*>read_parser
# Borrowed raw pointer; `mask` keeps ownership of the table.
cdef CpHashtable * cmask = mask._ht_this.get()
deref(self._ht_this).consume_seqfile_banding_with_mask[CpFastxReader](
parser.parser, num_bands, band, cmask, threshold, total_reads,
n_consumed
)
return total_reads, n_consumed

def abundance_distribution(self, file_name, Hashtable tracking):
"""Calculate the k-mer abundance distribution over reads in file_name."""
cdef FastxParserPtr parser = get_parser[CpFastxReader](_bstring(file_name))
Expand All @@ -284,7 +319,7 @@ cdef class Hashtable:
abunds.append(x[i])
return abunds

def abundance_distribution_with_reads_parser(self, object read_parser, Hashtable tracking):
def abundance_distribution_with_parser(self, object read_parser, Hashtable tracking):
"""Calculate the k-mer abundance distribution over reads."""

cdef CpHashtable * cptracking = tracking._ht_this.get()
Expand Down Expand Up @@ -486,12 +521,12 @@ cdef class Hashgraph(Hashtable):
list; used in graph contraction.'''
cdef HashSet hdns = HashSet(self.ksize())
_sequence = self._valid_sequence(sequence)
deref(self._hg_this).find_high_degree_nodes(_sequence,
deref(self._hg_this).find_high_degree_nodes(_sequence,
hdns.hs)
return hdns


def traverse_linear_path(self, object kmer, HashSet hdns,
def traverse_linear_path(self, object kmer, HashSet hdns,
Nodegraph stop_filter=None):
'''Traverse the path through the graph starting with the given
k-mer and avoiding high-degree nodes, finding (and returning)
Expand Down Expand Up @@ -539,7 +574,7 @@ cdef class Hashgraph(Hashtable):
cdef HashSet hs = HashSet(self.ksize())
deref(self._hg_this).get_tags_for_sequence(_sequence, hs.hs)
return hs

def find_all_tags_list(self, object kmer):
'''Find all tags within range of the given k-mer, return as list'''
cdef CpKmer _kmer = self._build_kmer(kmer)
Expand All @@ -548,7 +583,7 @@ cdef class Hashgraph(Hashtable):
cdef shared_ptr[CpHashgraph] this = self._hg_this

with nogil:
deref(deref(self._hg_this).partition).find_all_tags(_kmer, deref(tags),
deref(deref(self._hg_this).partition).find_all_tags(_kmer, deref(tags),
deref(this).all_tags)

return result
Expand All @@ -564,16 +599,16 @@ cdef class Hashgraph(Hashtable):
total_reads,
n_consumed)
return total_reads, n_consumed

def print_tagset(self, str filename):
'''Print out all of the tags.'''
deref(self._hg_this).print_tagset(_bstring(filename))

def add_tag(self, object kmer):
'''Add a k-mer to the tagset.'''
cdef HashIntoType _kmer = self.sanitize_hash_kmer(kmer)
deref(self._hg_this).add_tag(_kmer)

def get_tagset(self):
'''Get all tagged k-mers as DNA strings.'''
cdef HashIntoType st
Expand All @@ -591,29 +626,29 @@ cdef class Hashgraph(Hashtable):
def load_tagset(self, str filename, clear_tags=True):
'''Load tags from a file.'''
deref(self._hg_this).load_tagset(_bstring(filename), clear_tags)

def save_tagset(self, str filename):
'''Save tags to a file.'''
deref(self._hg_this).save_tagset(_bstring(filename))

@property
def n_tags(self):
'''Return the count of all tags.'''
return deref(self._hg_this).n_tags()

def divide_tags_into_subsets(self, int subset_size=0):
'''Divide tags equally up into subsets of given size.'''
cdef set[HashIntoType] divvy
deref(self._hg_this).divide_tags_into_subsets(subset_size, divvy)
cdef HashSet hs = HashSet(self.ksize())
hs.hs = divvy
return hs

@property
def tag_density(self):
'''Get the tagging density.'''
return deref(self._hg_this)._get_tag_density()

@tag_density.setter
def tag_density(self, int density):
'''Set the tagging density.'''
Expand All @@ -630,7 +665,7 @@ cdef class Hashgraph(Hashtable):
cdef HashIntoType end = self.sanitize_hash_kmer(end_kmer)
cdef bool cbreak = break_on_stoptags
cdef bool cstop = stop_big_traversals

with nogil:
deref(subset_ptr).do_partition(start, end, cbreak, cstop)

Expand All @@ -650,15 +685,15 @@ cdef class Hashgraph(Hashtable):

return ppi


def assign_partition_id(self, PrePartitionInfo ppi):
'''Assign a partition ID to a given tag.'''
cdef cp_pre_partition_info * cppi = ppi._this.get()
cdef PartitionID pi
pi = deref(deref(self._hg_this).partition).assign_partition_id(deref(cppi).kmer,
deref(cppi).tagged_kmers)
return pi

def output_partitions(self, str filename, str output, bool
output_unassigned=False):
'''Write out sequences in given filename to another file, annotating '''
Expand All @@ -668,20 +703,20 @@ cdef class Hashgraph(Hashtable):
_bstring(output),
output_unassigned)
return n_partitions

def load_partitionmap(self, str filename):
'''Load a partitionmap for the master subset.'''
deref(deref(self._hg_this).partition).load_partitionmap(_bstring(filename))

def save_partitionmap(self, str filename):
'''Save a partitionmap for the master subset.'''
deref(deref(self._hg_this).partition).save_partitionmap(_bstring(filename))

def _validate_partitionmap(self):
'''Run internal validation checks.'''
deref(deref(self._hg_this).partition)._validate_pmap()
def consume_seqfile_and_tag_with_reads_parser(self, object read_parser):

def consume_seqfile_and_tag_with_parser(self, object read_parser):
'''Count all k-mers using the given reads parser'''
cdef unsigned long long n_consumed = 0
cdef unsigned int total_reads = 0
Expand All @@ -693,7 +728,7 @@ cdef class Hashgraph(Hashtable):
total_reads,
n_consumed)
return total_reads, n_consumed

def consume_partitioned_fasta(self, filename):
'''Count all k-mers in a given file'''
cdef unsigned long long n_consumed = 0
Expand All @@ -703,19 +738,19 @@ cdef class Hashgraph(Hashtable):
total_reads,
n_consumed)
return total_reads, n_consumed

def merge_subset(self, SubsetPartition subset):
'''Merge the given subset into this one.'''
deref(deref(self._hg_this).partition).merge(subset._this.get())

def merge_subset_from_disk(self, str filename):
'''Merge the given subset (filename) into this one.'''
deref(deref(self._hg_this).partition).merge_from_disk(_bstring(filename))

def count_partitions(self):
'''Count the number of partitions in the master partitionmap.'''
return self.partition.count_partitions()

def set_partition_id(self, object kmer, PartitionID pid):
'''Set the partition ID for this tag.'''
cdef string start = self.sanitize_kmer(kmer)
Expand All @@ -729,7 +764,7 @@ cdef class Hashgraph(Hashtable):
'''Get the partition ID of this tag.'''
cdef string _kmer = self.sanitize_kmer(kmer)
return deref(deref(self._hg_this).partition).get_partition_id(_kmer)

def repartition_largest_partition(self, Countgraph counts not None,
unsigned int distance,
unsigned int threshold,
Expand All @@ -754,15 +789,15 @@ cdef class Hashgraph(Hashtable):
def load_stop_tags(self, object filename, clear_tags=False):
'''Load the set of stop tags.'''
deref(self._hg_this).load_stop_tags(_bstring(filename), clear_tags)

def save_stop_tags(self, object filename):
'''Save the set of stop tags.'''
deref(self._hg_this).save_stop_tags(_bstring(filename))

def print_stop_tags(self, filename):
'''Print out the set of stop tags.'''
deref(self._hg_this).print_stop_tags(_bstring(filename))

def trim_on_stoptags(self, str sequence):
'''Trim the reads on the given stop tags.'''
cdef size_t trim_at
Expand All @@ -776,7 +811,7 @@ cdef class Hashgraph(Hashtable):
'''Add this k-mer as a stop tag.'''
cdef HashIntoType _kmer = self.sanitize_hash_kmer(kmer)
deref(self._hg_this).add_stop_tag(_kmer)

def get_stop_tags(self):
'''Return a DNA list of all of the stop tags.'''
cdef HashIntoType st
Expand Down
4 changes: 2 additions & 2 deletions oxli/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def build_graph(ifilenames, graph, num_threads=1, tags=False):
- tags: should there be tags
"""
if tags:
eat = graph.consume_seqfile_and_tag_with_reads_parser
eat = graph.consume_seqfile_and_tag_with_parser
else:
eat = graph.consume_seqfile_with_reads_parser
eat = graph.consume_seqfile_with_parser

for _, ifile in enumerate(ifilenames):
rparser = khmer.ReadParser(ifile)
Expand Down
2 changes: 1 addition & 1 deletion sandbox/count-kmers-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def main():
for _ in range(args.threads):
thread = \
threading.Thread(
target=countgraph.consume_seqfile_with_reads_parser,
target=countgraph.consume_seqfile_with_parser,
args=(rparser, )
)
threads.append(thread)
Expand Down
2 changes: 1 addition & 1 deletion sandbox/optimal_args_hashbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def main():
file=sys.stderr)

htable = khmer.new_nodegraph(args.ksize, args.max_tablesize, args.n_tables)
target_method = htable.consume_seqfile_with_reads_parser
target_method = htable.consume_seqfile_with_parser

for _, filename in enumerate(filenames):
rparser = khmer.ReadParser(filename)
Expand Down
4 changes: 2 additions & 2 deletions scripts/abundance-dist-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
for _ in range(args.threads):
thread = \
threading.Thread(
target=countgraph.consume_seqfile_with_reads_parser,
target=countgraph.consume_seqfile_with_parser,
args=(rparser, )
)
threads.append(thread)
Expand All @@ -163,7 +163,7 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
abundance_lists = []

def __do_abundance_dist__(read_parser):
abundances = countgraph.abundance_distribution_with_reads_parser(
abundances = countgraph.abundance_distribution_with_parser(
read_parser, tracking)
abundance_lists.append(abundances)

Expand Down
2 changes: 1 addition & 1 deletion scripts/filter-abund-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def main():
for _ in range(args.threads):
cur_thread = \
threading.Thread(
target=graph.consume_seqfile_with_reads_parser,
target=graph.consume_seqfile_with_parser,
args=(rparser, )
)
threads.append(cur_thread)
Expand Down
Loading