From 7e0e0e1229d1693d1ff73068459202c676967c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9r=C3=A9nice=20Batut?= Date: Mon, 22 Sep 2025 15:04:16 +0200 Subject: [PATCH 1/3] Add tool to detect circular sequences --- tools/detect_circular_sequences/.lint_skip | 1 + tools/detect_circular_sequences/.shed.yml | 15 + .../detect_circular_sequences.py | 286 ++++++++++++++ .../detect_circular_sequences.xml | 58 +++ .../test-data/input.fasta | 359 ++++++++++++++++++ 5 files changed, 719 insertions(+) create mode 100644 tools/detect_circular_sequences/.lint_skip create mode 100644 tools/detect_circular_sequences/.shed.yml create mode 100644 tools/detect_circular_sequences/detect_circular_sequences.py create mode 100644 tools/detect_circular_sequences/detect_circular_sequences.xml create mode 100644 tools/detect_circular_sequences/test-data/input.fasta diff --git a/tools/detect_circular_sequences/.lint_skip b/tools/detect_circular_sequences/.lint_skip new file mode 100644 index 00000000000..6b7b23ca24c --- /dev/null +++ b/tools/detect_circular_sequences/.lint_skip @@ -0,0 +1 @@ +CitationsMissing \ No newline at end of file diff --git a/tools/detect_circular_sequences/.shed.yml b/tools/detect_circular_sequences/.shed.yml new file mode 100644 index 00000000000..1b6b6130e09 --- /dev/null +++ b/tools/detect_circular_sequences/.shed.yml @@ -0,0 +1,15 @@ +name: detect_circular_sequences +description: Detect circular sequences (e.g. circular contigs) in a FASTA file by k-mer matching +long_description: | + Detect circular sequences (e.g. circular contigs) by looking for an exactly identical k-mer at the two + ends (within an inspected window, the "cadre") of the sequences provided in a FASTA file. In order to be able + to predict genes spanning the origin of circular sequences, the first 1,000 nucleotides + of each circular sequence are duplicated and added at the sequence's end. 
#!/usr/bin/env python3

#########################################################################################
# This script detects circular contigs by looking for an exactly identical k-mer at the
# two ends of each sequence provided in a FASTA file. The search for the k-mer can be
# restricted to a window (the "cadre") at the end of the sequence. In order to be able
# to predict genes spanning the origin of circular contigs, the first 1,000 nucleotides
# of each circular contig are duplicated and added at the contig's end.
#
# Inspired by Simon Roux work for Metavir2 (2014) and Corentin Hochart work in PlasSuite
#
#########################################################################################

import argparse
import sys
import tempfile
from pathlib import Path


def error(message):
    """
    Print an error message to stderr and exit the program with a non-zero status.

    Args:
        message (str): The error message to display.
    """
    print(f"{sys.argv[0]} (error): {message}. Execution halted.", file=sys.stderr)
    sys.exit(1)


def warning(message, verbose):
    """
    Print an informational message to stderr if verbose mode is enabled.

    Args:
        message (str): The message to display.
        verbose (bool): If True, the message is printed; otherwise nothing happens.
    """
    if verbose:
        print(f"{sys.argv[0]} (info): {message}", file=sys.stderr)


def fasta_format(seq, width=60):
    """
    Split a sequence into consecutive chunks of at most ``width`` characters.

    Plain slicing is used instead of ``textwrap.wrap``: it yields the same chunks
    for a whitespace-free sequence but runs in linear time, which matters for
    contigs that are several megabases long.

    Args:
        seq (str): Sequence to format.
        width (int): Maximum number of characters per line (default: 60).

    Returns:
        list[str]: The chunks, in order. Empty list for an empty sequence.
    """
    return [seq[start:start + width] for start in range(0, len(seq), width)]


def one_line_fasta(input_fp, output_fp):
    """
    Convert a FASTA file to a format with each sequence on a single line.

    Every header is preceded by a newline (so the output starts with a blank
    line when the input starts with a header) and the file always ends with a
    newline.

    Args:
        input_fp (Path): Path to the input FASTA file.
        output_fp (Path): Path to the output FASTA file.
    """
    with input_fp.open("r") as infile, output_fp.open("w") as outfile:
        for line in infile:
            if line.startswith(">"):
                outfile.write(f"\n{line}")  # Newline terminates the previous record
            else:
                outfile.write(line.rstrip("\n"))  # Concatenate sequence lines
        outfile.write("\n")  # Final newline (like END in awk)


def find_kmer_occurrences(begin, end):
    """
    Locate every occurrence (overlapping ones included) of ``begin`` in ``end``.

    Args:
        begin (str): K-mer to search for.
        end (str): String in which the k-mer is searched.

    Returns:
        list[int]: For each occurrence, the index in ``end`` just past the
        match, i.e. ``match_start + len(begin)``. Empty if ``begin`` is empty
        or never occurs.
    """
    if not begin:
        return []

    kmer_length = len(begin)
    positions = []
    start = end.find(begin)
    while start != -1:
        positions.append(start + kmer_length)
        # Advance by one only, so that overlapping occurrences are reported too.
        start = end.find(begin, start + 1)
    return positions


def is_circular(line_chars, scale, pos):
    """
    Check whether the first ``scale`` items equal the ``scale`` items starting at ``pos``.

    Args:
        line_chars (str | list): Sequence (string or list of characters).
        scale (int): Number of items to compare.
        pos (int): Index at which the second segment starts.

    Returns:
        bool: True if both segments are identical (trivially True when
        ``scale`` is not positive), False otherwise or when the second segment
        would run outside the sequence.
    """
    if scale <= 0:
        return True
    if pos < 0 or pos + scale > len(line_chars):
        return False
    return line_chars[:scale] == line_chars[pos:pos + scale]


def process_sequence(
    line,
    header,
    verbose,
    kmer_length=10,
    cadre_length=0,
    duplicate_nucleotides=1000,
):
    """
    Detect whether a sequence is circular and, if so, build its modified version.

    The first ``kmer_length`` nucleotides are searched in the last
    ``cadre_length`` nucleotides. For each hit, the suffix starting at the hit
    is compared with the prefix of the same length; the sequence is circular if
    they are identical. The redundant suffix is then removed and the beginning
    of the sequence is appended so that genes spanning the origin stay intact.

    Args:
        line (str): Nucleotide sequence (an iterable of characters is accepted).
        header (str): Identifier of the sequence.
        verbose (bool): If True, report too-short sequences on stderr.
        kmer_length (int): Length of the k-mer taken from the sequence start.
        cadre_length (int): Length of the end window searched for the k-mer.
            0 (or a value larger than ``len(line) - kmer_length``) means
            everything after the leading k-mer.
        duplicate_nucleotides (int): Number of leading nucleotides appended to
            a circular sequence. If the trimmed sequence is shorter than this,
            the whole trimmed sequence is appended instead.

    Returns:
        tuple: ``(header, modified_sequence)`` for a circular sequence,
        ``(None, None)`` otherwise.
    """
    try:
        if not isinstance(line, str):
            line = "".join(line)
        seq_len = len(line)

        if seq_len < kmer_length:
            warning(f"Short sequence ({seq_len}bp): {header}", verbose)
            return None, None

        # The window never includes the leading k-mer itself.
        max_window = seq_len - kmer_length
        if cadre_length <= 0 or cadre_length > max_window:
            cadre_length = max_window

        # Slice from an explicit start index: ``line[-0:]`` would be the whole
        # sequence, not an empty window.
        window_start = seq_len - cadre_length
        begin = line[:kmer_length]
        window = line[window_start:]

        overlap = None
        for end_pos in find_kmer_occurrences(begin, window):
            # Translate the window-relative match end into the absolute start
            # of the match in the full sequence.
            pos = window_start + end_pos - kmer_length
            candidate = seq_len - pos
            if is_circular(line, candidate, pos):
                # Keep the last verified hit, i.e. the shortest overlap.
                overlap = candidate

        if overlap is None:
            return None, None

        # Drop the redundant end, then duplicate the start of the sequence.
        trimmed = line[:seq_len - overlap]
        if len(trimmed) < duplicate_nucleotides:
            modified_seq = trimmed + trimmed
        else:
            modified_seq = trimmed + line[:duplicate_nucleotides]

        return header, modified_seq
    except Exception as e:
        error(f"Error processing sequence {header}: {e}")


def detect_circular(
    fasta_in,
    fasta_out,
    id_out,
    kmer_length=10,
    cadre_length=0,
    duplicate_nucleotides=1000,
    verbose=False,
):
    """
    Detect circular sequences in a FASTA file and write the modified sequences.

    Args:
        fasta_in (Path): Input FASTA file.
        fasta_out (Path): Output FASTA file with the modified circular sequences.
        id_out (Path): File receiving the identifiers of circular sequences.
        kmer_length (int): Length of k-mer to search for.
        cadre_length (int): Length of sequence end to inspect.
        duplicate_nucleotides (int): Number of nucleotides to duplicate at the end.
        verbose (bool): Enable verbose output.
    """
    # The temporary directory (and the flattened FASTA inside it) is removed
    # even when processing fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        flat_fasta = Path(tmp_dir) / "one_line.fasta"
        one_line_fasta(fasta_in, flat_fasta)

        with flat_fasta.open("r") as infile, fasta_out.open(
            "w"
        ) as fasta_output, id_out.open("w") as id_output:
            header = ""
            for line in infile:
                if line.startswith(">"):
                    header = line[1:].strip()
                    continue

                sequence = line.strip()
                if not sequence and not header:
                    # Blank line emitted by one_line_fasta before the first header.
                    continue

                circular_header, modified_seq = process_sequence(
                    sequence,
                    header,
                    verbose=verbose,
                    kmer_length=kmer_length,
                    cadre_length=cadre_length,
                    duplicate_nucleotides=duplicate_nucleotides,
                )

                if modified_seq:
                    id_output.write(f"{circular_header}\n")
                    fasta_output.write(f">{circular_header}\n")
                    fasta_output.writelines(
                        f"{chunk}\n" for chunk in fasta_format(modified_seq)
                    )


def main():
    """
    Command-line entry point: detect circular contigs in a FASTA file.

    Command-line arguments:
        --fasta-in: Path to the input FASTA file (required).
        --fasta-out: Path to the output FASTA file receiving the modified
            circular sequences (required).
        --id-out: Path to the file receiving the identifiers of the circular
            sequences (required).
        --kmer-length: Length of the k-mer used to identify circular sequences
            (default: 10).
        --cadre-length: Length of the fragment at the sequence end inspected
            for k-mer identity. If 0, the entire sequence is screened
            (default: 0).
        --duplicate-nucleotides: Number of leading nucleotides duplicated at
            the end of circular sequences (default: 1000).
        --verbose: If set, informational messages are printed during execution.
    """
    parser = argparse.ArgumentParser(
        description="Detect circular contigs by k-mer matching."
    )
    parser.add_argument("--fasta-in", required=True, help="Input FASTA file")
    parser.add_argument(
        "--kmer-length", type=int, default=10, help="Length of k-mer (default: 10)"
    )
    parser.add_argument(
        "--cadre-length",
        type=int,
        default=0,
        help="Inspect fragment length (default: 0)",
    )
    parser.add_argument(
        "--duplicate-nucleotides",
        type=int,
        default=1000,
        help="Number of leading nucleotides duplicated at the end of circular "
        "sequences (default: 1000)",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
    parser.add_argument("--fasta-out", required=True, help="Output FASTA file")
    parser.add_argument(
        "--id-out", required=True, help="File to write circular sequence IDs"
    )

    args = parser.parse_args()

    if args.kmer_length < 1:
        parser.error("--kmer-length must be a positive integer")
    if args.cadre_length < 0:
        parser.error("--cadre-length must be 0 or a positive integer")
    if args.duplicate_nucleotides < 0:
        parser.error("--duplicate-nucleotides must be 0 or a positive integer")

    warning("Starting script execution.", args.verbose)
    detect_circular(
        Path(args.fasta_in),
        Path(args.fasta_out),
        Path(args.id_out),
        kmer_length=args.kmer_length,
        cadre_length=args.cadre_length,
        duplicate_nucleotides=args.duplicate_nucleotides,
        verbose=args.verbose,
    )
    warning("Script execution completed.", args.verbose)


if __name__ == "__main__":
    main()
circular contigs) in a FASTA file by k-mer matching + + 0 + 0 + 24.0 + + + python + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/detect_circular_sequences/test-data/input.fasta b/tools/detect_circular_sequences/test-data/input.fasta new file mode 100644 index 00000000000..93614d70793 --- /dev/null +++ b/tools/detect_circular_sequences/test-data/input.fasta @@ -0,0 +1,359 @@ +>SRR17300492_25544 +ACACCATATATCTCATTTTGTCTGTTGAGAGCGTCGCTCGATACTACGAAGTAGATATCT +CACTCCGCTGGGTTTTGTGTTTTCTTTTGTTTTCGATTTCGCGACTTGCCTGACAGCAAG +CCTCTCATCTGGAGCCCACCGCGAGGGGTGGTACAGACTTTATATCTCTTAAATTCCTTA +AATTCCATTTAACAAACTCGAGCCTTATTACCACCTAGTTACGCGTCGCGAGGCTTTGTT +CGAAGCTGCACAAGTAACTTAATAAGCTTGACATAAATCACAACGGCGCCTCGAAAGAAA +GCGTCGAAAGTAGTTAAAGACAAGCCCGAAGAATTAGAAGTTGAGGAGGAAACTACTCAA +CCAAAACCCACCAAAGGCCCAGTACGAAAAGCAGCTCCAGCTAAAGGCGCTAAAACAGTC +GCGATCCCAGTTCCTGTCTTAGCACCTAAAACTCGAGCATCGCAAAGGGCAACAACTAGT +AAAACAGTAGTTGAGGCAGACGCAACACCCGAAGCAGTCGGGAAGCCTGGCAAAAGATTC +GTTGTACCTGCTCGATTACCAGGACCATATTCTAAATTCTTAACTGAGCCATCAGTTGTA +AACCCAGTTGGTGAACCCAGGACAGTACCGACCGGATTATTTGAGGGATATCAGAGACCA +GACCGATCATACGAGCAGTCGCGACAGACACTTCCAAAAGCCCCGGAAGACTTTGCTGAA +ACATCTTTTACGGAAGATCTTCACGATAGTCCGGAGAGTGACAAGGTTGAGTTTGAAGTT +GCTGTGAATATATTACAAGGCTTGGGAGACGAGAATCCTTACCAGAGACCGATCGCGATC +CATCCTACACCACTTGCAGCCAAGCTCAAAGCCCCATTTGAAACAGATAGCCCTTTAACA +CCTTTGCCGAGTGATTACCAGTCTCCAGAAGCGAATTACGCTAAAAGCCAGACAGCTCGC +GATCCAACTGAACCGGATCTAGAGCCGTGGGGTCAACCGCGCGCTAGTTATACTTTCGGA +GAAAGTTCCGCAGCAGGTTTACAAGAGCCACCTGCATATTCTTGGGGAGAAGCATCAGAA +CAACAAAGGGAGAGAGAAGAGAGAATCCAGCGTTATCTTCAGTCACAAATTATTGAACCA +CAAGAGCCTTCGGTTAAAGAAGAAGTTAGATCGGATATCTCGTATCTTACCCAACAAGAA +TATCCACCGCTACCAAGTTCACCTAGACCAGACTTAAGAGGCTCTAGAAGATCTAGATCA +CGATCCAGCACACACCAAAGCAGTAACCACGAAACTAGACACTCGCGATCCGAAATGAGT +GACCATGGAGGAGATGCCGGAGGAGTACGGCGCCAGTACAAACCTATGCCGGCCAGGAGA +TCGAAGGATGCACCTGAATTCGATGAGACTGAGCCAGAGAGTTTGATGCGATATTTCGAG 
+GATCTAGAGGCGTGTTTCGAGAACGCAGGATTGACAAATGACACAGAGAAAAAGAAATAT +GTAGGAAGATATGTAAAAGCTCGACTAGAGTCGGAGTGGAGCACGCTGAGTGGAGCTGAG +GATGGAAATTCCTACCAGGATTATAAAGCGCAAATTATGGGGGATTATTACGCTGTAGGA +AGCCTGAAACGAGGCTCGATCAAGCGTCTTACTCAGATTTGCAAAGAGCATCAGCGTATT +AGTGCGAACGATATAGACGATCTGTTAACCCTGAAACGACAATTCAGCGCGGAGGCTAAG +AAATTAATGGAGCCGCCGGCTCTATTGGCTAACCATACGCTGGTAGAAAATTTCATGGGA +TGTCTGACTCGCGACTTCCGCGAGAAAGTCTATCAGCAGTTAGAGTCGAACGCGCGCACC +GATATCAGAATAAAGAAAGCTATCACAGCTAATGCGCCAGCTGGTTTGGTACCACCGGCA +CCAGTACCTGCGAGACCCACTCGTCACAGGCCGGAAGATCGTTTCGAGTTAGACGAAGTG +ATCGCGATGGCTGAGGAAATTGCTCGCGAGCAAAATCCAGGTGTCGCGGCGGTATCTTTA +AATTCGCGAACAGGAGGAGCGGCTGCAGCCCCACCAATGGTTGTCAAGACGGAGAGCTTT +AAGCTGGAACCCTTGACCCAGCAATTAGAAGAGCTCAGGGCTGAACTCGCCATGAGTCGC +GATCGTGAGGTGGAGCGTCAGAAGCGCTTTGTCGAAGAAGTAAGGGCTGCCATGCAGCAA +GGGATGCATAATGCGCCGGCGCCACTGCCTGCGCAGCCTAAATTGTATGAGCCTCGCGCT +CCAAATCCCAGCTATGCACGCCCTGAGTACCCTACCACGATGAAGTGTTTCTATTGCGGT +TTACAAGGACACACGTTCAATAGATGTCCCGCGAAAGAAGCGCATATTCAGCTGGGGAAA +ATCATCGACAAAGGATCGCGAGTGCATCTCCCGGATCATACCTACTTAGGTGTCGATCCG +AATAGAACTATTCTATCGCGAGTGGAGGAATACCACGCGCAGAAAAAGCTGGACAGTAAT +TATTTAAATTATGGAGGCCAAAGCGTGCTTCAGCTAGCGCAGATACAGCAGCAGGTCGGG +CCTAACTCAGTCTTTACCAATAAGGCTAAAGATACCAGGGACGATCTGATCTCTGCGCTG +CAAAAGGAAAACGCTTCGCTTAAGGTGTTGATGAATGGACACTCTGAGGTCTTGCCGACG +ATGGCAGCGTCAAGCTCGTCGTCGGATCTAGATCTAGTACCTAGACGCCTACCGGACCCT +GAGGAATACGGGGACCGGTACCACGACGCACTGCGCAAAGAGCGCGAGGAGTTGATGAGT +CTTATTAGTCAACTGGACTATTCGCAGCCGGAAGCGGCGGGTTTCACCAGTACCTGAGGA +AGGCAGTGCTGGCAAACGCGCGCAAGACGCTGGCGCGATCTGCCAAAACTCCGGAGGGTG +CATCAGCAAAATCGAAAGCAGTGCCTAATAAAAATATTAAATCTATTCTGAAAGAGAAAC +AGCCAGTAGAGGAAGAGCCAGAAGAAAGTGATGATGAAGAAAATGAGGTAGCGGAGAAGG +AAATACCCATACAAAAGGATCGCGATCCGGCTGTAAAGCAGGTACCTACTACAGCAGTAC +CCCGTTCTATGCAGAAACATGTAGAGTTCGAAGTCTTACCAGAGGGAGGGTTACCAATGA +GAGCTAAGCCCACAAAGTCGGGGATGATACCGTTCGTGGACATCCCGCCAATGAATCCAG +CCTTGCGGCAAAAGAAACCTGTCGCGATCGAAGCAGAACCCTTACCCACTATTGATAAGA +AAGGACCAGCGTACAAACATCGTGCTCCAATTGAGGACGACGCTAACTGGTCTGGTTTCA 
+TAAAAACGTTGCGCGATCTGCCTGTAACGATCTCTCAAGGGCAATTAATGGGATTAATTA +ATGCTCAGAGCCGTAAACGGGTAATTGAAGAATTAACTGCTAAGCGAATGCCTGTAAATG +AGGATAAGCCAGTTAAACGCGGCGTGACTATGGTGGAAGAAGTGGAGGGCATGCTAGACG +ACGACGGAAATGCGTATGAGCAGTTAGTATTTACTGAGCTGCCAGAACCGACCTATACAA +TCCTTGGGCAGGATGAGGGTGAGTTAAAAGCGGGGTCAATCATCGCGAGCGATCCAGTAA +CTCAATATCTCAACGGACTCGCTCCTGAGGAGGCGGCTAGGAAAATTATGGTTGCTCGTG +AATCTTATTTACTAAAAACGCTATATCCATTAGTAAACGCACAAGCAGAGGTGGAGTCGA +TTATAGACGGGGGATCACAAATCATATCGATGTCATCAGATGTCGCGATCCAAGTAGGAG +CGACCTGGGACCCAGATGTCACGATCCAAATGCAGAGCGCCAATAAACAGCTTGAAAAGA +CACAAGGGCTAGTAAGAAACATGCCGTTCAAATTCGGTGATGAAATTACTGTCTACTTTC +AAGTACATGTAATGCAGAATGCGGCGTACGACGTGCTACTCGGGCGACCGTTCGAGGTAC +TCACGGCGTCGGTATTCGAAAACGCTCTAGATGGATCACAAATGGTGACGTTGACGGAAC +CGAATTCAGGGAAGCGTTGTAAAATGCCAACTTACGACAGAGGGAAACCACGTAAAGTAA +TCAGCCGTAAGCACCCTGAGACGGTAGAGTCTTTTTAAACCTCGATGAGCTGGTTATAGA +TCAGGGAGAAGTTGCATTTATTATTGGGTCGGACGGCAATGAAGGCTATGTGATCGAAGG +CTATCAATTTCCTCAGGAAGGCCAGAAGTTCACTCAGCACGCACTTAGAAATGCGTACTT +AGAGAGAGCTATCCTGGAAGACGGACATGAGGATGCAGAACGAGAGAAAATAATTAAATA +TTTAGTAGATACACGAAAAACAAAGCCAGTAGAAGGATCGCGATCCAATCGTGGCACGTT +TGGAAGCATCGCGAGCCAATTCCATATGGCAGAATGTGTCGAGAAAGCCCCAAAACAAGT +ATTTGCTGCGACAAAGAAGTACAAGAAGGTGGCAGATAAAATACGGCCCGTATATCAAGA +ATTACCCGACAAATATCGGATCGTGCGCGACATTAAAGGGGATCCATTGAAAAACATGCC +TATGCTATCTAAGCAGCCGCCAGAATTCGTACCCACTGGCCGATACTCTCGCGAGCGCAA +GGAGCAAATGGACATAGTCCATGGCGGAGATTTTCTATGGGAAGAAGAGCGAAAGCTCGT +GCATCAGTTAATCATGCAGCAAAATGAGGCGTTTGCGTGGGACGACACAGAGAAGGGAAG +CTTCAAGTCTGAATTCTTCCCGCCTGTCGAGATCCCCGCGATCGAACATGTGCCCTGGGT +ATTAAAGAATATTCCTATCCCACCGGGTATGCACACCGAGATTTGCGATTTTATTCGGCG +CAAAGTAGAAGCAGGGACCTATGAGCCGTCTAGCTCGTCCTATAGGACGAGGTGGTTTAT +AGTGATGAAAAAGGACGGCAAGACATTTAGGATCGTGCACAGTCTAGAGCCTTTAAATGC +TGTCACGATCGCTCATTCTGGCTTACCGCCGGCTATGGAATCGTTAGCAGAACATTTCGC +AGGACGCGCGTGCGGCGGAATACTGGATTTATACGTTGGGTATGACGAGCGACTACTTGC +AGAAGTGTCGCGAGACATGACGACGTTCCAGACTCCGTTCGGCGCGCTCAGACTCGTCAC +GCTCCCTATGGGATGGACCAATTCGGTGCCAATCTTTCACGAGGATGTTACATATATTCT 
+TAGGGAAGAAATACCCGAATTTACGGAGCCATACATAGACGATGTACCTATCAGAGGGCC +TAAAACACGCTATGAACTGCCTGATGGTGGCTACGAAGTTATTCCAGAAAATCCAGGAAT +ACGAAGGTTCGTGTGGGAGCATATGGTTAACGTAAACCGGATCGTGCAGCGAATGAAATA +TAGCGGAGGCACGTTTTCAGGTTACAAATCACTATTATGCGCTGCCGAAATCGTTGTCGT +AGGGCATTTATGTACCTTTGAAGGAAGGAAGCCTATGCCGGATAAAGTGCATGTTATTCA +GAATTGGGGATCGTGCAAGAATATTAGTGATGTACGAGCGTTTATGGGAACTATGGGATT +ATTGCGGATATACATATCTGATTATGCGTCGCGAGCCCATCATATCCAGAAACTGCTGCG +TAACAACACGCCGTTCGAGTGGGGACCTGATCAAGAGGAAAGCATGCGCCTGCTGAAAGA +AGGCGTAACGGATGCACATTGTATAACGCCACTAGATTACACGATGTCAGGTAAAATAAT +ACTGTCCGTGGATACATCGTGGCGAGCCGTGGGATTCTATATCCGACAAGAGGATGAGAA +TGATAAGAAGAAAAAGAGATATGCGCGCTTTGGTTCGATATTATTGGGGGATCGCGAGCA +AAGGTTCTCGCAGCCAAAACGCGAGCTGTACGGCTTACTTCGCGCCTTGACAGCGTGCTA +TTATTGGCTCATTGGAGCACGGAACCTAGTGATCGAGACGGATGCTAAATATATTAAAGG +CATGCTCGAAAATCCAGGCATGGGTCCGAACGCCACTATAAATAGATGGATCGATCAGAT +ATTGATGTTCCATTTTGAGCTCAGGCACGTTGCGGGGAAGACATTTGGTCCAGATGGACT +GTCGCGACGCGAATGGCAGCCCGGGGATGAGGAATACGAGAACCCGGAACAGCAAGTGGA +GGATGGATTAGGATCGCTCACCTATGTAAAGAAATTTAAGAGTGATCCAGATCCTTTAGA +GTTTGAAGAGTTTAAAGAGACTATAGATACGCGTGGAGGTTATCAACAGGAGGTGGCGCC +CCAGTTCATACTGGAGGAAACTACAGACCTAGCTGTGTCAGTGGAATGTTTTCAGAAGGA +GTTAGACGCCGCGAGGCATCAAAACAAGTTGGAGCGAGAGATCGCGATCCAATACATCAG +CGCGGGAAGCGGCTCACCAGAGCAGCGCGACTTCTTGCAGCAGTTTATGCTCTCGCCAGA +ACTGCCCAGCGATAAGATAACGGAGAGCTCGGACGAAGCCGAGTACGATGAAAGGAACCG +GACGCCGGCGGGGACGGAACTAGATAATAAAATTCCGTTGATCAAGCAATGGTTAAAAGA +CCCGACCTCGAGGCCTGCTGGGATGTCCACCAAGGAGTATTTTAATTTCGCAAGGAGCGC +GAGAAATTTCTTTGTAGATAAGAAGGGAAGATTATACCGACGCTCTATAGATGAGGCGCA +CAAATTATTTGTAGAAAAGAAAGACCGTACGCGATTAATGCAGAGCGCTCACGATAGTCT +GGGACACAGAGGATCATACGCTACACGCACTATGCTGCAGGAGCGTTTCTGGTGGCCAGA +ATTGGATCGCGACGTGCATTGGTATGTTAAAACGTGCCATTTATGCCAAGAGCGTCAGAA +GACTATGATAAGGATACCTAGGACGGAGACTCACACGCCGTCAATCTTCCAGCAATTGCA +CGTAGACACTATGCACATGACGCCGCAGTCGCATGGGTGTGGGTACATTGTGCATGGAAG +GTGCGGGTTAACTTCGTATCCGGAAGGACGGCCGCTCCGCAAAGAAAATGCAGAATCGAT +CGCGAACTGGCTGTTCGAGGACATAATCTGTCGGTGGGGAAGTCTTAGGGAAATTATCAC 
+AGATAATGGCGGGCCATTTGTAAAGGCGTTAGCGCATTTGAAGCAGAGGTGGGGCATAGA +TTGGATCACGATCTCTGCGTACAACTCTAGAGCGAACGGAAAGATCGAGAGACCGCACTG +GGATATTAGACAAATGTTATTCAAGGCTTGCGGAGGCGAAGAATTCGCAAGTAAATGGTA +TAAATATTTTTACCATGTGCTTTGGGCGGATCGCGTGTCAATTAGGAAAGGTTTCGGGGC +CTCGCCCTTCTTTCTCGTGACAGGAGCGCACCCTGTATTGCCTATGGACATCATAGAAGC +CACTTGGCTTGTCGAATTACCGGACCGTGTGCTCACGACCGAAGAACTCATTGGATTCAG +AGCGCGAGCGCTCGCAAAGCATAAAGAACACATCGACGAAATGCGCGAGCGCGTATCTAA +ACAAAAGCGCGATGCACTGTTAAAATTCGAGGAGAAACATGTACATAAAATTAAAGATTA +CAAATTTAAAAGCGGAGACTTAGTGCTGGTACGAAATTCGCCGGTAGAAATGTCCTTGAA +CAGAAAAATGAGACGTCGTTGGGAAGGTCCATTTATTGTTATTACAAGGAAAGCTGGCGG +TGCGTATATACTAGCAGACATGAGCGGAAAAGTGTATAAAGATAAAATTGCGGCGTTCAG +AGTAATCCCTTACTTTGCGCGGCGTCATATTCAAGTGCCCGAAAATATTACAGAGATTTT +AGACCAAAATAAAGAGGATCTAGATGCGTTAGCCAACGCTCCAGACAATGAGGAAGCGGA +CAAATTAGAACGTTTAGTTTATAACGAAAACGATCGCGATCCGCAGATTATTGACTGGGA +CGATGATGAGGAACGCTGGTGGGACGATGATATATGGCCTTAGGCCCAACAGGTTTATTC +AATATACGGATCGTGACAAAAGTATAACTACAACAGGCCCCTTTAAGGCTGTAAAGACGG +GCGCACGCGCCGGAACGACTACCCTACTCGGCCTCAATCCCCGCCGGGTTGAAAGTGGCA +CGAAGGCGAGTCAACTCCTCCGAAAGCAAAGAACGGTGCTTCAAGAGGAAGTCGAGCTTG +TAGAGGTTGGTAATGAGCTCCGAGCGGAGCTCGCCTTCAACGCGCCTAAATGCGAGAACA +GGCCCATCGGTAGGGAGACCGATCGCGACAACTTCTGCTGCTTCCACAGCGACCGCAGGG +GACGGCGCGCCGGCTTCCTGCGCCTCTAAAAACATGAAATAAAAATTATTTTAATCAAAA +ATTAAAAAGAGAGGAAGGGGGAGCTGAAGCTCACCGGTGACAGTAGCGGCAGGAGGAGTA +GCAGGAGGAGTCGCAGCGCCATCCAAGCGAGCCCGCTTGGACGGACCAGACTCGGTAGAG +CCATCCTCGGCCGCAGAGAGAATTTGGACGGCCTTCAGATAAGCGTAACTGGTCAAATCC +ATACGTGGAACAAGGTGATAAGGGAGACGTACAACTTCGGCACGTTTGGAGGTGGTCGCC +TAAAATTTGAGGTTTAATGTTGGATCGCGATCCTCATTTAGAGAAAATATAACTCACGCG +GGAAGAAAAGCCACTCGCAGGCGGCTCGCGACGCTTCTTGGACGGGCCAGGGACCTCGGA +AGATAAAGAAGGTACACCGGGATCGGTAGCGCACTCGAGAGTGGTCGACTCTTGACCCAA +AAGCATCTGAGAAAATGCCCAAGCGAGCGTTCGTTCGCAGTCAGCAAACGAGAGACCTTA +AAAAATTCATAAGTACACACTTCGATGTCAGTGTCCTCTGAAGTACCTGCTGGGGCAGAA +GCAGTCTCAAGAGGGTCGATGCGCTTGCCCTTACCCGCGATATGGCCACGCGGAACGCGC +TGTAAAAATATTAAGTTAGGAAATAAAGAAAAGAGGAGAAAAATAAAGCATACAGGGCCA 
+GTCGGGTGGGTATCTTTGTACGTGACCACAGGAGAGCTGGTCGCAGACAGGTGGAAGCTA +GGGTTAATCCTCGCAAGAGCCAAATGAAGGCCGAAGAAGTCGCGGACTGCGCTAGACATA +GTATATCCCTTCTGTGTAACCACTGCCTGGAAAGCCCGGCGAACCGAGACCACCAAAGCG +GCAGCTTCAGGATTGGCTGCAATAGCAGGCGCGAGGCCAGACCACTTCCGGAGTACCACT +GCAATATCCTCCAGAATGTTGGCATAATGCGCAGGAACGGCGGAGGGCGAAGAACGTATG +CTCTCTAAGAGTGCGACGCGTTCGGCGATCTCAGTAAAGGCAGGAAGGTTGCGACGAGGG +GAAGCCATGACTGACTAAAGAAACACGGTCAGAGGAAGACCGTTTTATACCATCTACAAG +GATCGCGATCCACAACACGCGTAAATTGGGAGACCAGGGAACTAGTGCAACTCAGTAGCG +AGGCGTGCAAGTAGAAAAATACCCGGTTACCGAACAAAAAGCAGAGCTAGCGAGAAAATC +TGCTGCGCATGGGTCGTAAATACTTGGAAAGATATGGAGTAAAATGAATCGCGATCCTTT +GAAACATGAGGCAGAGACGATATCTCAAGCCAAAACAGTGGCAGTCAAGGTCTGGAATAA +TAAAATAATGTTTTATTGAAGATAAAATACGCGCGACGCGCTCAATATAGAGTAGTAGCC +AAATTCGGACGCTTAAGCGGTGTAAGCCTAAGTAGGCACACTGTGTACGGCAGTACGGTA +ATGGGATGTCCACTTGACGACATATTCATTAAACCACCGAACGGAATCCAAAAATAATAC +ATTTCAAATAAGAAGCTTGGCGAGTCTACAGATGGATCGCGAGCACAAGAAAAGGTACAC +TAGTCGGGCAACTCGAGCGCAGATAGGGTAACAGTGCCTGCAGGCTGCGGAAACGCGCGG +CCTTGCTGCAATACCAAAAGTCAGCTACGCTCAGTAAGCACGGAAGAAATAAGGCGTACA +GAGACCATGAACTGGTAGCAGGAGATATCGCGAGCTGCGTGTACGAGGTTACGGAGACGG +CAGTGCTCGTCGCCCAGATAGGGCATGCGCTGATGATTGAGGATAGCGAAATGCGCCACG +ACAGAAGAGTGAACGCCGAGTAATTCTGACAATAAGTGCAGAAGCTCGGGGTCACCAAAC +AAATGCTCTGGTGTGAGATCAAGGCGACACTGGATCGCGAGACTTCAGATGGGCAAAAAT +ACATGATAGTAAAAGACTTACCGCCGCGCGAATCATGGACTGGACGGTGTACAAATTGGC +ACGCGTACCCGCCATGGTATTAGTCTGGTCATAAGTCGAAATGCGCCGCTTGAGAGCGAT +AATGCCAATCACCAGTTCGTTAACAGATGGACTGGGCGGAGGAGATTCTAGATCCCCGTC +GAGAATAAAGAGGTCGATAGAGGGATGTGCCGGGTTCAAGCCATTAGCGGTGCGGGGTGT +AATAGGTTGATCGCGAGCGTCGGGAGTGTCGAACAAAGGAGTCTCGAGCACTTCACCATT +AAGGTGGGACGCGCTAGAGTTCTCAGATTGACGCGGGAGACCAGGCATAAGGCGAAAATG +AATACAATTCCATTCGCAAAAACTACATGTCTATATATGCTCGTAATCAAAAGCTCCGCA +GAGCTGAAACGCCTTAGGAAGTGTGAACGCGTGCGAGATGACGATACTAGAATCGTGTTA +GATATCTGGAGTGATAAAATCAAAAATATCCACGTACAGATGGATCGTGAACCATCAGAT +GCGTAAACGCAAGCAACCCTTCTAATCCCAGGCGCTCGTCACAGAAATGGAACGGACGCT +TGTGCTGCAAAAAGGGAGCGCGATCGCAGATCTTGATGACGGCAAGTTCGAGCCGAAATA 
+AGTACAGAATGTGCAAAACTAAAGTAGGGTTGGTGGCGAGATTATGAGCAGAAAGACGCC +AGGACGTCTGGATCTCGAGACAAATTAATCAATAATAAATGAATAAATAATTGTGATACG +CACCAGACGACGGTACAAAGCGTGGATACGCTCTAGAGTGCCTTCTGGATCGTCCTTGCC +CACAGTAGACAAGTAACGAGCATCGAATGAATCCAAATCAACAAGCAATTGCTTGAAAAG +TTTGGATCGCGACTCATCGTCGAGGCGATGAAGGAAGATACCGGCGTCGGATGCCTCGCG +AATATTCGTAGAGCAAAGAGGCACGTCGGGGACATAGGGAAAAGCCATCGTGCGCCTGAT +ATGTCTGACGTGTACGCAGAGAATTTTATACAAAACAAACAGAGATCGCGATCTCGATAT +TCCATTCAGAGACCAACTGGGGTCAGTTGCTTTTTGTTGGGGGGGAGAGATAAGATCTAT +CTAGAGAAAATTTAACCCGAATAATTTTCGAGTTTTTTCACTTAGAAATCACACTTCAAA +TCAGGCAAATTTGGAAAGACAAAGCGAAGTTTTTATTGAGAAGAAAACACAAGTACTACA +GGGAACTTCAAAAACCCTATAGTAAAAGAAGAGATGAGATATGAAAAATATAAAAAAAAA +AAGAGAAAAATGCTGTCAACGGCGAGCCCTCTTGACCCTAGGACGAGGAACAGGAGACGA +GCCAGAGCGCTTGAGGCGGAGGAGCTTCTGACGCAGGGGAGAAGGCGAACGGTCACCAGC +ACCGCTCCCAGCCTTATCCTCCTCGTCCTGATTGCCACCAACGCTCTCACCCTGGTCGTC +CTCTTCGTCCTCTTCCTCATCACCATCACCTTCGCCAGAGCTTTCTATAGCTTCGTCGTC +GATAAGAACAGATCGACGAGGAAGAGCGCGTTGTGGACGAATGGAAGCACTAGCACGGCA +AGTGCGCGTTGGCGCGCGCGTCGAAGATGCGCCGGTCGCGACAGTTGCTACAGGCACCGA +AGGGGCGGGAGCTGTAGATAGCGCGGGATGAACCGTGAGTGGAGACCGATCCGGGGTAGG +GGTAGCCCAGATGGAGAGCTGCGCGAATAAGTTAGCGAGCTCGTCCGGAGACGAAAGCAC +GCTGGGTGTCGGCTCCAGTCGCACGCGCTGTGCGTCGATATCAGGGAGCTCCAGGAACCA +GCGAAAGATGTCGAAGGCCTCGGGGTCCGAGTGAAATAGAGTCGCGATAGGTGTGGAAGC +GCTAAGGCGCTCGGATAGTTGGAGCGCGCGGTTAAGCGCGCGGGCACTGGCTTCGGCCGA +CCGGATCGCGAGCTCTTGTTGACGGAACGCCTGTTGCGCGTGAAGCACAGCTTCGGCCAA +CAAGGACTTCAACACTGCAAAAATAGTTTAGACTGAAGAAATAAAGAAGAATAGAATAAA +GAAAGCACCGTTGGGATCAAGATCATTCCATGGCGCAAGAATAGCCGCGAGCTCCCCGAG +AATCTTGGGGGAAGCTGCGAAAGTACAGCCAGTCTTGCGGCCGGTCAGGCAGGCCTTGCA +TTGGAGGCCCCAGCCGCGGAATTCGCAAAATAAAAGAGTCCCGTCGGCATGAGGGATGGT +ACAGGGAATGCACCGGGCTGGGGCCTAAAAATAAAATAAATAAATCAAGATAATAGAGCA +GAAAAGAAATAAACTCACCAATAGAGTCTTGATCACCGCTAACGCCTTCTGAGCGTCGCG +GAGGAGCGCAGAGTCGCGCATGGGGCCCAGGGCGACGGCCAGATACGCGCCAGCAGCGTC +GCGAGGCGTGAGGGACTTGAATAATTCGAGAGTCTCCTCGTCTGGCTCCGGCAGGGAATC +AAAAGCGGCGTTGGGATGGCCGACCGAGGGCGGGACGACTTGTTGGATCTCGACCGAGCT 
+CACGGCCTTCTTGAATAAGTTGATGGCCCGTTGGCTTTTGGACCTCATAGTGACAACCTT +CGACCCGTCCTTGCCGTGGTTGGATGTCGACGGGCGCAGCGCAATAGTGGGCTAAAAATA +AGGCATCAAAAGATAGGCAGTAAATTAAAAATAGACAAATACCTCGAGAGGGAGCGTATT +GAAGAAAGTTTGCGCATTAGAGGTGAGCCGAGCGCGAGAAAGGAGAAGAGCGGCAAGCTC +CTCCTCGGCATCGGCGTCCTCATCACCTTGATCAGAGGGATCGCGCTCTTCCTCCGACGG +AACTATAAAAACCGTGAATAATAAGAAAACAGAAAAGAATGATAACTTACCGGGTGTAGT +AGCATGGCCTTTCCCCTTGTGCGCCCTGGATAAGCGCGGGGTGGACTCGTGATCCGACTC +CTCGAGCTGATCGACCTCAATAGGTTGAGGCTCTTCGGGAGGCGACGACGCGGGTTCTGT +AAAAATATAAACCAGAATTAAAAAAGGGTACGCGAGTAATGAAAAACAAAAGAAAATAAG +TACCTTGTAACGCCGCTACAGGAGGAGCAGCAGCAGAAGAAATAGTAATCTCCATGTCCT +CGCGACCGGAGAAGTCATCGTCCTCGGGAGGGGGCAGAGCTGGGACACCAATGGGGAACT +GCGCCTCGGCAAGCTTGAGAAGATCCTCGTCGACGGGAGGATCGCGAGCGACGTGAATAG +GGTCTTCCAGGTTGTCGACAGGAGAAGCTGGTTGAAAACGCCTCGCGATAGCAGATAAAG +ACAAGAGGCGCTCACCAAGAGAAGAAGGAGCGGACACAGCGGGCCGGGGAGTAGCGAAGA +CGGAGCTGTTGGAAGTAGAGGCTGCCACCACGGAAACAGAAGCGGAGGCGGCAGCTGCAG +GCACAGAGAGAGAACCCGCCACGCTTGCGGCGTCTGGCTCCGGAAGAGCGTCGACCGGAA +GTAGGAGCGGGCGAATAAAGGACAGAAGAATATTGGCGGCCTCTTGATCAGCCGAGCGCG +GCGTCGACGACAGAGGCTCTTGATCCTGATGGACCGAAGAATCGCCCGCGGTGAGATTGG +GAGAAGGTTCGCGAGTCGGCGAGGCAGTCATGAGCAAAGGAGATGGCGGTGGAGAAGGGT +CGCGAGCTGATAAAGGTGGCGACGGCGCGTCGCGCGCGGGAGATGGAGGCCGGTCGTCGC +GTAAAAGCGCCCCGCTGTCGAGAGGGGTGTCCATGAACGACGAGGGCTCTCGCCAGGTCG +AAGGAGGGCTGCCGCTAGCAGCCAGCGCGGCAGAAAGACCAGGTGGGGGTGAAAAAGCAT +CGTTAGGGTCGTACCGGAGGTCGGCGCTGCAGGGGAAGCTCATGTCGATAGGTGGTGGAA +TAGGGAGTTGCTGCACGTAGCAGCCGATCTGTCCGGCATCGAGCCATCGGTGTCCAAAAA +GATGCGCTTGGTCTGGAAACCATAGCGCGGGATCGGTGCTAAAGAACAAAAGTTCAGCGT +CGTACTAAACCAAAATATGTAAACGTTAGTAAGACATGGCAAACGAGGAAAAAGGATCGC +GACACGTACTATGACAACGTCCCGGTACCAGCCATGCGATAGATGTTCCAGCGTCGCGCT +GGAAGGCACAGCATCGCAAACACGGGAGATGGAATACAGGAAACGCTCGACGGGAGCGCG +CAGGAAGTGGTCAGCGGGAAATCTCGCCAGTCGGTCATTCAGAACTCCCTGAATACGACA +CAGTTTAGCTAGAAAAATTTAAAAGAGACGGAATATGACGCTCACGAGGTGAGACGACCA +CTCAGTGATCAGCGCTCTTAGCCAGGCGTGGCTGGGCGTCTCTTGAGCAAGGAAGGCGGC +TATGATCTTGCTGAAGCGGAAAAAGGAAAGAGCGAAGGAAGAGGAATAATTAGTATCCGG 
+GGAGAAAAGTGTCCCCTGGAAAGTAGGAAAGAGGATAGGGTGTGTATGCATAGTGAAAGG +ATGAGGAAAGTGGGAAACATACGCGACAAGGGATCGCGACTTAAATACGTCGTGAACCAG +CGGCGTACGCGGACAGTCAAAGTCTCGCGAGAGCCCACCGGCAATCTAGGAAGCGTTGAT +TTGCGTCTATAATGGGCAGTTAACTTCCTAGTGATAGCTTCATGGATCGCGAGAGACGTC +ATAGCGTAAAACTTATGTTGCGCGGCGTGTATGTCGCGAGAGTAATGCTGGGCTAGGGCC +AAATTCTAAAAAGTTTAATTGATTGATGGGGAAATTCTTCTGCGACGCCGCTACAAAGGT +CGCGATGCATCCTCACGCCTAGTTGGAGGATCGCGACGAAAGAACTATCAAAATAATGGC +CTTGCGCGCCTTAAGCGCTCAGGATAGACTATGCGGAAGGGACGTACCAGGATGAGGAGA +TGCCGAGGAAGGAAGCGCGTCGCAATCTAAAAATAGATAGGTGCAGCGCCGTAGCAACGG +CTTCGATGTCTAGCCCAGGAAGCTGGAACCAGGCCAGAAGGCTGTCCGTGGGCGCCGATA +TGAAAGAGCGGTGGCGCCCACTGATAGAATACCAAGGAATAATGACCAAAGCAGCAGGGG +AAGTGCCCGTCTAGGGATCGGTCGTACGGAGTGTCCAGAAAACGGATCGCGACAGAACCG +AATAGCGTTTTAAAAATAGAATCAGGTCGGGGAAACTCTGAATGGGTGTCGACTGGGGAC +AGTCGATATTTGTTAGGGGGGAGATGATAGGGTAGTTACATGGCTACGCCTGAAAATTCG +AAATTTTCACATTTCCCTTACAAAATAAATAATGATTAAGAGGAGCTAAGGCCGTCCCGG +GCTGAGAGCTCAGAGCAAATGCGCGTGCGGGCGATAATGATCGCGAGCCTGTGACGAAGG +TAGCCGCGCGCTGTGCTAAATATAGGTGCACGTTCGGCAAGCTGGGAGACAGTGTACAAA +GCTCGCGATCCATTATTATTGTAGTAAAACTGCACGGCGGCTCCATTACACTAAGGGACA +GGAGGACTAGACGCTATGAGGGAGTCGCGAGACTAATACAGGTCGTCCAGGACGCTCGCG +ATCGCTAGTACGGAAGACACACGCCTACCGGCGGACCGGCAATAATAGCACACCGGCCGG +ATAGCGCTAAACCGTTGGCGCGCGCTTAAGCTATGAAGGCACGGCAAATGCCGAGCGCGC +CCGAGCGCGCTTCCGAGGATGCCTGCAGAAGGCGTATCTGCGCCAATACGGGCTCACGAT +CTGACTTATCAGGTCGCGGAGTACCTAAACAATAAACGGGTAGGATCCGAGGGGCTTATC +TCGAGCCCCGTGTCCTAAAAACGCTCAGAAACAGTAGATAGAAGGTGACTCATACGAGGT +AGCACATGGATCGCGAGCCTCGAGGAGGGCAACCCTTCGCCCATTTGTTTGTATAACGGT +TTGATCATATTAGACTTAGAGATATTTTCCATACAACCTGTGTACTATTTAACTAGCTGT +AGAGTACAGAATAAAATCAAGCAATCGTACCCCACATTTTACAAGAGCTAAACCTGCAAA +GGCGCGAGCTCGACTAAAACTAAGGAATTTACACATTTGACTTCCGAGCTTCGAACTTAA +GCACTTCGTAAGATCGTGATCCCGTTAGCGCCGTCAGTGATAGTAGATTTAAGACCGAGC +CTACTGAGAGTGGTAGAGAAGCTAGTTTTAAAGAAAATATTGCAGACGGATAGCGCGAGA +CTACGAGAATTTTGAGGTGCCGTAGAGTGCAAACGCTCCAGATCCAAGGCTACTAGACGT +TTTCGAGTGCCCTAGTGATTGATTGATATTTTACTCTCGCGATACATTGGAAGTTTTGGC 
+CGGAGTGTTATACAACGGAAAAGCTGAAGATAAATAAATCTAAAATCTGTTTAAATCCCC +CTCAACCTTGAAGAACATCGCAGCACTCGCTGTCGAGGATCGCGACTACTTCTACACGTA +TATAAAACAATAAAACCCCTAAAACTACACTTTTGCTTCGTTTTCACCCTAGGAGCTAGA +CCACACCATATATCTCATTTTGTCTGTTGAGAGCGTCGCTCGATACTACGAAGTAGATAT +CTCACTCCGCTGGGTTTTGTGTTTTCTTTTGTTTTCGATTTCGCGACTTGCCTGACAGCA +AGCCTCTCA +>SRR17300492_42243 +TAATTAGGAAATTAGACACTTTTAGATGCTATTAATACTTTTAGATACTTTTAGACACTA +TTAGATATAATCCGGAGCTAATACAAGGGGGTGGTTTTCCAGATGTGTCCAGCTTCTTGT +AAACAGAGATTCTAGGATATTGAGTGCCATAAGGAATTGTATCGGTGCTTGCGATATCAT +TGTCCCAGCTCTGGCCACCCTGATGAAGATTGCCATGGCAAGCACATGGATCATCTACAT +TGGCGCAAGTACGGACGAGGCCAAGACTAGAGCAAATTCCAGTACTTTCGATGATTCTGA +TCTAGAGATACAATAAATGTTCAAGGCTCACCTAAATTTGAGGCAACACACTCACCAACT +TAGCCTGGATGTTGTCACCCTGCTTGAATTTTGTGAACTGTTACTCACAAAAATAGGGCA +AATGTCATGGCAATGCGTGTAAAAAAAGTCTTGAAAAAATCGTTTATTGGCATAGGGCCA +ATGAAGCTGTAGTCCATGTCTATAACCGATGGTACTGTGCTGAGGAGGGATGATCTCATA +TCAGTCCGAGGATGTGTAAGCAGTGAGCAACTAGAGTGAGAGTTCAGGAGGTCAGAAAGA +AGAAGGATGGTGTCGCCGACATACCCAGGCTAGTAGTGATTACAAGTCCAACGAAGAGCA +AGATGTTGAGCCATTGAGGACTCGAGCCACCACACGTTCAATTGGCTGGGTTTGAATTAC +ACATTTAAAACTCCAAAGTACACGTGACCTGGCATCCAGTGACAATGTGGCATAGTACAC +ACTGCTCTCTAGAGTCTAGTCCATAGAATAGGGTAGCTTTTCATTTGGGCGGGGAATCAT +ATACGCCGGCATGGGCATCAATCACTCGACTCAACACAGCATACAAGTAATCTTAGTGAA +TATCACGAAACTAGACACTGAGATCTGGTAGTGGGCACTGAACTCCTTGGGTCATCGCAC +GAGCCTTGAGTCACAAGCAGAGACACCAGAATAAGCTACAAGACGGTGCAGTAACAGTCG +CATCACAACTACGCCGTCTTTCGTTTACCACTGGCCCTCTGCAACGCAACTTCGTCCAAG +CTGCGCCTAGCTCCGGCCAGCTTGTAGTATAACGATTCTCCATGTAAGCTGTCGCTCTGG +TCATTCCTATTACTTCCCCATCCCACCGAGGACAGGTGGTTGTGGAAAATCTTGGTGAGG +GAGTCTGACGCAGTCAATGCAGTGCGAGCTTCGTCAGTGGTGGGGAGCAAAGAAAGGTCA +ATTCTCGATGGTCGGTCAGAGTAAAGGGGAACAAACAGCCACCGAAGTTCATGAATAAGT +TCGACGAGTACTAGACTGAAGAACTGCAGAAGAGTGCTCTCGTTGAAGTGTGTGGTTGCG +ATCTGAAAGAAAGCCCGTTTGCCTTCCCCGCCAGTAGCAACACCATCATCGTCGACTTCA +CATTGATCGAATACTACTTTCATCGACTTGGTTAGTTGTGAACGAAGCGGTTTCGGTATT +TTGTGAGTTTGAAACTAAACACAGTGATATACCAGAACATGGAGAAATGATTCCAAGTCA 
+TCACAGAGTTGATGGGGCTGCGATCCAGGTTGACTCAGTCGAGTTAACAACATGAATTGC +CATGTTCCCTGATAAAATATATTACGATGCAGAAGTTAAATAATCAAATAGTACCAGCTC +ACCGTTCTCCGATTGAGCCTTGCTCCTGAAACGGGGCAATCTGATCTTCTTGCAAAGTCC +CAGTCTATGAGCAGAGCGTGGCCCTCGTTCGTGATTAGAATATTCCCACTGCTTATATCT +TGATGCAGCATGCCAGAGACAAAAGCCTCCTGATGGGCTGGCTCAGAAATAAATTAGTTG +GTATGGGGCTGTGGATGCGTAATAGGCAACATACAGATAATGGCTTTATAGAGCACCTTG +CAGAGTTGCTCTGTAGAATTGTACTGGCTCAACACATCTGGAAAACCACCCCCTTGTATT +AGCTCTGGATTATATCTAATAGTGTCTAAAAGTATCTAAAAGTATTAATAGCATCTAAAA +GTGTCTAATTTCCTAATTAGGAGCAACACA +>SRR17300492_101362 +CCATGGTGCGCCGCGGCTTTCTTCCACCCTATTGCATATGGACCGTGAAGTGAAGGTGCC +TGACCCTATGGCTGGTCTAGCAGAGCTTCACTCAGGCGCTCTACCACCTCGCCGCGAATC +CAGAGTACGCATGCGTGCTGCGCAAGGAGGTCGAGTCCGCGGTCGCGGAAGACGGGTGGA +CGAAGCACGCGGTGGGTAAGATGCTCAAACTCGACAGCTTCATTCGGGAATCGCAGCGCC +TCAATGGACTTGGTCTATGTTAGTTTCTTCCTTGCCTTCGACCCGTGTCTGTTTACTAAC +CTCACCCGTCAATCTCTCCTATTACGGCACTGCCATGGGTATGATTGCCAGTGACACTTC +AGCGCTATGCCCTCAAACCTTTCACGCTCTCGACCGGTCAGACGATCCCAGCGGGAACGT +TTCTCTCGTGCACATCGCTGCCGACACATCATGATCCTGAACTGTATGCCGATCCCGACG +TCTTCGATCCCTGGCGCTTCGCTAGCATGCGCGAATCACACACCAATTCGGATGACGACC +TGAGCGCCCTGGCCAGGGCCAAACTCCAGATGGTGTCCACCAGCACAGACTTCCTAGCCT +TCGGGCACGGCAAGCACGCCTGGTAAGTCGACCTCCCCTCCGGCACGCCCCTCGATCTTC +AGGAAAGCACGCCCTACTGCACGCGACGTCTTTCACGACACATGTTGACCTCGTTTCCGC +AGCCCCGGGCGCTTCTTCGCCGCCTACGAGATGAAAGCAATGCTTGCGCACGTCGTCGTC +GCCTACGACGTCAAGTTCGAGAACGGCGCTGCGGACTTCCCGCCAAATTTGTTCATCGGA +TCTGCCTGTTTGCCGGGAAAGGGACAGGTCATGTTCAGGAAGCGGCAGGTGAGCGACGAG +TTGACTCTCGCTTCCCTAGAAAAGTAGTCATGTTGAGTACTCGTGTGTAACGTACCTTGA +GTCAAGGAGATTGACTGAAGGTACAATTATGGTTTCTTGCATAGTGCTGATAAAATAGTG +AGAGTAACAGAGGAAGGTGTGAAACAAACGACTCCGGGAGATATACAGTCATCACGAGCA +ATATAGTACATGCATTACCACTGGCACTACGAGATCGGACTCTCCTTCAATAATACCCAC +AGAAAGCAGCTAGGAAAAACCCCTACTCTAAGAAGACTTCGGTAGGCCACATGTGGGGCA +TACGATAGGCCTAGTCTTGCACGCCTTGTATACTGCGTATCCCAGTCCTGCTACCGCCAT +CCCGCCTGCAGCGCTGATGATGAAGCCTACGACAGGAAGTGCACCTCCGGCTCCTACGCT +GGTAGCGACGGCGAACGGGGATCCCGCGACAACGTTACCGATGCTAGCTTGCACTGCGGC 
+TGCCCATGATGCTACGCATAATATAAAGTCATTGTCTGTTGGTGGGCAGCAAATGGTGCG +AATGAGCGCACTTGCAATGATACCCTCAGATCCGAAACCAAGTAGAGCTAGGACACCTGG +GGCTGCAGCTGGAGCAGCAGCCAGTCCTATCACTGCACCCGTACCCGTGACAGCGACCTT +AGTATGGTCGCCACAGGTACAGAATTGAGGGCCAGCAGCTTCAGCACCCTCTCCAGCAGG +ATTTATTGCGTCTAGATTGCAAGAGGTGATCAGCAGTCGATGTGTAGTGCACACTCGATC +TCGATGAAGAACTTACTATTCATGTTGCTCCTATTATTCACGACGGTCGAGCGATAGAGA +CTTAGAGTCTGAGGATGTTTCAAGATTCGATTGGTTGTACTGAGTGGGTCGAAGGGTGTT +GGCCAGTCTGCCCCCCACGAGCGTCCTTATATACTTCGACACGTTCCCATCGAAGATTCT +GGGTTATTAGGGTACCTAGCGCTCATCCTATTTTGATATACAAGGTAGAGCTTATGATAA +TATCATATTTGACTCGAAGTATGCCTGGTAGACAAAAGTCAGCCGCTCGTCGAAAGATAG +AAGGACCGCCCACTGCCAAATGTATCGTATCATGTTCCCTTCGGGACCTTGTTTCCAAAC +ACACGCCTGTGCTCTTTGTTCTCATCGGAAAGCATAGCATGGTGACTCCACCTACCCTG \ No newline at end of file From 3b64836d269cc8f1291208ab24156cefb82dd848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9r=C3=A9nice=20Batut?= Date: Mon, 22 Sep 2025 17:01:48 +0200 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Saim Momin <64724322+SaimMomin12@users.noreply.github.com> --- tools/detect_circular_sequences/.shed.yml | 4 ++-- tools/detect_circular_sequences/detect_circular_sequences.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/detect_circular_sequences/.shed.yml b/tools/detect_circular_sequences/.shed.yml index 1b6b6130e09..c97ce9282e9 100644 --- a/tools/detect_circular_sequences/.shed.yml +++ b/tools/detect_circular_sequences/.shed.yml @@ -10,6 +10,6 @@ long_description: | categories: - Sequence Analysis - Assembly -remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/detect_circular_sequences -homepage_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/detect_circular_sequences +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences +homepage_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences type: unrestricted diff --git 
a/tools/detect_circular_sequences/detect_circular_sequences.xml b/tools/detect_circular_sequences/detect_circular_sequences.xml index 5c789915057..683cb79ee6d 100644 --- a/tools/detect_circular_sequences/detect_circular_sequences.xml +++ b/tools/detect_circular_sequences/detect_circular_sequences.xml @@ -1,7 +1,7 @@ (e.g. circular contigs) in a FASTA file by k-mer matching - 0 + 1.0 0 24.0 From 803c484a2e070df1184e932473f19a0401bb3cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Fri, 26 Sep 2025 20:34:46 +0200 Subject: [PATCH 3/3] Update tools/detect_circular_sequences/detect_circular_sequences.xml --- tools/detect_circular_sequences/detect_circular_sequences.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/detect_circular_sequences/detect_circular_sequences.xml b/tools/detect_circular_sequences/detect_circular_sequences.xml index 683cb79ee6d..59ca79ecad6 100644 --- a/tools/detect_circular_sequences/detect_circular_sequences.xml +++ b/tools/detect_circular_sequences/detect_circular_sequences.xml @@ -1,5 +1,5 @@ - (e.g. circular contigs) in a FASTA file by k-mer matching + (e.g. circular contigs) in a FASTA file by k-mer matching 1.0 0