Skip to content
8 changes: 4 additions & 4 deletions environment_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ dependencies:
- bwidget=1.9.14
- bzip2=1.0.8
- c-ares=1.24.0
- ca-certificates=2023.11.17
- ca-certificates=2024.6.2
- cairo=1.16.0
- certifi=2023.11.17
- certifi=2024.6.2
- charset-normalizer=3.3.2
- click=8.1.7
- contourpy=1.1.1
Expand All @@ -82,7 +82,7 @@ dependencies:
- gfortran_impl_linux-64=13.2.0
- giflib=5.2.1
- glpk=5.0
- gmap=2023.10.10
- gmap=2024.05.20
- gmp=6.3.0
- graphite2=1.3.13
- gsl=2.7
Expand Down Expand Up @@ -155,7 +155,7 @@ dependencies:
- numpy=1.24.4
- openjdk=17.0.3
- openjpeg=2.5.0
- openssl=3.2.0
- openssl=3.3.1
- packaging=23.2
- pandas=2.0.3
- pandoc=3.1.3
Expand Down
8 changes: 4 additions & 4 deletions environment_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ dependencies:
- bwidget=1.9.14
- bzip2=1.0.8
- c-ares=1.20.1
- ca-certificates=2023.11.17
- ca-certificates=2024.6.2
- cairo=1.16.0
- cctools_osx-64=973.0.1
- certifi=2023.11.17
- certifi=2024.6.2
- charset-normalizer=3.3.1
- clang=16.0.1
- clang-16=16.0.1
Expand Down Expand Up @@ -87,7 +87,7 @@ dependencies:
- gfortran_impl_osx-64=11.4.0
- gfortran_osx-64=11.4.0
- glpk=5.0
- gmap=2023.10.10
- gmap=2024.05.20
- gmp=6.2.1
- graphite2=1.3.13
- gsl=2.7
Expand Down Expand Up @@ -157,7 +157,7 @@ dependencies:
- numpy=1.24.4
- openjdk=21.0.1
- openjpeg=2.5.0
- openssl=3.2.0
- openssl=3.3.1
- packaging=23.2
- pandas=2.0.3
- pandoc=3.1.3
Expand Down
34 changes: 33 additions & 1 deletion pepti_map/aligning/gmap_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class GmapWrapper:
def __init__(self):
def __init__(self, min_trimmed_coverage: float = 0.0, min_identity: float = 0.0):
env_vars = dotenv_values()
try:
n_threads = env_vars.get("GMAP_N_THREADS")
Expand All @@ -27,6 +27,30 @@ def __init__(self):
self._n_threads = n_threads
self._batch_mode = batch_mode

try:
assert min_trimmed_coverage >= 0.0 and min_trimmed_coverage <= 1.0
self._min_trimmed_coverage = min_trimmed_coverage
except AssertionError:
logging.info(
(
"--min_trimmed_coverage option for GMAP must be between 0.0 and "
f"1.0, but was {str(min_trimmed_coverage)}. Using default of 0.0"
)
)
self._min_trimmed_coverage = 0.0

try:
assert min_identity >= 0.0 and min_identity <= 1.0
self._min_identity = min_identity
except AssertionError:
logging.info(
(
"--min_identity option for GMAP must be between 0.0 and "
f"1.0, but was {str(min_identity)}. Using default of 0.0"
)
)
self._min_identity = 0.0

def build_index(
self,
files_to_index: List[Path],
Expand Down Expand Up @@ -111,6 +135,10 @@ def produce_alignment(
str(self._n_threads),
"-f",
"gff3_gene",
"--min-trimmed-coverage",
str(self._min_trimmed_coverage),
"--min-identity",
str(self._min_identity),
]
alignment_command.extend(
[
Expand All @@ -137,6 +165,10 @@ def _produce_alignment_for_single_file(self, path_to_sequences: Path) -> None:
str(self._n_threads),
"-f",
"gff3_gene",
"--min-trimmed-coverage",
str(self._min_trimmed_coverage),
"--min-identity",
str(self._min_identity),
path_to_sequences.absolute().as_posix(),
]
with open(
Expand Down
78 changes: 70 additions & 8 deletions pepti_map/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def generate_trinity_results(
return trinity_results_paths


def load_trinity_results_paths() -> List[Path]:
def load_current_results_paths() -> List[Path]:
return TrinityWrapper.load_results_filepaths()


Expand All @@ -205,8 +205,10 @@ def align_reads_to_genome(
genome: Union[str, None],
gmap_index: Union[str, None],
output_dir: str,
) -> None:
gmap_wrapper = GmapWrapper()
min_trimmed_coverage: float,
min_identity: float,
) -> List[Path]:
gmap_wrapper = GmapWrapper(min_trimmed_coverage, min_identity)
# TODO: How to automatically use previously generated index?
if gmap_index is not None and gmap_index != "":
gmap_index_path = Path(gmap_index)
Expand All @@ -220,24 +222,45 @@ def align_reads_to_genome(
logging.error(missing_option_message)
raise ValueError(missing_option_message)

new_results_paths: List[Path] = []
for trinity_results_path in trinity_results_paths:
gmap_wrapper.produce_alignment(
[trinity_results_path],
trinity_results_path.parent / "alignment_result.gff3",
)
# Check if actual output was produced
with open(
trinity_results_path.parent / "alignment_result.gff3",
"rt",
encoding="utf-8",
) as gmap_output:
line_count = 0
for _ in gmap_output:
line_count += 1
if line_count == 4:
new_results_paths.append(trinity_results_path)
break

# Save new output paths
TrinityWrapper.save_results_filepaths(new_results_paths)
_write_last_step(Step.ALIGNMENT.value)
logging.info("Generated alignment of assembled contigs with GMAP.")
return new_results_paths


def generate_pogo_input(paths_to_subdirectories: List[Path], peptide_file: str) -> None:
def generate_pogo_input(
paths_to_subdirectories: List[Path],
peptide_file: str,
no_indels: bool
) -> None:
pogo_input_helper = PoGoInputHelper(
Path(peptide_file), PATH_PEPTIDE_TO_CLUSTER_MAPPING_FILE
)
pogo_input_helper.generate_all_peptide_input_files(
paths_to_subdirectories, PATH_TO_MERGED_INDEXES
)
PoGoInputHelper.generate_gtf_and_protein_files_for_multiple_directories(
paths_to_subdirectories
paths_to_subdirectories, no_indels
)
_write_last_step(Step.POGO_INPUT.value)
logging.info("Generated PoGo input files.")
Expand Down Expand Up @@ -338,6 +361,7 @@ def concat_output(paths_to_subdirectories: List[Path], output_dir: str) -> None:
"-pi",
"--precompute-intersections",
is_flag=True,
default=False,
help=(
"If used, the intersection sizes for the Jaccard Index "
"calculation are precomputed during the matching phase."
Expand Down Expand Up @@ -394,6 +418,33 @@ def concat_output(paths_to_subdirectories: List[Path], output_dir: str) -> None:
"the '-g/--genome' option is ignored."
),
)
@click.option(
"-mtc",
"--min-trimmed-coverage",
required=False,
type=float,
default=0.0,
show_default=True,
help="Sets the '--min-trimmed-coverage' option for GMAP during alignment.",
)
@click.option(
"-mid",
"--min-identity",
required=False,
type=float,
default=0.0,
show_default=True,
help="Sets the '--min-identity' option for GMAP during alignment.",
)
@click.option(
"-ni",
"--no-indels",
required=False,
is_flag=True,
default=False,
help=("If set, contig alignments containing indels "
"are excluded from further processing.")
)
def main(
peptide_file: str,
rna_file: str,
Expand All @@ -407,6 +458,9 @@ def main(
min_contig_length: int,
genome: Union[str, None],
gmap_index: Union[str, None],
min_trimmed_coverage: float,
min_identity: float,
no_indels: bool
):
_setup(output_dir)

Expand Down Expand Up @@ -464,21 +518,29 @@ def main(
)
else:
logging.info("Using already generated Trinity output files.")
trinity_results_paths = load_trinity_results_paths()
trinity_results_paths = load_current_results_paths()

if last_step < Step.ALIGNMENT.value:
logging.info("Aligning assembled RNA-seq reads to the genome.")
align_reads_to_genome(trinity_results_paths, genome, gmap_index, output_dir)
trinity_results_paths = align_reads_to_genome(
trinity_results_paths,
genome,
gmap_index,
output_dir,
min_trimmed_coverage,
min_identity,
)
else:
logging.info("Using already generated alignments.")
trinity_results_paths = load_current_results_paths()

paths_to_subdirectories = [
trinity_results_path.parent for trinity_results_path in trinity_results_paths
]

if last_step < Step.POGO_INPUT.value:
logging.info("Generating input files for PoGo.")
generate_pogo_input(paths_to_subdirectories, peptide_file)
generate_pogo_input(paths_to_subdirectories, peptide_file, no_indels)
else:
logging.info("Using already generated PoGo input files.")

Expand Down
Loading