Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 3.11.14 #122

Merged
merged 3 commits into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ release: clean
make all

common.o: common.hpp common.inc
curl_easy.o: curl_easy.hpp common.hpp common.inc
gff.o: gff.hpp common.hpp common.inc
alignment.o: alignment.hpp alignment.hpp common.inc

Expand All @@ -106,7 +107,7 @@ amrfinder: $(amrfinderOBJS)
$(CXX) -o $@ $(amrfinderOBJS) -pthread $(DBDIR)

amrfinder_update.o: common.hpp common.inc
amrfinder_updateOBJS=amrfinder_update.o common.o
amrfinder_updateOBJS=amrfinder_update.o common.o curl_easy.o
amrfinder_update: $(amrfinder_updateOBJS)
@if [ "$(TEST_UPDATE)" != "" ] ; \
then \
Expand Down
16 changes: 0 additions & 16 deletions amr_report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,22 +910,6 @@ struct BlastAlignment : Alignment
}
string getMethod (const Locus &cds) const
{ //IMPLY (refExactlyMatched () && ! mutation_all. get (), ! isMutationProt ())
#if 0
string method (fromHmm
? "HMM"
: refExactlyMatched ()
? alleleReported ()
? "ALLELE"
: "EXACT" // PD-776
: isMutationProt ()
? "POINT"
: partial ()
? truncated (cds)
? "PARTIAL_CONTIG_END" // PD-2267
: "PARTIAL"
: "BLAST"
);
#endif
string method (fromHmm
? "HMM"
: isMutationProt ()
Expand Down
92 changes: 61 additions & 31 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@
* AMRFinder
*
* Dependencies: NCBI BLAST, HMMer
* gunzip (optional)
*
* Release changes:
* 05/06/2023 PD-4598 error messages in curl_easy.cpp
* 3.11.14 05/05/2023 extern "C" { #include <curl.h> }
* 3.11.13 05/04/2023 PD-4596 Prohibit ASCII characters only between 0x00 and 0x1F in GFF files
* 04/24/2023 PD-4583 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional)
* 04/19/2023 On failure no empty output file (-o) is created
* 3.11.12 04/13/2023 Application::makeKey()
* PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
* 3.11.11 04/13/2023 PD-4566 --hmmer_bin
* 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier
Expand Down Expand Up @@ -470,6 +477,22 @@ struct ThisApplication : ShellApplication
t. qc ();
t. saveFile (tmp + "/" + tmpSuf);
}



string uncompress (const string &quotedFName,
const string &suffix) const
{
const string res (shellQuote (tmp + "/" + suffix));
ASSERT (quotedFName != res);
const string s (unQuote (quotedFName));
if (isRight (s, ".gz"))
{
exec ("gunzip -c " + quotedFName + " > " + res);
return res;
}
return quotedFName;
}



Expand Down Expand Up @@ -553,8 +576,11 @@ struct ThisApplication : ShellApplication
}

if (! output. empty ())
{
try { OFStream f (output); }
catch (...) { throw runtime_error ("Cannot create output file " + shellQuote (output)); }
removeFile (output);
}


// For timing...
Expand Down Expand Up @@ -641,13 +667,9 @@ struct ThisApplication : ShellApplication
if (! dbDir. items. empty () && dbDir. items. back () == "latest")
{
prog2dir ["amrfinder_update"] = execDir;
string blast_bin_par;
if (! blast_bin. empty ())
blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
string hmmer_bin_par;
if (! hmmer_bin. empty ())
hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update")
+ makeKey ("blast_bin", blast_bin)
+ makeKey ("hmmer_bin", hmmer_bin)
+ ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName);
}
else
Expand Down Expand Up @@ -759,18 +781,27 @@ struct ThisApplication : ShellApplication

for (const string& include : includes)
stderr << " - include " << include << '\n';
}


// Quoted names
const string prot_flat = uncompress (prot, "prot_flat");
const string dna_flat = uncompress (dna, "dna_flat");
const string gff_flat = uncompress (gff, "gff_flat");


{
StringVector emptyFiles;
if (! emptyArg (prot) && ! getFileSize (unQuote (prot))) emptyFiles << prot;
if (! emptyArg (dna) && ! getFileSize (unQuote (dna))) emptyFiles << dna;
if (! emptyArg (gff) && ! getFileSize (unQuote (gff))) emptyFiles << gff;
if (! emptyArg (prot) && ! getFileSize (unQuote (prot_flat))) emptyFiles << prot;
if (! emptyArg (dna) && ! getFileSize (unQuote (dna_flat))) emptyFiles << dna;
if (! emptyArg (gff) && ! getFileSize (unQuote (gff_flat))) emptyFiles << gff;
for (const string& emptyFile : emptyFiles)
{
const Warning warning (stderr);
stderr << "Empty file: " << emptyFile;
}
}


// organism --> organism1
string organism1;
Expand Down Expand Up @@ -853,7 +884,7 @@ struct ThisApplication : ShellApplication
bool lcl = false;
if (gffType == Gff::pgap && ! emptyArg (dna)) // PD-3347
{
LineInput f (unQuote (dna));
LineInput f (unQuote (dna_flat));
while (f. nextLine ())
if (isLeft (f. line, ">"))
{
Expand All @@ -863,6 +894,9 @@ struct ThisApplication : ShellApplication
}


const bool blastn = ! emptyArg (dna) && ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);


// Create files for amr_report
string amr_report_blastp;
string amr_report_blastx;
Expand All @@ -881,20 +915,20 @@ struct ThisApplication : ShellApplication
{
string gff_prot_match;
string gff_dna_match;
if (getFileSize (unQuote (prot)))
if (getFileSize (unQuote (prot_flat)))
{
findProg ("blastp");
findProg ("hmmsearch");

string prot1 (prot); // Protein FASTA with no dashes in the sequences
string prot1 (prot_flat); // Protein FASTA with no dashes in the sequences
size_t nProt = 0;
size_t protLen_max = 0;
size_t protLen_total = 0;
if (! fastaCheck (prot, true, qcS, logFName, nProt, protLen_max, protLen_total))
if (! fastaCheck (prot_flat, true, qcS, logFName, nProt, protLen_max, protLen_total))
{
prot1 = shellQuote (tmp + "/prot");
OFStream outF (unQuote (prot1));
LineInput f (unQuote (prot));
LineInput f (unQuote (prot_flat));
while (f. nextLine ())
{
trimTrailing (f. line);
Expand Down Expand Up @@ -941,13 +975,13 @@ struct ThisApplication : ShellApplication
string dnaPar;
if (! emptyArg (dna))
{
dnaPar = " -dna " + dna;
dnaPar = " -dna " + dna_flat;
if (gffType == Gff::pseudomonasdb)
gff_dna_match = " -gff_dna_match " + tmp + "/dna_match";
}
try
{
exec (fullProg ("gff_check") + gff + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
exec (fullProg ("gff_check") + gff_flat + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
}
catch (...)
{
Expand Down Expand Up @@ -1004,19 +1038,18 @@ struct ThisApplication : ShellApplication

amr_report_blastp = "-blastp " + tmp + "/blastp -hmmsearch " + tmp + "/hmmsearch -hmmdom " + tmp + "/dom";
if (! emptyArg (gff))
amr_report_blastp += " -gff " + gff + gff_prot_match + gff_dna_match + annotS;
amr_report_blastp += " -gff " + gff_flat + gff_prot_match + gff_dna_match + annotS;
}


if (! emptyArg (dna))
{
const bool blastn = ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
if (getFileSize (unQuote (dna)))
if (getFileSize (unQuote (dna_flat)))
{
size_t nDna = 0;
size_t dnaLen_max = 0;
size_t dnaLen_total = 0;
EXEC_ASSERT (fastaCheck (dna, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
EXEC_ASSERT (fastaCheck (dna_flat, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
const string blastx (/*"tblastn"*/ dnaLen_max > 100000 ? "tblastn" : "blastx"); // PAR // SB-3643

stderr. section ("Running " + blastx);
Expand All @@ -1028,14 +1061,14 @@ struct ThisApplication : ShellApplication
const string blastx_par (blastp_par + " -word_size 3 -query_gencode " + to_string (gencode));
ASSERT (threads_max >= 1);
if (blastx == "blastx")
exec (fullProg ("blastx") + " -query " + dna + " -db " + tmp + "/db/AMRProt" + " "
exec (fullProg ("blastx") + " -query " + dna_flat + " -db " + tmp + "/db/AMRProt" + " "
+ blastx_par + " " BLAST_FMT " " + get_num_threads_param ("blastx", min (nDna, dnaLen_total / 10002))
+ " -out " + tmp + "/blastx > /dev/null 2> " + tmp + "/blastx-err", tmp + "/blastx-err");
else
{
ASSERT (blastx == "tblastn");
findProg ("makeblastdb");
exec (fullProg ("makeblastdb") + " -in " + dna + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");
exec (fullProg ("makeblastdb") + " -in " + dna_flat + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");
if (threads_max > 1)
{
createDirectory (tmp + "/AMRProt_chunk");
Expand All @@ -1061,7 +1094,7 @@ struct ThisApplication : ShellApplication
findProg ("blastn");
stderr. section ("Running blastn");
const Chronometer_OnePass cop ("blastn", cerr, false, qc_on && ! quiet);
exec (fullProg ("blastn") + " -query " + dna + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 "
exec (fullProg ("blastn") + " -query " + dna_flat + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 "
+ get_num_threads_param ("blastn", min (nDna, dnaLen_total / 2500000)) + " " BLAST_FMT " -out " + tmp + "/blastn > " + logFName + " 2> " + tmp + "/blastn-err", tmp + "/blastn-err");
}
}
Expand Down Expand Up @@ -1127,10 +1160,7 @@ struct ThisApplication : ShellApplication
+ ifS (suppress_common, " -suppress_prot " + tmp + "/suppress_prot")
+ nameS + qcS + " " + parm + " -log " + logFName + " > " + tmp + "/amr", logFName);
}
if ( ! emptyArg (dna)
&& ! organism1. empty ()
&& fileExists (db + "/AMR_DNA-" + organism1)
)
if (blastn)
{
const Chronometer_OnePass cop ("dna_mutation", cerr, false, qc_on && ! quiet);
const string mutation_allS (mutation_all. empty () ? "" : ("-mutation_all " + tmp + "/mutation_all.dna"));
Expand Down Expand Up @@ -1188,7 +1218,7 @@ struct ThisApplication : ShellApplication
if (! emptyArg (dna_out))
{
prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", false);
exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);
exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);
}
if (! emptyArg (dnaFlank5_out))
{
Expand All @@ -1204,7 +1234,7 @@ struct ThisApplication : ShellApplication
t. saveHeader = false;
t. qc ();
t. saveFile (tmp + "/dnaFlank5_out");
exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);
exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);
}


Expand Down
Loading