Skip to content

Commit

Permalink
Fix double Bonferroni correction
Browse files Browse the repository at this point in the history
* added output file to init.ini
* add results format to init.ini (int or real)
  • Loading branch information
friofry committed Jun 10, 2021
1 parent f1b24ac commit 0e8fa54
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 90 deletions.
2 changes: 2 additions & 0 deletions iterative_finder_lib/argo_cuda_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ struct ArgoCudaParams {
double max_motif_score_contrast{ 0.0 }; // Maximum score in a contrast set of sequences
double min_motif_chi2{ 0.0 }; // [deprecated]
bool bonferroni_correction{ false }; // Output results with Bonferroni correction [0, 1]
std::string output_file; // Output file
bool int_results { true }; // Output integer scores
};
// clang-format on
10 changes: 7 additions & 3 deletions iterative_finder_lib/argo_cuda_params_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ ArgoCudaParams read_ini_file(const char *inifile)
{
auto lines = readlines(inifile);
ArgoCudaParams result;
if (lines.size() < 14) {
cout << "Ini file should contain exactly 15 parameters. " << inifile << lines.size() << endl;
if (lines.size() < 17) {
cout << "Ini file should contain exactly 17 parameters. " << inifile << lines.size() << endl;
return result;
}

Expand All @@ -68,6 +68,8 @@ ArgoCudaParams read_ini_file(const char *inifile)
result.max_motif_score_contrast = stod(lines[12]);
result.min_motif_chi2 = 0; // [deprecated] stod(lines[13]);
result.bonferroni_correction = atoi(lines[14].c_str());
result.output_file = get_first_word(lines[15]);
result.int_results = atoi(lines[16].c_str());
return result;
}

Expand All @@ -79,7 +81,7 @@ void print_argo_cuda_params(const ArgoCudaParams &params)
cout << params.max_motif_prob_by_chance * 100
<< "\t\tMaximum presence of motif for random reasons in the positive set of sequences [0-100]" << endl;
cout << params.min_motif_presence * 100
<< "\t\tMinimum presence of motif for random reasons in a positive set of sequences [0-100]" << endl;
<< "\t\tMinimum presence of motif in a positive set of sequences [0-100]" << endl;
cout << params.positive_sequences << "\tFile with positive set of sequences" << endl;
cout << (int)params.use_real_nucl_frequences
<< "\t\t0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]" << endl;
Expand All @@ -96,4 +98,6 @@ void print_argo_cuda_params(const ArgoCudaParams &params)
cout << params.max_motif_score_contrast << "\t\tMaximum score in a contrast set of sequences" << endl;
cout << params.min_motif_chi2 << "\t\t[deprecated]" << endl;
cout << params.bonferroni_correction << "\t\tOutput results with Bonferroni correction [0, 1]" << endl;
cout << params.output_file << "\t\tResults file [0, 1]" << endl;
cout << params.int_results << "\t\tWrite integer results. 0 - real values, 1 - integer [0, 1]" << endl;
}
22 changes: 9 additions & 13 deletions iterative_finder_lib/iterative_finder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
using namespace std;

ImportantMotifFinder::ImportantMotifFinder(const ArgoCudaParams &params,
const FindOccurrencesAlgorithm &external_algorithm,
const char *output_file)
const FindOccurrencesAlgorithm &external_algorithm)
: _params(params)
, _external_algorithm(external_algorithm)
, _output_file(output_file)
, _output_file(params.output_file)
{
if (_output_file.empty()) {
_output_file = "a.txt";
}

if (params.use_old_motifs_file) {
throw invalid_argument("use_old_motifs_file is not supported yet.\n");
}
Expand Down Expand Up @@ -206,27 +209,20 @@ void ImportantMotifFinder::exclude_motifs_by_score(std::vector<uint32_t> &motif_

void ImportantMotifFinder::write_results_old()
{
static double bonferroni_k = log10(TOTAL_MOT);

ofstream f(_output_file.c_str());
for (uint32_t i = 0; i < _found_motifs_data.size(); i++) {
const auto &d = _found_motifs_data[i];
auto rand_w = _stat_model->get_random_weight(d.hash);
double score = d.score;
if (_params.bonferroni_correction) {
score -= bonferroni_k;
}
f << hash_to_string(d.hash) << "\t";
f << int(100 * d.weight / _sequence_hashes.count) << "\t";
f << int(100 * rand_w / _sequence_hashes.count) << "\t";
f << (_params.bonferroni_correction ? score : int(score)) << endl;
f << (_params.int_results ? int(d.score) : d.score) << endl;
}
}

std::vector<uint32_t> find_important_motifs(const ArgoCudaParams &params,
const FindOccurrencesAlgorithm &external_algorithm,
const char *output_file)
const FindOccurrencesAlgorithm &external_algorithm)
{
ImportantMotifFinder finder(params, external_algorithm, output_file);
ImportantMotifFinder finder(params, external_algorithm);
return finder.find();
}
6 changes: 2 additions & 4 deletions iterative_finder_lib/iterative_finder.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ using FindOccurrencesAlgorithm = std::function<void(const std::vector<uint32_t>
class ImportantMotifFinder {
public:
ImportantMotifFinder(const ArgoCudaParams &params,
const FindOccurrencesAlgorithm &external_algorithm,
const char *output_file = "a.txt");
const FindOccurrencesAlgorithm &external_algorithm);
std::vector<uint32_t> find();

private:
Expand All @@ -37,5 +36,4 @@ class ImportantMotifFinder {
};

std::vector<uint32_t> find_important_motifs(const ArgoCudaParams &params,
const FindOccurrencesAlgorithm &external_algorithm,
const char *output_file = "a.txt");
const FindOccurrencesAlgorithm &external_algorithm);
30 changes: 16 additions & 14 deletions motif_finder_cpu/init.ini
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
8 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
5 Minimum presence of motif in a positive set of sequences [0-100]
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
8 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
5 Minimum presence of motif in a positive set of sequences [0-100]
test_12.fst File with positive set of sequences
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
0 deprecated
10 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
0 deprecated
10 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
a.txt Results file
1 Write integer results. 0 - real values, 1 - integer [0, 1]
30 changes: 16 additions & 14 deletions motif_finder_gpu/init.ini
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
8 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
5 Minimum presence of motif in the positive set of sequences [0-100]
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
8 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
5 Minimum presence of motif in a positive set of sequences [0-100]
test_12.fst File with positive set of sequences
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
0 deprecated
10 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
0.fst File with contrast set of sequences
90 Maximum score in a contrast set of sequences
0 deprecated
0 Output results with Bonferroni correction. 0 - without correction , 1 - use correction [0, 1]
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
0 deprecated
10 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
a.txt Results file
1 Write integer results. 0 - real values, 1 - integer [0, 1]
30 changes: 16 additions & 14 deletions test_data/test_0/init.ini
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif for random reasons in a positive set of sequences [0-100]
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif in a positive set of sequences [0-100]
pos.fst File with positive set of sequences
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 Score type: 0 - chi-squared or 1 - binomial [0, 1]
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Output results with Bonferroni correction [0, 1]
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 deprecated
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
a1.txt Results file
1 Write integer results. 0 - real values, 1 - integer [0, 1]
30 changes: 16 additions & 14 deletions test_data/test_1/init.ini
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif for random reasons in a positive set of sequences [0-100]
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif in a positive set of sequences [0-100]
pos.fst File with positive set of sequences
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 Score type: 0 - chi-squared or 1 - binomial [0, 1]
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Output results with Bonferroni correction [0, 1]
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 deprecated
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
a1.txt Results file
1 Write integer results. 0 - real values, 1 - integer [0, 1]
30 changes: 16 additions & 14 deletions test_data/test_2/init.ini
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif for random reasons in a positive set of sequences [0-100]
1 Complementarity. 0 - forward strand. 1 - forward + reverse strand.
1 Minimum score
35 Maximum presence of motif for random reasons in the positive set of sequences [0-100]
1 Minimum presence of motif in a positive set of sequences [0-100]
pos.fst File with positive set of sequences
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 Score type: 0 - chi-squared or 1 - binomial [0, 1]
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Output results with Bonferroni correction [0, 1]
0 0 - neutral frequencies, 1 - real nucleotide frequencies in the set of sequences [0, 1]
1 deprecated
1 Maximum number of result motifs. 0 - reveal all significant motifs [0, ]
0 Markov chain order (0-Bernoulli, 1-dinucleotide, 2-trinucleotide), when using real nucleotide frequencies [0-3]
0 deprecated
0 deprecated
neg.fst File with contrast set of sequences
10 Maximum score in a contrast set of sequences
0 deprecated
0 Use Bonferroni correction for score [0, 1]
a1.txt Results file
1 Write integer results. 0 - real values, 1 - integer [0, 1]

0 comments on commit 0e8fa54

Please sign in to comment.