Commit 69981a34 authored by Leandro Ishi Soares de Lima's avatar Leandro Ishi Soares de Lima

Merge branch 'portable_binary'

parents b16a04d7 a6aedb5e
cmake_minimum_required(VERSION 2.6)
cmake_minimum_required(VERSION 3.6)
# Handle superbuild first
option (USE_SUPERBUILD "Whether or not a superbuild should be invoked" ON)
......@@ -21,7 +21,7 @@ endif()
################################################################################
SET (gatb-tool_VERSION_MAJOR 0)
SET (gatb-tool_VERSION_MINOR 5)
SET (gatb-tool_VERSION_PATCH 1)
SET (gatb-tool_VERSION_PATCH 2)
#write the version number
file(WRITE ${PROJECT_SOURCE_DIR}/tools/DBGWAS/src/version.h "\#define VERSION \"${gatb-tool_VERSION_MAJOR}.${gatb-tool_VERSION_MINOR}.${gatb-tool_VERSION_PATCH}\"")
......@@ -119,7 +119,6 @@ INSTALL (FILES ${PROJECT_SOURCE_DIR}/html_templates/index_template.html DESTI
INSTALL (FILES ${PROJECT_SOURCE_DIR}/html_templates/index_table_template.html DESTINATION ./bin/DBGWAS_lib/ COMPONENT precompiled)
INSTALL (DIRECTORY ${PROJECT_SOURCE_DIR}/sample_example DESTINATION . COMPONENT precompiled)
INSTALL (DIRECTORY ${PROJECT_SOURCE_DIR}/csjs/lib DESTINATION ./bin/DBGWAS_lib/ COMPONENT precompiled)
INSTALL (DIRECTORY ${PROJECT_SOURCE_DIR}/sharedobjects DESTINATION ./bin/DBGWAS_lib/ COMPONENT precompiled)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
set(CPACK_COMPONENTS_ALL precompiled)
......
Done v0.5.2:
-Fixed various issues with the building process and the precompiled binary:
-The precompiled binary was not portable enough. We have now added a script that creates a portable binary, inspired by https://pmelsted.wordpress.com/2015/10/14/building-binaries-for-bioinformatics/;
-The script can be found in folder portable_binary_builder;
-The new precompiled binary (v0.5.2) is portable now;
-Fixed some bugs with CMAKE_ARGS in boostSuperBuild/SuperBuild.cmake;
-Updated GEMMA to a portable static binary;
-This binary is built using Holy Build Box in https://github.com/leoisl/gemma0.93b
-Changed parameters -nc_db and -pt_db to -nc-db and -pt-db , in order to keep consistency with the other parameters naming
-Added five new parameters to deal with binary incompatibility of the executables that are bundled with DBGWAS (GEMMA, Blast suite, phantomjs):
-GEMMA-path , -Blast-path , -phantomjs-path , -no-preview , and -Rscript-path
-Replaced std::regex to boost::regex;
-Added an automated test based on the 282 pseudomonas_aeruginosa dataset;
-Improved README.md;
Done v0.5.1:
-Fixed compilation issues
-Due to this issue: https://gitlab.com/leoisl/dbgwas/issues/2 (reported by mail from the user of issue https://gitlab.com/leoisl/dbgwas/issues/1 )
......
This diff is collapsed.
---------------------------------------------------------------------------------------------------------------------------------------------
TODO list
---------------------------------------------------------------------------------------------------------------------------------------------
Paper:
-Test on ubuntu, debian, openSuse, and any other OS we can;
-Table to track the performance of DBGWAS
-Subsampling SA, PA and TB big datasets
-100, 250, 500, 1000, 2500, 5000 10000
-Create the input files [MAGALI]
-Run once each point
-no-annotations
-no-newick
-8 cores
-Log the time and memory for each point
-Parse output file and create the following table:
Species Nb_Genome RunNb Time Mem
-Magali generate the graph
-Changed all mentions of -nc_db and -pt_db to -nc-db and -pt-db in the text;
-Put S1 online, and increment it to be a user-manual
-Other stuff needed for the paper (prioritize code, then test, then text)...
Next release (v0.5.2 ETA 31/05/2018):
Continuous genotypes/phenotypes:
- Still read the phenotype as a string, to deal with NAs.
......@@ -21,8 +40,6 @@ Priority:
3) Do a text output of DBGWAS so users can use other tools to post-process its output
4) Make step 3 multithreaded
5) Add to DBGWAS parameter -runOnlyStep1 Freq/Bin
-It will just produce the variant matrices and stop
-Freq = use frequencies
......
......@@ -10,11 +10,23 @@ list (APPEND DEPENDENCIES boost_1_62_0)
ExternalProject_Add (boost_1_62_0
URL ${PROJECT_SOURCE_DIR}/thirdparty/boost_1_62_0.tar.gz
URL_MD5 6f4571e7c5a66ccc3323da6c24be8f05
CONFIGURE_COMMAND ./bootstrap.sh --with-libraries=filesystem,system
CONFIGURE_COMMAND ./bootstrap.sh --with-libraries=filesystem,system,regex
BUILD_COMMAND ./b2 link=static
BUILD_IN_SOURCE 1
INSTALL_COMMAND ""
)
# Boost hints for the inner DBGWAS build. They must be appended BEFORE
# CMAKE_ARGS_DBGWAS is assembled below; otherwise ${EXTRA_CMAKE_ARGS} is expanded
# too early and these two definitions never reach the inner configure step.
list (APPEND EXTRA_CMAKE_ARGS
  -DBOOST_ROOT=${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Source/boost_1_62_0
  -DBoost_NO_SYSTEM_PATHS=ON)

# Arguments handed to the CMAKE_ARGS of the build_full_DBGWAS_project external project.
# This is kept as a CMake list (one element per argument): a single space-separated
# string would be forwarded as ONE argument instead of one argument per -D flag.
set (CMAKE_ARGS_DBGWAS -DUSE_SUPERBUILD=OFF ${EXTRA_CMAKE_ARGS})

# Forward the build type and the compilers only when they are set in this (outer) build.
if (CMAKE_BUILD_TYPE)
  list (APPEND CMAKE_ARGS_DBGWAS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
endif()
if (CMAKE_CXX_COMPILER)
  list (APPEND CMAKE_ARGS_DBGWAS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
endif()
if (CMAKE_C_COMPILER)
  list (APPEND CMAKE_ARGS_DBGWAS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}")
endif()
......@@ -23,6 +35,6 @@ list (APPEND EXTRA_CMAKE_ARGS
ExternalProject_Add (build_full_DBGWAS_project
DEPENDS ${DEPENDENCIES}
SOURCE_DIR ${PROJECT_SOURCE_DIR}
CMAKE_ARGS -DUSE_SUPERBUILD=OFF ${EXTRA_CMAKE_ARGS}
CMAKE_ARGS ${CMAKE_ARGS_DBGWAS}
INSTALL_COMMAND ""
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/DBGWAS)
No preview for this file type
Usage:
cd <DBGWAS_folder>/portable_binary_builder
bash build_portable_dbgwas.sh
The portable package will be in <DBGWAS_folder>/DBGWAS-<version>-Linux-precompiled.tar.gz
# Invokes docker to build portable dbgwas.
# Inspired by Páll Melsted's blog (https://pmelsted.wordpress.com/2015/10/14/building-binaries-for-bioinformatics/),
# on how he and the other authors managed to make kallisto (Nicolas L Bray, Harold Pimentel, Páll Melsted and Lior Pachter,
# Near-optimal probabilistic RNA-seq quantification, Nature Biotechnology 34, 525–527 (2016), doi:10.1038/nbt.3519)
# portable in different linux distributions.
set -eu

# DBGWAS_DIR is the parent of the directory that contains this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DBGWAS_DIR="$(dirname "${SCRIPT_DIR}")"
# Quoted so that a checkout path containing spaces is not word-split.
cd "${DBGWAS_DIR}"

# Refuse to run when a build directory already exists in the DBGWAS folder.
if [ -d "${DBGWAS_DIR}/build" ]; then
  echo "Please remove ${DBGWAS_DIR}/build before proceeding."
  exit 1
fi

# Mount the DBGWAS folder at /io in the container and run the core build script from there.
sudo docker run -t -i --rm \
  -v "${DBGWAS_DIR}:/io" \
  phusion/holy-build-box-64:latest \
  bash /io/portable_binary_builder/build_portable_dbgwas_core.sh
#!/bin/bash
# Builds portable dbgwas.
# Inspired by Páll Melsted's blog (https://pmelsted.wordpress.com/2015/10/14/building-binaries-for-bioinformatics/),
# on how he and the other authors managed to make kallisto (Nicolas L Bray, Harold Pimentel, Páll Melsted and Lior Pachter,
# Near-optimal probabilistic RNA-seq quantification, Nature Biotechnology 34, 525–527 (2016), doi:10.1038/nbt.3519)
# portable in different linux distributions.
set -e

# Activate Holy Build Box environment.
source /hbb_exe/activate

# NOTE(review): -u and -x are only switched on after the activate script has been sourced,
# presumably so they do not apply to it - confirm.
set -eux

# compile and install dbgwas
# Absolute path: the final cp below also targets /io, and a relative "cd io" would only
# work when the working directory happens to be /.
cd /io
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make

# Create the package from the DBGWAS/DBGWAS sub-directory of the build tree
# and copy the resulting archive to /io.
cd DBGWAS/DBGWAS
make package
cp DBGWAS*Linux-precompiled.tar.gz /io/
Testing if a new release is ok with the sample example:
#1. Download the dbgwas version you want to test (let's use v0.5.2 as an example):
wget https://www.dropbox.com/s/1fbf00cy8vhfxpg/DBGWAS-0.5.2-Linux-precompiled.tar.gz?dl=1 -O DBGWAS-0.5.2-Linux-precompiled.tar.gz
#2. extract and go to bin
tar -zxvf DBGWAS-0.5.2-Linux-precompiled.tar.gz
cd bin/
#3. download the test case:
wget https://gitlab.com/leoisl/dbgwas/raw/portable_binary/tests/test_DBGWAS_on_pseudomonas_aeruginosa.sh
#4. run the test case:
bash test_DBGWAS_on_pseudomonas_aeruginosa.sh
1/ Download the correct sample example output: https://www.dropbox.com/s/jp24184zony1977/correct_output.zip?dl=1
2/ Unpack it
3/ Run the new release on the sample data
4/ Compare the two outputs like:
diff -rq <correct_output> <output_new_release> | grep -v ".png" | grep -v ".log.txt" | grep -v "visualisations/index.html"
5/ If this previous command does not output anything, then it is fine
In the end, you should have something like:
Differences found (check them to see if they are really a problem):
And a list of files that differ, if they exist. Then you check the diff to see if they are really a problem.
\ No newline at end of file
# Runs DBGWAS on the pseudomonas_aeruginosa full dataset and lists the files that differ
# from the reference output. Expects to be started from the directory containing ./DBGWAS.
set -eux

#run the tested version on pseudomonas_aeruginosa_full_dataset
# The URLs are quoted because they contain "?", which the shell would otherwise treat as a glob character.
wget "https://www.dropbox.com/s/0g1llvdbfv1jys6/pseudomonas_aeruginosa_full_dataset.zip?dl=1" -O pseudomonas_aeruginosa_full_dataset.zip
unzip pseudomonas_aeruginosa_full_dataset.zip
wget "https://www.dropbox.com/s/mt3g4oh0bt5jwmr/Resistance_DB_for_DBGWAS.fasta?dl=1" -O Resistance_DB_for_DBGWAS.fasta
wget "https://www.dropbox.com/s/9y1p0yw918ips6k/uniprot_sprot_bacteria_for_DBGWAS.fasta?dl=1" -O uniprot_sprot_bacteria_for_DBGWAS.fasta
./DBGWAS -strains pseudomonas_aeruginosa_full_dataset/strains -newick pseudomonas_aeruginosa_full_dataset/strains.newick -nc-db Resistance_DB_for_DBGWAS.fasta -pt-db uniprot_sprot_bacteria_for_DBGWAS.fasta

#get the correct output
wget "https://www.dropbox.com/s/pr5vn76xksdbtzj/correct_output_v0.5.2.zip?dl=1" -O correct_output.zip
unzip correct_output.zip

#compare both outputs
echo "Differences found (check them to see if they are really a problem):"
# -F matches ".png" and ".log.txt" literally instead of treating "." as "any character".
# grep exits with status 1 when it selects no line, which is exactly the "no differences"
# case; "|| true" keeps that case from being reported as a script failure under "set -e".
diff -rq correct_output output | grep -v -F ".png" | grep -v -F ".log.txt" || true
......@@ -26,8 +26,9 @@ include_directories (${PSTREAMS_SOURCE_DIR})
set (WHEREAMI_SOURCE_DIR ${PROJECT_SOURCE_DIR}/whereami/src)
include_directories (${WHEREAMI_SOURCE_DIR})
set(BOOST_ROOT ${CMAKE_BINARY_DIR}/../Dependencies/Source/boost_1_62_0)
set(Boost_USE_STATIC_LIBS ON) #This makes boost plug the libraries we specify as static libraries (not dynamic ones - the user does not even need to have boost installed in their pc)
FIND_PACKAGE(Boost 1.62.0 COMPONENTS filesystem system REQUIRED)
FIND_PACKAGE(Boost 1.62.0 COMPONENTS filesystem system regex REQUIRED)
include_directories(${Boost_INCLUDE_DIR})
# We add the path for extra libraries
......
......@@ -22,7 +22,7 @@ BlastRecord BlastRecord::parseString (const string &str) {
boost::replace_all(header, "'", "\\'");
//parse all tags
regex expression("DBGWAS_(\\w+)_tag_*=_*([^;]+)_*;?");
boost::regex expression("DBGWAS_(\\w+)_tag_*=_*([^;]+)_*;?");
map<string, string> allTags = extractValuesWithRegex(header, expression);
//check if general tag was specified
......@@ -54,11 +54,11 @@ BlastRecord BlastRecord::parseString (const string &str) {
//parse the header and extract all the DBGWAS tags
//header is intentionally string and not const string &
map<string, string> BlastRecord::extractValuesWithRegex(string header, const regex &expression) {
map<string, string> BlastRecord::extractValuesWithRegex(string header, const boost::regex &expression) {
map<string, string> extractedValues;
smatch matchResults;
boost::smatch matchResults;
while(regex_search(header, matchResults, expression))
while(boost::regex_search(header, matchResults, expression))
{
string key = matchResults.str(1);
boost::trim_if(key, [](char c) -> bool { return c=='_';});
......@@ -78,7 +78,7 @@ vector<BlastRecord> Blast::blast (const string &command, const string &queryPath
//build the command line
stringstream ss;
ss << dirWhereDBGWASIsInstalled << DBGWAS_lib << "/" << command << " -query " << queryPath << " -db " << dbPath << " -out " << outFilePath << " -num_threads " << nbCores << " -outfmt '6 qseqid sseqid qcovs bitscore pident evalue'";
ss << blastPath << "/" << command << " -query " << queryPath << " -db " << dbPath << " -out " << outFilePath << " -num_threads " << nbCores << " -outfmt '6 qseqid sseqid qcovs bitscore pident evalue'";
string commandLine=ss.str();
executeCommand(commandLine, false);
......@@ -129,7 +129,7 @@ string Blast::makeblastdb (const string &dbtype, const string &originalDBPath, c
//create the DB using the fixed FASTA
{
string commandLineMakeblastdb = dirWhereDBGWASIsInstalled + DBGWAS_lib + string("/makeblastdb -dbtype ") + dbtype + " -in " + fixedDBPath;
string commandLineMakeblastdb = blastPath + "/makeblastdb -dbtype " + dbtype + " -in " + fixedDBPath;
executeCommand(commandLineMakeblastdb);
}
......
......@@ -11,7 +11,7 @@
#include <set>
#include <map>
#include <boost/algorithm/string.hpp>
#include <regex>
#include <boost/regex.hpp>
using namespace std;
......@@ -40,7 +40,7 @@ private:
//parse the header and extract all the DBGWAS tags
//header is intentionally string and not const string &
static map<string, string> extractValuesWithRegex(string header, const regex &expression);
static map<string, string> extractValuesWithRegex(string header, const boost::regex &expression);
};
......
......@@ -36,7 +36,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <boost/regex.hpp>
#include <assert.h>
#include <gatb/gatb_core.hpp>
......
......@@ -224,6 +224,50 @@ int getNbLinesInFile(const string &filename) {
return n;
}
// Reads the paths of the external executables (GEMMA, Blast suite, phantomjs, Rscript) from the
// command-line options into the global path variables, expands the "<DBGWAS_lib>" placeholder in
// them, and then tries to run each executable that is needed. Every check is delegated to
// executeCommand() together with a message to show if the command fails.
void checkExecutables(Tool *tool) {
    //configure the global vars of the executable paths
    gemmaPath = tool->getInput()->getStr(STR_GEMMA_PATH);
    blastPath = tool->getInput()->getStr(STR_BLAST_PATH);
    phantomjsPath = tool->getInput()->getStr(STR_PHANTOMJS_PATH);
    RscriptPath = tool->getInput()->getStr(STR_RSCRIPT_PATH);

    //expand the <DBGWAS_lib> placeholder in each of the four paths
    for (string *executablePath : {&gemmaPath, &blastPath, &phantomjsPath, &RscriptPath})
        boost::replace_all(*executablePath, "<DBGWAS_lib>", DBGWAS_lib);

    //GEMMA is always required: if it returns an exit status != 0, then it does not work and a fatal error is issued
    const string gemmaFailure = gemmaPath + " does not work, but it is required. You can install a version of GEMMA that works on your system and tell DBGWAS to use it through the parameter " + STR_GEMMA_PATH + " .";
    executeCommand(gemmaPath, false, gemmaFailure);

    //Rscript is always required as well
    const string rscriptFailure = RscriptPath + " does not work, but it is required. You can install a version of R that works on your system and tell DBGWAS to use it through the parameter " + STR_RSCRIPT_PATH + " .";
    executeCommand(RscriptPath+" --version", false, rscriptFailure);

    //runs "<blastPath>/<toolName> -version"; dbOption is the annotation parameter that makes the tool necessary
    auto checkBlastTool = [&](const char *dbOption, const string &toolName) {
        const string toolPath = blastPath + "/" + toolName;
        const string blastFailure = string("You want to annotate the output subgraphs with ") + dbOption + " , but " + toolPath +
                                    " does not work. You can install a version of the Blast suite that works on your system" +
                                    " and tell DBGWAS to use it through the parameter " + STR_BLAST_PATH + " .";
        executeCommand(toolPath + " -version", false, blastFailure);
    };

    //the Blast suite is only checked if the user wants to annotate the subgraphs
    if (tool->getInput()->get(STR_NUCLEOTIDE_DB)) {
        checkBlastTool(STR_NUCLEOTIDE_DB, "makeblastdb");
        checkBlastTool(STR_NUCLEOTIDE_DB, "blastn");
    }
    if (tool->getInput()->get(STR_PROTEIN_DB)) {
        checkBlastTool(STR_PROTEIN_DB, "makeblastdb");
        checkBlastTool(STR_PROTEIN_DB, "blastx");
    }

    //phantomjs is only checked when the -no-preview option was not given
    const bool previewRequested = (tool->getInput()->get(STR_NO_PREVIEW) == 0);
    if (previewRequested) {
        const string phantomjsFailure = phantomjsPath + " does not work, DBGWAS cant produce the components preview on the summary output page." +
                                        " Either you get a version of phantomjs that works on your system (DBGWAS was "
                                        "tested with version 2.1.1) and tell DBGWAS to use it through the parameter " + STR_PHANTOMJS_PATH +
                                        " , or you can choose to not produce the components preview "
                                        "through the parameter " + STR_NO_PREVIEW + " .";
        executeCommand(phantomjsPath+" --version", false, phantomjsFailure);
    }
}
void checkParametersBuildDBG(Tool *tool) {
//check if we skip or not
......@@ -238,6 +282,9 @@ void checkParametersBuildDBG(Tool *tool) {
return;
}
//check executables and parameters
checkExecutables(tool);
//check the count mode
//TODO: seeveral questions are still unclear if we use the Freq count mode (how to run bugwas, the coloring, etc...). For now I am disabling this option
/*
......@@ -271,6 +318,9 @@ void checkParametersStatisticalTest(Tool *tool) {
return;
}
//check executables and parameters
checkExecutables(tool);
//check if newickTreeFilePath exists
if (hasNewickFile) {
string newickTreeFilePath = tool->getInput()->getStr(STR_NEWICK_PATH);
......@@ -285,6 +335,9 @@ void checkParametersStatisticalTest(Tool *tool) {
void checkParametersGenerateOutput(Tool *tool) {
//check executables and parameters
checkExecutables(tool);
//create the output folder for step 3
string outputFolder = stripLastSlashIfExists(tool->getInput()->getStr(STR_OUTPUT))+string("/step3");
createFolder(outputFolder);
......@@ -346,6 +399,9 @@ void checkParametersGenerateOutput(Tool *tool) {
proteinDBPath = Blast::makeblastdb("prot", tool->getInput()->getStr(STR_PROTEIN_DB), outputFolder);
thereIsProteinDB=true;
}
//get the -no-preview parameter
noPreview = tool->getInput()->get(STR_NO_PREVIEW) != 0;
}
......@@ -356,7 +412,7 @@ void fatalError (const string &message) {
}
void executeCommand(const string &command, bool verbose) {
void executeCommand(const string &command, bool verbose, const string &messageIfItFails) {
// run a process and create a streambuf that reads its stdout and stderr
if (verbose)
cerr << "Executing " << command << "..." << endl;
......@@ -381,7 +437,9 @@ void executeCommand(const string &command, bool verbose) {
if (proc.rdbuf()->exited()) {
if (proc.rdbuf()->status() != 0) {
stringstream ss;
ss << "Error executing " << command << ". Exit status: " << proc.rdbuf()->status();
ss << "Error executing " << command << ". Exit status: " << proc.rdbuf()->status() << endl;
if (messageIfItFails != "")
ss << "Message: " << messageIfItFails << endl;
fatalError(ss.str());
}
if (verbose)
......
......@@ -73,7 +73,7 @@ void checkParametersBuildDBG(Tool *tool);
void checkParametersGenerateOutput(Tool *tool);
void checkParametersStatisticalTest(Tool *tool);
void fatalError (const string &message);
void executeCommand(const string &command, bool verbose=true);
void executeCommand(const string &command, bool verbose=true, const string &messageIfItFails="");
void openFileForReading(const string &filePath, ifstream &stream);
void openFileForWriting(const string &filePath, ofstream &stream);
void createFolder(const string &path);
......
......@@ -74,9 +74,16 @@ void generate_output::createIndexFile(int numberOfComponents, const string &visu
dispatcher.iterate(rangeOfComponentsIt, [&](int i) {
string HTMLFile(boost::filesystem::canonical(visualisationsFolder+"/components/comp_"+std::to_string(i)+".html").string());
string PNGFile = HTMLFile+".png";
stringstream commandSS;
commandSS << dirWhereDBGWASIsInstalled << DBGWAS_lib << "/phantomjs " << dirWhereDBGWASIsInstalled << DBGWAS_lib << "/render_graph.js " << HTMLFile << " " << PNGFile;
executeCommand(commandSS.str(), false);
if (noPreview){
boost::filesystem::copy_file(DBGWAS_lib+"/lib/resources/nopreview.png", PNGFile, boost::filesystem::copy_option::overwrite_if_exists);
}
else {
stringstream commandSS;
commandSS << phantomjsPath << " " << DBGWAS_lib << "/render_graph.js " << HTMLFile << " " << PNGFile;
executeCommand(commandSS.str(), false);
}
});
cerr << "[Rendering thumbnails...] - Done!" << endl;
......@@ -89,7 +96,7 @@ void generate_output::createIndexFile(int numberOfComponents, const string &visu
//get the template preview
string templatePreview="";
{
auto indexTableTemplateAsStringVector = getVectorStringFromFile(dirWhereDBGWASIsInstalled + DBGWAS_lib + string("/index_table_template.html"));
auto indexTableTemplateAsStringVector = getVectorStringFromFile(DBGWAS_lib + "/index_table_template.html");
for (const auto &line : indexTableTemplateAsStringVector)
templatePreview += line;
}
......@@ -146,7 +153,7 @@ void generate_output::createIndexFile(int numberOfComponents, const string &visu
//create the index file
//read template file
string templatePath = dirWhereDBGWASIsInstalled + DBGWAS_lib + string("/index_template.html");
string templatePath = DBGWAS_lib + "/index_template.html";
string indexOutput = readFileAsString(templatePath.c_str());
......@@ -380,7 +387,7 @@ void generate_output::generateCytoscapeOutput(const graph_t &graph, const vector
//create the graph file
//read template file
string templatePath = dirWhereDBGWASIsInstalled + DBGWAS_lib + string("/cytoscape_template.html");
string templatePath = DBGWAS_lib + "/cytoscape_template.html";
string cytoscapeOutput = readFileAsString(templatePath.c_str());
//put the graph in the template file
......@@ -415,8 +422,8 @@ void generate_output::generateCytoscapeOutput(const graph_t &graph, const vector
//copy the lib folder, if it is not already copied
string fromLibPath = dirWhereDBGWASIsInstalled + DBGWAS_lib + string("/lib");
string toLibPath = visualisationsFolder + string("/components/lib");
string fromLibPath = DBGWAS_lib + "/lib";
string toLibPath = visualisationsFolder + "/components/lib";
if (!boost::filesystem::exists(toLibPath))
copyDirectoryRecursively(fromLibPath, toLibPath);
cerr << "Building Cytoscape graph... - Done!" << endl;
......
......@@ -40,16 +40,23 @@ const char* STR_SKIP1 = "-skip1";
const char* STR_SKIP2 = "-skip2";
const char* STR_NEWICK_PATH = "-newick";
const char* STR_SFF = "-SFF";
const char* STR_NUCLEOTIDE_DB = "-nc_db";
const char* STR_PROTEIN_DB = "-pt_db";
const char* STR_NUCLEOTIDE_DB = "-nc-db";
const char* STR_PROTEIN_DB = "-pt-db";
const char* STR_MAF_FILTER = "-maf";
const char* STR_GEMMA_PATH = "-GEMMA-path";
const char* STR_BLAST_PATH = "-Blast-path";
const char* STR_PHANTOMJS_PATH = "-phantomjs-path";
const char* STR_RSCRIPT_PATH = "-Rscript-path";
const char* STR_NO_PREVIEW = "-no-preview";
//TODO: several questions are still unclear if we use the Freq count mode (how to run bugwas, the coloring, etc...). For now I am disabling this option
//const char* STR_COUNT_MODE = "-count";
//TODO: several questions are still unclear if we use the Freq count mode (how to run bugwas, the coloring, etc...). For now I am disabling this option
string dirWhereDBGWASIsInstalled="";
string DBGWAS_lib = "/DBGWAS_lib";
//variables controlling where the executable is
string DBGWAS_lib = ""; //set on first command in main.cpp
bool skip1 = false;
bool skip2 = false;
bool hasNewickFile = false;
......@@ -59,6 +66,11 @@ bool thereIsNucleotideDB = false;
string nucleotideDBPath;
bool thereIsProteinDB = false;
string proteinDBPath;
string gemmaPath;
string blastPath;
string phantomjsPath;
string RscriptPath;
bool noPreview = false;
//global vars used by both programs
Graph* graph;
......@@ -67,14 +79,19 @@ vector< Strain >* strains = NULL;
void populateParser (Tool *tool) {
// We add some custom arguments for command line interface
tool->getParser()->push_front (new OptionOneParam (STR_RSCRIPT_PATH, "Path to Rscript.", false, "Rscript"));
tool->getParser()->push_front (new OptionNoParam (STR_NO_PREVIEW, "Do not produce the components preview in the summary output page.", false));
tool->getParser()->push_front (new OptionOneParam (STR_PHANTOMJS_PATH, "Path to phantomjs executable (DBGWAS was tested only with version 2.1.1).", false, "<DBGWAS_lib>/phantomjs"));
tool->getParser()->push_front (new OptionOneParam (STR_BLAST_PATH, "Path to the directory containing the Blast suite (should contain at least blastn, blastx, and makeblastdb).", false, "<DBGWAS_lib>/"));
tool->getParser()->push_front (new OptionOneParam (STR_GEMMA_PATH, "Path to the GEMMA executable.", false, "<DBGWAS_lib>/gemma.0.93b"));
tool->getParser()->push_front (new OptionOneParam (STR_MAF_FILTER, "Minor Allele Frequency Filter.", false, "0.01"));
tool->getParser()->push_front (new OptionOneParam (STR_MAX_NEIGHBOURHOOD, "Denotes the neighbourhood to be considered around the significant unitigs.", false, "5"));
tool->getParser()->push_front (new OptionOneParam (STR_SFF, "Denotes the Significant Features Filter - the features (or patterns) selected to create a visualisation around them. If it is a float number n, then only the features with q-value<=n are selected. If it is an integer n, then only the n first features are selected. Take a look at the output/step2/patterns.txt file to get a list of features ordered by q-value to better choose this parameter (re-run the tool with -skip2 in order to directly produce the visualisation of the features selected by your parameter).", false, "100"));
tool->getParser()->push_front (new OptionNoParam (STR_SKIP2, "Skips Steps 1 and 2, running only Step 3. Assumes that Steps 1 and 2 were correctly run and folders \"step1\" and \"step2\" are present in the output folder.", false));
tool->getParser()->push_front (new OptionNoParam (STR_SKIP1, "Skips Step 1, running only Steps 2 and 3. Assumes that Step 1 was correctly run and folder \"step1\" is present in the output folder.", false));
tool->getParser()->push_front (new OptionOneParam (STR_OUTPUT, "Path to the folder where the final and temporary files will be stored.", false, "output"));
tool->getParser()->push_front (new OptionOneParam (STR_PROTEIN_DB, "A list of Fasta files separated by comma containing annotations in a protein alphabet format (e.g.: -pt_db path/to/file_1.fa,path/to/file_2.fa,etc). You can customize these files to work better with DBGWAS (see https://gitlab.com/leoisl/dbgwas/tree/master#customizing-annotation-databases).", false));
tool->getParser()->push_front (new OptionOneParam (STR_NUCLEOTIDE_DB, "A list of Fasta files separated by comma containing annotations in a nucleotide alphabet format (e.g.: -nc_db path/to/file_1.fa,path/to/file_2.fa,etc). You can customize these files to work better with DBGWAS (see https://gitlab.com/leoisl/dbgwas/tree/master#customizing-annotation-databases).", false));
tool->getParser()->push_front (new OptionOneParam (STR_PROTEIN_DB, "A list of Fasta files separated by comma containing annotations in a protein alphabet format (e.g.: -pt-db path/to/file_1.fa,path/to/file_2.fa,etc). You can customize these files to work better with DBGWAS (see https://gitlab.com/leoisl/dbgwas/tree/master#customizing-annotation-databases).", false));
tool->getParser()->push_front (new OptionOneParam (STR_NUCLEOTIDE_DB, "A list of Fasta files separated by comma containing annotations in a nucleotide alphabet format (e.g.: -nc-db path/to/file_1.fa,path/to/file_2.fa,etc). You can customize these files to work better with DBGWAS (see https://gitlab.com/leoisl/dbgwas/tree/master#customizing-annotation-databases).", false));
tool->getParser()->push_front (new OptionOneParam (STR_NEWICK_PATH, "Optional path to a newick tree file. If (and only if) a newick tree file is provided, the lineage effect analysis is computed and PCs figures are generated.", false));
tool->getParser()->push_front (new OptionOneParam (STR_KSKMER_SIZE, "K-mer size.", false, "31"));
tool->getParser()->push_front (new OptionOneParam (STR_STRAINS_FILE, "A text file describing the strains containing 3 columns: 1) ID of the strain; 2) Phenotype (0/1/NA); 3) Path to a multi-fasta file containing the sequences of the strain. This file needs a header. Check the sample_example folder or https://gitlab.com/leoisl/dbgwas/raw/master/sample_example/strains for an example.", true));
......
......@@ -49,12 +49,15 @@ extern const char* STR_SFF;
extern const char* STR_NUCLEOTIDE_DB;
extern const char* STR_PROTEIN_DB;
extern const char* STR_MAF_FILTER;
extern const char* STR_GEMMA_PATH;
extern const char* STR_BLAST_PATH;
extern const char* STR_PHANTOMJS_PATH;
extern const char* STR_RSCRIPT_PATH;
extern const char* STR_NO_PREVIEW;
//TODO: several questions are still unclear if we use the Freq count mode (how to run bugwas, the coloring, etc...). For now I am disabling this option
//extern const char* STR_COUNT_MODE;
//TODO: several questions are still unclear if we use the Freq count mode (how to run bugwas, the coloring, etc...). For now I am disabling this option
extern string dirWhereDBGWASIsInstalled;
extern string DBGWAS_lib;
extern bool skip1;
extern bool skip2;
......@@ -65,6 +68,11 @@ extern string nucleotideDBPath;
extern bool thereIsProteinDB;
extern string proteinDBPath;
extern boost::variant< int, double > SFF;
extern string gemmaPath;
extern string blastPath;
extern string phantomjsPath;
extern string RscriptPath;
extern bool noPreview;
void populateParser (Tool *tool);
......
......@@ -38,7 +38,7 @@
#define BOOST_NO_CXX11_SCOPED_ENUMS
#include <boost/filesystem.hpp>
#undef BOOST_NO_CXX11_SCOPED_ENUMS
#include <regex>
#include <boost/regex.hpp>
using namespace std;
/********************************************************************************/
......@@ -49,7 +49,7 @@ int main (int argc, char* argv[])
srand (time(NULL));
//get the path to the dir were the executable is
dirWhereDBGWASIsInstalled = getDirWhereDBGWASIsInstalled();
DBGWAS_lib = getDirWhereDBGWASIsInstalled() + "/DBGWAS_lib/";
try
{
......
......@@ -66,13 +66,13 @@ void statistical_test::execute () {
//create the command line
stringstream ssCommand;
ssCommand << "Rscript --vanilla "
<< (dirWhereDBGWASIsInstalled+DBGWAS_lib) << "/DBGWAS.R "
<< (dirWhereDBGWASIsInstalled+DBGWAS_lib) << " "
ssCommand << RscriptPath << " --vanilla "
<< DBGWAS_lib << "/DBGWAS.R "
<< DBGWAS_lib << " "
<< step1OutputFolder.string() << " "
<< step1OutputFolder.string() << "/bugwas_input.id_phenotype "
<< "bugwas_out "
<< (dirWhereDBGWASIsInstalled+DBGWAS_lib) << "/gemma.0.93b "
<< gemmaPath << " "
<< mafFilter << " ";
if (hasNewickFile)
ssCommand << newickPath.string() << " ";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment