Commit 1bd7f8e3 authored by Matthew Berkeley

BUSCO 4.0.3

parent bb280b01
4.0.3
- Issue #190 fixed
- Issue #191 fixed
- Issue #196 fixed
- Issue #200 fixed
- Reintroduce full retraining for all eukaryote runs
- Fix retraining bug
4.0.2
- Issue #182 partially fixed
......
......@@ -29,13 +29,18 @@ To get help on BUSCO use: ``busco -h`` and ``python3 scripts/generate_plot.py -h
**!!!** Don't use "odb9" datasets with BUSCOv4. If you need to reproduce previous analyses, use BUSCOv3 (https://gitlab.com/ezlab/busco/-/tags/3.0.2)
Note: When running auto-lineage, the initial results for eukaryotes are incomplete. This is deliberate, as these initial
results are used merely to determine whether the genome scores highest against the bacteria, archaea or eukaryota
datasets. If the eukaryota dataset is selected, BUSCO then attempts to place the input assembly on the eukaryote
phylogenetic tree before running a complete BUSCO assessment using the selected child dataset. Unless the top-level
eukaryota dataset is selected as the best match for the input file, the eukaryota dataset run will not complete. So
while the specific dataset run will return accurate results, the generic eukaryota dataset run should be considered
unreliable.
Note: While preparing the release of v4.0.3, we found and fixed a bug in the genome mode for the eukaryote pipeline. We recommend repeating
any affected runs done using previous 4.x versions with the updated version of the software.
Note: For v4.0.2 and before, when running auto-lineage, the initial results for eukaryotes were incomplete. This was
deliberate, as these initial results were used merely to determine whether the genome scored highest against the
bacteria, archaea or eukaryota datasets. If the eukaryota dataset was selected, BUSCO then attempted to place the input
assembly on the eukaryote phylogenetic tree before running a complete BUSCO assessment using the selected child dataset.
Unless the top-level eukaryota dataset was selected as the best match for the input file, the eukaryota dataset run
did not complete. So while the specific dataset run returned accurate results, the generic eukaryota dataset run
from those versions should be considered unreliable.
This has been changed in v4.0.3. The eukaryota run now always completes so the final generic eukaryota results can be
considered reliable.
**How to cite BUSCO**
......
......@@ -80,15 +80,21 @@ class NucleotideAnalysis(metaclass=ABCMeta):
super().check_tool_dependencies()
def _get_blast_version(self):
blast_version_call = subprocess.check_output([self._tblastn_tool.cmd, "-version"], shell=False)
blast_version = ".".join(blast_version_call.decode("utf-8").split("\n")[0].split()[1].rsplit(".")[:-1])
return blast_version
mkblastdb_version_call = subprocess.check_output([self._mkblast_tool.cmd, "-version"], shell=False)
mkblastdb_version = ".".join(mkblastdb_version_call.decode("utf-8").split("\n")[0].split()[1].rsplit(".")[:-1])
tblastn_version_call = subprocess.check_output([self._tblastn_tool.cmd, "-version"], shell=False)
tblastn_version = ".".join(tblastn_version_call.decode("utf-8").split("\n")[0].split()[1].rsplit(".")[:-1])
if mkblastdb_version != tblastn_version:
logger.warning("You are using version {} of mkblastdb and version {} of tblastn.".format(mkblastdb_version, tblastn_version))
return tblastn_version
def _run_mkblast(self):
self.mkblast_runner = MKBLASTRunner(self._mkblast_tool, self._input_file, self.main_out, self._cpus)
self.mkblast_runner.run()
@log("Running a BLAST search for BUSCOs against created database", logger)
def _run_tblastn(self, missing_and_frag_only=False, ancestral_variants=False):
incomplete_buscos = (self.hmmer_runner.missing_buscos + list(self.hmmer_runner.fragmented_buscos.keys())
......
......@@ -202,26 +202,14 @@ class AutoSelectLineage:
run_folder = os.path.join(out_path, "auto_lineage", self.selected_runner.config.get("busco_run", "lineage_results_dir"))
bp = BuscoPlacer(self.selected_runner.config, run_folder, protein_seqs, self.selected_runner.analysis.hmmer_runner.single_copy_buscos)
dataset_details, placement_file_versions = bp.define_dataset()
self.config.placement_files = placement_file_versions # Necessary to pass these filenames to the final run to be recorded.
lineage, supporting_markers, placed_markers = dataset_details
lineage = "{}_{}".format(lineage, self.config.get("busco_run", "datasets_version")) # todo: this should probably be done in buscoplacer
self.best_match_lineage_dataset = os.path.join(self.config.get("busco_run", "download_path"),
"lineages",
os.path.basename(lineage))
self.record_placement_file_versions(run_folder, placement_file_versions)
return
def record_placement_file_versions(self, run_folder, placement_file_versions):
try:
with open(os.path.join(run_folder, "short_summary.txt"), "a") as summary_file:
summary_file.write("\nPlacement file versions:\n")
for placement_file in placement_file_versions:
summary_file.write("{}\n".format(placement_file))
except OSError:
pass
return
def _run_3_datasets(self, mollicutes_runner=None):
if mollicutes_runner:
datasets = ["mycoplasmatales", "entomoplasmatales"]
......
......@@ -82,6 +82,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
self.s_percent = None
self.d_percent = None
self.f_percent = None
self.all_single_copy_buscos = {}
self._log_count = 0 # Dummy variable used to skip logging for intermediate eukaryote pipeline results.
# TODO: catch unicode encoding exception and report invalid character line instead of doing content validation
......@@ -320,8 +321,8 @@ class BuscoAnalysis(metaclass=ABCMeta):
self.hmmer_results_lines = []
self.hmmer_results_lines.append("***** Results: *****\n\n")
self.one_line_summary = "C:{}%[S:{}%,D:{}%],F:{}%,M:{}%,n:{}\t{}\n".format(
round(self.s_percent + self.d_percent, 1), self.s_percent, self.d_percent, self.f_percent,
round(100 - self.s_percent - self.d_percent - self.f_percent, 1), total_buscos, " ")
round(self.s_percent + self.d_percent, 1), self.s_percent, self.d_percent,
self.f_percent, abs(round(100 - self.s_percent - self.d_percent - self.f_percent, 1)), total_buscos, " ")
self.hmmer_results_lines.append(self.one_line_summary)
self.hmmer_results_lines.append("{}\tComplete BUSCOs (C)\t\t\t{}\n".format(single_copy + multi_copy, " "))
self.hmmer_results_lines.append("{}\tComplete and single-copy BUSCOs (S)\t{}\n".format(single_copy, " "))
......@@ -340,6 +341,13 @@ class BuscoAnalysis(metaclass=ABCMeta):
for line in self.hmmer_results_lines:
summary_file.write("\t{}".format(line))
if self._config.getboolean("busco_run", "auto-lineage") and isinstance(self._config, BuscoConfigMain) \
and hasattr(self._config, "placement_files"):
summary_file.write("\nPlacement file versions:\n")
for placement_file in self._config.placement_files:
summary_file.write("{}\n".format(placement_file))
if isinstance(self._config, BuscoConfigAuto): # todo: rework this if/else block
self._one_line_hmmer_summary()
elif self._domain == "eukaryota" and self._log_count == 0:
......@@ -381,6 +389,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
self.hmmer_runner.load_buscos()
self.hmmer_runner.run()
self.hmmer_runner.process_output()
self.all_single_copy_buscos.update(self.hmmer_runner.single_copy_buscos)
self._write_hmmer_results()
self._produce_hmmer_summary()
return
......@@ -482,9 +491,9 @@ class BuscoAnalysis(metaclass=ABCMeta):
"""
with open(os.path.join(self.run_folder, "full_table.tsv"), "w") as f_out:
self._write_output_header(f_out)
output_lines = self.hmmer_runner._create_output_content()
self._write_output_header(f_out)
with open(os.path.join(self.run_folder, "missing_busco_list.tsv"), "w") as miss_out:
......
......@@ -132,11 +132,19 @@ class BuscoDownloadManager:
raise SystemExit("{} does not exist".format(data_name))
if self.offline:
if category == 'lineages':
return os.path.join(self.local_download_path, category, data_name)
local_dataset = os.path.join(self.local_download_path, category, data_name)
if os.path.exists(local_dataset):
return local_dataset
else:
raise SystemExit("Unable to run BUSCO in offline mode. Dataset {} does not exist.".format(local_dataset))
else:
basename, extension = os.path.splitext(data_name)
return sorted(glob.glob(os.path.join(
self.local_download_path, category, "{}.*.{}".format(basename, extension))))[-1]
placement_files = sorted(glob.glob(os.path.join(
self.local_download_path, category, "{}.*{}".format(basename, extension))))
if len(placement_files) > 0:
return placement_files[-1] # todo: for offline mode, log which files are being used (in case of more than one glob match)
else:
raise SystemExit("Unable to run BUSCO placer in offline mode. Cannot find necessary placement files in {}".format(self.local_download_path))
data_basename = os.path.basename(data_name)
local_filepath = os.path.join(self.local_download_path, category, data_basename)
present, up_to_date, latest_version, local_filepath, hash = self._check_existing_version(local_filepath, category,
......
......@@ -248,6 +248,8 @@ class BuscoLogger(logging.getLoggerClass()):
self.addHandler(self._err_hdlr)
# Random id used in filename to avoid complications for parallel BUSCO runs.
if not os.access(os.getcwd(), os.W_OK):
raise SystemExit("No permission to write in the current directory.")
self._file_hdlr = logging.FileHandler("busco_{}.log".format(type(self).random_id), mode="a")
self._file_hdlr.setLevel(logging.DEBUG)
self._file_hdlr.setFormatter(self._verbose_formatter)
......
......@@ -49,7 +49,8 @@ class BuscoRunner:
self.analysis._cleanup()
except NoGenesError as nge:
no_genes_msg = "{} did not recognize any genes matching the dataset {} in the input file.\n".format(
no_genes_msg = "{0} did not recognize any genes matching the dataset {1} in the input file. " \
"If this is unexpected, check your input file and your installation of {0}\n".format(
nge.gene_predictor, self.analysis._lineage_name)
fatal = (isinstance(self.config, BuscoConfigMain)
or (self.config.getboolean("busco_run", "auto-lineage-euk") and self.mode == "euk_genome")
......@@ -69,14 +70,6 @@ class BuscoRunner:
raise se
return callback(s_buscos, d_buscos, f_buscos, s_percent, d_percent, f_percent)
def complete_eukaryote_run(self):
try:
assert self.config.get("busco_run", "domain") == "eukaryota"
self.analysis.rerun_analysis()
except AssertionError:
raise SystemExit("Eukaryote analysis can only be completed using the eukaryota domain")
def format_results(self):
framed_output = []
if len(type(self).results_datasets) == 1:
......@@ -187,8 +180,8 @@ class BuscoRunner:
if not logger.has_warning():
logger.info("BUSCO analysis done. Total running time: {} seconds".format(str(round(elapsed_time))))
else:
logger.info("BUSCO analysis done with WARNING(s). Total running time: {} seconds\n"
"***** Summary of warnings: *****\n".format(str(round(elapsed_time))))
logger.info("BUSCO analysis done with WARNING(s). Total running time: {} seconds\n\n"
"***** Summary of warnings: *****".format(str(round(elapsed_time))))
for item in type(logger).warn_output.getvalue().split("\n"):
print(item)
......
......@@ -693,7 +693,7 @@ class HMMERRunner:
return links_info
def _format_output_lines(self, busco_dict):
def _format_output_lines(self, busco_dict, label):
"""
Format BUSCO matches from input dictionary into output lines for writing to a file.
:param busco_dict: one of [self.single_copy_buscos, self.multi_copy_buscos, self.fragmented_buscos]
......@@ -716,22 +716,27 @@ class HMMERRunner:
desc = links_info[busco]["description"]
link = links_info[busco]["link"]
self.extra_columns = True
output_lines.append("{}\tComplete\t{}\t{}\t{}\t{}\t{}\n".format(busco, gene_id, bit_score,
match_length, link, desc))
output_lines.append("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(busco, label, gene_id, bit_score,
match_length, link, desc))
except KeyError:
output_lines.append("{}\tComplete\t{}\t{}\t{}\n".format(busco, gene_id,bit_score,
match_length))
output_lines.append("{}\t{}\t{}\t{}\t{}\n".format(busco, label, gene_id,bit_score,
match_length))
elif self.mode == "genome":
scaffold = self.gene_details[gene_id][m]
location_pattern = ":{}-{}".format(scaffold["gene_start"], scaffold["gene_end"])
if gene_id.endswith(location_pattern):
gene_id = gene_id.replace(location_pattern, "")
try:
desc = links_info[busco]["description"]
link = links_info[busco]["link"]
output_lines.append("{}\tComplete\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
busco, gene_id, scaffold["gene_start"], scaffold["gene_end"], bit_score, match_length,
link, desc))
self.extra_columns = True
output_lines.append("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
busco, label, gene_id, scaffold["gene_start"], scaffold["gene_end"], bit_score,
match_length, link, desc))
except KeyError:
output_lines.append("{}\tComplete\t{}\t{}\t{}\t{}\t{}\n".format(
busco, gene_id, scaffold["gene_start"], scaffold["gene_end"], bit_score, match_length))
output_lines.append("{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
busco, label, gene_id, scaffold["gene_start"], scaffold["gene_end"], bit_score,
match_length))
return output_lines
def _create_output_content(self):
......@@ -741,8 +746,11 @@ class HMMERRunner:
:rtype: list
"""
output_lines = []
for busco_dict in [self.single_copy_buscos, self.multi_copy_buscos, self.fragmented_buscos]:
output_lines += self._format_output_lines(busco_dict)
dict_labels = {"Complete": self.single_copy_buscos,
"Duplicated": self.multi_copy_buscos,
"Fragmented": self.fragmented_buscos}
for label, busco_dict in dict_labels.items():
output_lines += self._format_output_lines(busco_dict, label)
return output_lines
......@@ -884,6 +892,7 @@ class TBLASTNRunner:
if not os.path.exists(self.output_seqs):
os.makedirs(self.output_seqs)
@log("Running a BLAST search for BUSCOs against created database", logger)
def run(self):
self.tblastn_tool.total = 0
self.tblastn_tool.nb_done = 0
......@@ -943,8 +952,8 @@ class TBLASTNRunner:
if any(b in record.id for b in self.incomplete_buscos):
# Remove the ancestral variant identifier ("_1" etc) so it matches all other BUSCO IDs.
# The identifier is still present in the "name" and "description" Sequence Record attributes.
logger.debug("Found ancestral proteins for {}".format(record.id))
record.id = record.id.split("_")[0]
logger.debug("Found contig {}".format(record.id))
busco_ids_retrieved.add(record.id)
matched_seqs.append(record)
......@@ -1195,9 +1204,19 @@ class TBLASTNRunner:
SeqIO.write(record, out, "fasta")
return
class AugustusParsingError(Exception):
    """
    Raised when the user-supplied Augustus parameter string is malformed.

    The expected format is "--param1=value1 --param2=value2". The standard Exception constructor is kept
    (no custom __init__) so that an optional message can be attached and is available through ``args``.
    """
class AugustusRunner:
ACCEPTED_PARAMETERS = ["strand", "genemodel", "singlestrand", "hintsfile", "extrinsicCfgFile", "maxDNAPieceSize",
"protein", "introns", "start", "stop", "cds", "AUGUSTUS_CONFIG_PATH",
"alternatives-from-evidence", "alternatives-from-sampling", "sample", "minexonintronprob",
"minmeanexonintronprob", "maxtracks", "gff3", "UTR", "outfile", "noInFrameStop",
"noprediction", "contentmodels", "translation_table", "temperature"]
def __init__(self, augustus_tool, output_folder, seqs_path, target_species, lineage_dataset, params, coords, cpus,
log_path, sequences_aa, sequences_nt):
self.augustus_tool = augustus_tool
......@@ -1207,6 +1226,8 @@ class AugustusRunner:
self.target_species = target_species
self.lineage_dataset = lineage_dataset
self.params = params
self.param_keys = []
self.param_values = []
self.coords = coords
self.cpus = cpus
......@@ -1234,11 +1255,40 @@ class AugustusRunner:
os.makedirs(self.tmp_dir)
return
def parse_parameters(self):
    """
    Parse the user-supplied Augustus parameter string into parallel lists of keys and values.

    The expected format is "--param1=value1 --param2=value2". Surrounding quotes and spaces are removed from
    ``self.params`` (the attribute is updated in place) before parsing. A parameter whose key is not listed in
    ``ACCEPTED_PARAMETERS`` is skipped with a warning. If the string does not start with "--", or any entry
    does not consist of exactly one "key=value" pair, a warning is logged and no parameters are returned.

    :return: accepted parameter keys and their corresponding values, in matching order
    :rtype: tuple(list, list)
    """
    accepted_keys = []
    accepted_values = []
    if not self.params:
        return accepted_keys, accepted_values

    self.params = self.params.strip("\" \'")
    try:
        if not self.params.startswith("--"):
            raise AugustusParsingError
        for kv in self.params.split(" --"):
            key_vals = kv.strip("- ").split("=")
            if len(key_vals) != 2:
                raise AugustusParsingError
            # Strip before the membership test so that "--key =value" is recognised as "key".
            key = key_vals[0].strip()
            val = key_vals[1].strip()
            if key in type(self).ACCEPTED_PARAMETERS:
                accepted_keys.append(key)
                accepted_values.append(val)
            else:
                logger.warning("{} is not an accepted parameter for Augustus.".format(key))
    except AugustusParsingError:
        logger.warning(
            "Augustus parameters are not correctly formatted. Please enter them as follows: "
            "\"--param1=value1 --param2=value2\" etc. Proceeding without additional parameters.")
        return [], []
    return accepted_keys, accepted_values
def run(self):
# Todo: refactor logger calls into decorator pattern
logger.info("Running Augustus prediction using {} as species:".format(self.target_species))
if self.params:
logger.info("Additional parameters for Augustus are {}: ".format(self.params))
self.param_keys, self.param_values = self.parse_parameters()
self.augustus_tool.total = self._count_jobs()
self.augustus_tool.count_jobs_created = False
......@@ -1254,6 +1304,9 @@ class AugustusRunner:
for filename in files:
self._extract_genes_from_augustus_output(filename)
self.output_sequences = [os.path.join(self.extracted_prot_dir, f) for f in
os.listdir(self.extracted_prot_dir) if f.split(".")[-2] == "faa"]
if not self.any_gene_found:
raise NoGenesError("Augustus")
......@@ -1313,9 +1366,8 @@ class AugustusRunner:
augustus_job.add_parameter("--predictionStart={}".format(contig_start))
augustus_job.add_parameter("--predictionEnd={}".format(contig_end))
augustus_job.add_parameter("--species={}".format(self.target_species))
for p in self.params.split():
if len(p) > 2:
augustus_job.add_parameter(p)
for k, key in enumerate(self.param_keys):
augustus_job.add_parameter("--{}={}".format(key, self.param_values[k]))
augustus_job.add_parameter(os.path.join(self.seqs_path, contig_tmp_file))
return
......@@ -1418,7 +1470,7 @@ class AugustusRunner:
output_fna = os.path.join(self.extracted_prot_dir, filename.replace("out", "fna"))
output_faa = os.path.join(self.extracted_prot_dir, filename.replace("out", "faa"))
self.output_sequences.append(output_faa)
# self.output_sequences.append(output_faa)
with open(output_fna, "w") as out_fna:
SeqIO.write(sequences_nt, out_fna, "fasta")
......
......@@ -12,7 +12,7 @@ Licensed under the MIT license. See LICENSE.md file.
"""
from busco.BuscoAnalysis import BuscoAnalysis
from busco.Analysis import NucleotideAnalysis
from busco.BuscoTools import ProdigalRunner, AugustusRunner, GFF2GBRunner, NewSpeciesRunner, ETrainingRunner, OptimizeAugustusRunner
from busco.BuscoTools import ProdigalRunner, AugustusRunner, GFF2GBRunner, NewSpeciesRunner, ETrainingRunner, OptimizeAugustusRunner, NoGenesError
from busco.BuscoConfig import BuscoConfigAuto
import os
import shutil
......@@ -344,6 +344,10 @@ class GenomeAnalysisEukaryotes(GenomeAnalysis):
output_dir = os.path.join(self.run_folder, "augustus_output")
if not os.path.exists(output_dir): # TODO: consider grouping all create_dir calls into one function for all tools
os.mkdir(output_dir)
# if self.augustus_runner:
# self.augustus_runner.coords = coords
# self.augustus_runner.target_species = self._target_species
# else:
self.augustus_runner = AugustusRunner(self._augustus_tool, output_dir, self.tblastn_runner.output_seqs, self._target_species,
self._lineage_dataset, self._augustus_parameters, coords,
self._cpus, self.log_folder, self.sequences_aa, self.sequences_nt)
......@@ -410,9 +414,10 @@ class GenomeAnalysisEukaryotes(GenomeAnalysis):
self._run_augustus(coords)
self._gene_details = self.augustus_runner.gene_details
self.run_hmmer(self.augustus_runner.output_sequences)
if self.busco_type == "main":
self.rerun_analysis()
self.rerun_analysis()
@log("Starting second step of analysis. The gene predictor Augustus is retrained using the results from the "
"initial run to yield more accurate results.", logger)
def rerun_analysis(self):
# self._fix_restart_augustus_folder() # todo: reintegrate this when checkpoints are restored
......@@ -435,17 +440,20 @@ class GenomeAnalysisEukaryotes(GenomeAnalysis):
self._run_optimize_augustus(new_species_name)
self._run_etraining()
self._rerun_augustus(coords)
self._gene_details.update(self.augustus_runner.gene_details)
self.run_hmmer(self.augustus_runner.output_sequences)
self._write_buscos_to_file(self.sequences_aa, self.sequences_nt)
self._move_retraining_parameters() # todo: clean species folder on systemexit
try:
self._rerun_augustus(coords)
self._gene_details.update(self.augustus_runner.gene_details)
self.run_hmmer(self.augustus_runner.output_sequences)
self._write_buscos_to_file(self.sequences_aa, self.sequences_nt)
except NoGenesError:
logger.warning("No genes found on Augustus rerun.")
# self._move_retraining_parameters()
# if self._tarzip:
# self._run_tarzip_augustus_output()
# self._run_tarzip_hmmer_output()
# remove the checkpoint, run is done
self._set_checkpoint()
# self._set_checkpoint()
return
def _check_file_dependencies(self): # todo: currently only implemented for GenomeAnalysisEukaryotes, checking Augustus dirs. Does it need to be rolled out for all analyses?
......@@ -495,6 +503,11 @@ class GenomeAnalysisEukaryotes(GenomeAnalysis):
shutil.rmtree(augustus_tmp)
except:
pass
try:
if self._target_species.startswith("BUSCO"):
self._move_retraining_parameters()
except:
pass
super()._cleanup()
......
......@@ -6,4 +6,4 @@ Copyright (c) 2016-2020, Evgeny Zdobnov ([email protected])
Licensed under the MIT license. See LICENSE.md file.
"""
__version__ = "4.0.2"
__version__ = "4.0.3"
......@@ -64,6 +64,10 @@ def _parse_args():
help='Give your analysis run a recognisable short name. '
'Output folders and files will be labelled with this name. WARNING: do not provide a path')
optional.add_argument(
'--out_path', dest='out_path', required=False, metavar='OUTPUT_PATH',
help='Optional location for results folder, excluding results folder name. Default is current working directory.')
optional.add_argument(
'-e', '--evalue', dest='evalue', required=False, metavar='N', type=float,
help='E-value cutoff for BLAST searches. '
......@@ -174,8 +178,7 @@ def run_BUSCO(params):
if config.getboolean("busco_run", "auto-lineage"):
if lineage_basename.startswith(("bacteria", "archaea", "eukaryota")):
busco_run = config_manager.runner
if lineage_basename.startswith("eukaryota") and busco_run.mode == "genome":
busco_run.complete_eukaryote_run()
# It is possible that the following lineages were arrived at either by the Prodigal genetic code shortcut or by
# BuscoPlacer. If the former, the run will have already been completed. If the latter it still needs to be done.
elif lineage_basename.startswith(("mollicutes", "mycoplasmatales", "entomoplasmatales")) and \
......
INFO: ***** Start a BUSCO analysis, current time: 12/18/2019 16:14:53 *****
INFO: ***** Start a BUSCO v4.0.3 analysis, current time: 02/11/2020 14:22:28 *****
INFO: Configuring BUSCO with /busco/config/config.ini
INFO: Mode is genome
INFO: Input file is genome.fna
INFO: Downloading information on latest versions of BUSCO data...
WARNING: Running Auto Lineage Selector as no lineage dataset was specified. This may take a little longer than normal. If you know what lineage dataset you want to use, please specify this in the config file or using the -l (--lineage-dataset) flag in the command line.
WARNING: Running Auto Lineage Selector as no lineage dataset was specified. This will take a little longer than normal. If you know what lineage dataset you want to use, please specify this in the config file or using the -l (--lineage-dataset) flag in the command line.
INFO: No lineage specified. Running lineage auto selector.
INFO: ***** Starting Auto Select Lineage *****
......@@ -20,11 +20,12 @@ INFO: [prodigal] 1 of 1 task(s) completed
INFO: Genetic code 11 selected as optimal
INFO: ***** Run HMMER on gene sequences *****
INFO: Running 194 job(s) on hmmsearch
INFO: [hmmsearch] 20 of 194 task(s) completed
INFO: [hmmsearch] 39 of 194 task(s) completed
INFO: [hmmsearch] 59 of 194 task(s) completed
INFO: [hmmsearch] 78 of 194 task(s) completed
INFO: [hmmsearch] 97 of 194 task(s) completed
INFO: [hmmsearch] 117 of 194 task(s) completed
INFO: [hmmsearch] 136 of 194 task(s) completed
INFO: [hmmsearch] 156 of 194 task(s) completed
INFO: [hmmsearch] 175 of 194 task(s) completed
INFO: [hmmsearch] 194 of 194 task(s) completed
......@@ -40,8 +41,6 @@ INFO: Running 124 job(s) on hmmsearch
INFO: [hmmsearch] 13 of 124 task(s) completed
INFO: [hmmsearch] 25 of 124 task(s) completed
INFO: [hmmsearch] 38 of 124 task(s) completed
INFO: [hmmsearch] 50 of 124 task(s) completed
INFO: [hmmsearch] 63 of 124 task(s) completed
INFO: [hmmsearch] 75 of 124 task(s) completed
INFO: [hmmsearch] 87 of 124 task(s) completed
INFO: [hmmsearch] 100 of 124 task(s) completed
......@@ -81,7 +80,40 @@ INFO: [hmmsearch] 4 of 4 task(s) completed
WARNING: BUSCO did not find any match. Make sure to check the log files if this is unexpected.
INFO: Results: C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:255
INFO: bacteria_odb10 selected as the most likely domain.
INFO: Starting second step of analysis. The gene predictor Augustus is retrained using the results from the initial run to yield more accurate results.
INFO: Extracting missing and fragmented buscos from the file ancestral_variants...
INFO: Running a BLAST search for BUSCOs against created database
INFO: [tblastn] 1 of 1 task(s) completed
INFO: Training Augustus using Single-Copy Complete BUSCOs:
INFO: Converting predicted genes to short genbank files
INFO: All files converted to short genbank files, now running the training scripts
INFO: Running 1 job(s) on new_species.pl
INFO: [new_species.pl] 1 of 1 task(s) completed
INFO: Running 1 job(s) on etraining
INFO: [etraining] 1 of 1 task(s) completed
INFO: Re-running Augustus with the new metaparameters, number of target BUSCOs: 255
INFO: Running Augustus gene predictor on BLAST search results.
INFO: Running Augustus prediction using BUSCO_test_bacteria as species:
INFO: [augustus] 2 of 14 task(s) completed
INFO: [augustus] 3 of 14 task(s) completed
INFO: [augustus] 5 of 14 task(s) completed
INFO: [augustus] 6 of 14 task(s) completed
INFO: [augustus] 7 of 14 task(s) completed
INFO: [augustus] 9 of 14 task(s) completed
INFO: [augustus] 10 of 14 task(s) completed
INFO: [augustus] 12 of 14 task(s) completed
INFO: [augustus] 13 of 14 task(s) completed
INFO: [augustus] 14 of 14 task(s) completed
INFO: Extracting predicted proteins...
INFO: ***** Run HMMER on gene sequences *****
INFO: [hmmsearch] 1 of 4 task(s) completed
INFO: [hmmsearch] 2 of 4 task(s) completed
INFO: [hmmsearch] 3 of 4 task(s) completed
INFO: [hmmsearch] 4 of 4 task(s) completed
WARNING: BUSCO did not find any match. Make sure to check the log files if this is unexpected.
INFO: Results: C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:255
INFO: bacteria_odb10 selected
INFO: ***** Searching tree for chosen lineage to find best taxonomic match *****
......@@ -102,17 +134,6 @@ INFO: Place the markers on the reference tree...
INFO: Running 1 job(s) on sepp
INFO: [sepp] 1 of 1 task(s) completed
INFO: Not enough markers were placed on the tree (11). Root lineage bacteria is kept
INFO: Generic lineage selected. Results reproduced here.
***** Results: *****
C:21.0%[S:21.0%,D:0.0%],F:0.8%,M:78.2%,n:124
26 Complete BUSCOs (C)
26 Complete and single-copy BUSCOs (S)
0 Complete and duplicated BUSCOs (D)
1 Fragmented BUSCOs (F)
97 Missing BUSCOs (M)
124 Total BUSCO groups searched
INFO:
--------------------------------------------------
......@@ -126,10 +147,11 @@ INFO:
|97 Missing BUSCOs (M) |
|124 Total BUSCO groups searched |
--------------------------------------------------
INFO: BUSCO analysis done with WARNING(s). Total running time: 69 seconds
***** Summary of warnings: *****
INFO: BUSCO analysis done with WARNING(s). Total running time: 80 seconds
WARNING:busco.ConfigManager Running Auto Lineage Selector as no lineage dataset was specified. This may take a little longer than normal. If you know what lineage dataset you want to use, please specify this in the config file or using the -l (--lineage-dataset) flag in the command line.
***** Summary of warnings: *****
WARNING:busco.ConfigManager Running Auto Lineage Selector as no lineage dataset was specified. This will take a little longer than normal. If you know what lineage dataset you want to use, please specify this in the config file or using the -l (--lineage-dataset) flag in the command line.
WARNING:busco.BuscoTools BUSCO did not find any match. Make sure to check the log files if this is unexpected.
WARNING:busco.BuscoTools BUSCO did not find any match. Make sure to check the log files if this is unexpected.
INFO: Results written in /busco_wd/test_bacteria
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment