Commit 34d18e85 authored by cev's avatar cev

Initial commit

Charles E. Vejnar <>
This diff is collapsed.
miRmap --- Comprehensive prediction of microRNA target repression strength
The `miRmap` library is a Python_ library predicting the repression strength of microRNA (miRNA) targets. The model combines thermodynamic, evolutionary, probabilistic, or sequence-based approaches.
See the `miRmap web site <>`_ for installation and usage details.
The miRmap library is distributed under the GNU GPL v3 (see /LICENCE).
Copyright (C) 2011-2012 Charles E. Vejnar
# Makefile for Sphinx documentation
# You can set these variables from the command line.
SPHINXBUILD = sphinx-build
BUILDDIR = build
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
-rm -rf $(BUILDDIR)/*
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
@echo "Build finished; now you can process the pickle files."
@echo "Build finished; now you can process the JSON files."
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/miRmap.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/miRmap.qhc"
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/miRmap"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/miRmap"
@echo "# devhelp"
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
@echo "Running LaTeX files through pdflatex..."
make -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
@echo "Build finished. The text files are in $(BUILDDIR)/text."
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
@echo "The overview file is in $(BUILDDIR)/changes."
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
# -*- coding: utf-8 -*-
# miRmap documentation build configuration file, created by
# sphinx-quickstart on Thu Nov 17 19:07:14 2011.
# This file is execfile()d with the current directory set to its containing dir.
# Note that not all possible configuration values are present in this
# autogenerated file.
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
import user
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'miRmap'
copyright = u'2011, Charles E. Vejnar'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
# The short X.Y version.
version = '1.0'
# The full version, including alpha/beta/rc tags.
release = '1.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# Show both class-level docstring and __init__ docstring in class
# documentation
autoclass_content = 'both'
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'miRmapdoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'miRmap.tex', u'miRmap Documentation',
u'Charles E. Vejnar', 'manual'),
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'mirmap', u'miRmap Documentation',
[u'Charles E. Vejnar'], 1)
rst_epilog = """
.. _Python:
.. |mirmap_copyright| replace:: **Copyright 2011 Charles E. Vejnar**
:mod:`miRmap` --- Comprehensive prediction of microRNA target repression strength
:mod:`miRmap` library is Python_ library organized with ...
The :mod:`miRmap` library is a Python_ library predicting the repression strength of microRNA (miRNA) targets. The model combines:
- **thermodynamic** features: *ΔG duplex*, *ΔG binding*, *ΔG open* and *ΔG total*,
- **evolutionary** features: *BLS* and *PhyloP*,
- **probabilistic** features: *P.over binomial* and *P.over exact*, and
- **sequence-based** features: *AU content*, *UTR position* and *3' pairing*.
.. toctree::
:maxdepth: 2
:mod:`miRmap` distribution is available
.. note::
**To the reviewers.**
This section is temporary.
After acceptance of the article, we will open the public repository hosted on BitBucket. The source code will be separated from the binaries. Features of BitBucket, including bug and issue tracking, wiki, etc., will be available at the same time.
The :mod:`miRmap` library has the following requirements:
1. :mod:`miRmap` requires Python_ 2.7 but it can be used with Python_ 2.6 if the :mod:`collections` module is installed (a version compatible with Python_ 2.4-2.6 is available as the `ordereddict <>`_ module).
2. For the evolutionary features, the Python_ library :mod:`DendroPy` is needed for tree manipulation. You can install `DendroPy <>`_ directly from the `Python Package Index <>`_.
3. C libraries. A compiled version of the 3 libraries (\*.so) is included in the :mod:`miRmap` distribution. If you want/have to compile them, please follow these instructions:
- For the thermodynamic features, the `Vienna RNA <>`_ library is required.
Download the latest `Vienna package tarball <>`_, then do:
cd ViennaRNA-1.8.5
./configure --without-kinfold --without-forester --without-svm --without-perl
gcc -shared -Wl,-O2 -o lib/ `find lib/ -name "*.o"` -lm
- For the evolutionary features, the `PHAST <>`_ library is required (the `CLAPACK <>`_ library has to be compiled first; please follow the instructions in the PHAST package).
svn co phast
cd phast/src
make CLAPACKPATH=../CLAPACK-3.2.1 sharedlib
- For the *P.over exact* feature, the `Spatt <>`_ library is required (You will need a working copy of `CMake <>`_ on your system).
git clone
cd spatt
mkdir build
cd build
From the directory where you compiled the C libraries:
mv libspatt2/ mirmap/libs/compiled
mv ViennaRNA-1.8.5/lib/ mirmap/libs/compiled
mv phast2/lib/sharedlib/ mirmap/libs/compiled
Example with the pure Python features.
.. code-block:: python
>>> import mirmap
>>> mim =, seq_mirna)
>>> mim.find_potential_targets_with_seed(allowed_lengths=[6,7], allowed_gu_wobbles={6:0,7:0},\
... allowed_mismatches={6:0,7:0}, take_best=True)
>>> mim.end_sites # Coordinate(s) (3' end) of the target site on the target sequence
>>> mim.eval_tgs_au(with_correction=False) # TargetScan features manually evaluated with
>>> mim.eval_tgs_pairing3p(with_correction=False) # a non-default parameter.
>>> mim.eval_tgs_position(with_correction=False)
>>> mim.prob_binomial # mim's attribute: the feature is automatically computed
>>> print
155 186
| |
AU content 0.64942
UTR position 166.00000
3' pairing 1.00000
Probability (Binomial) 0.03312
With the C libraries installed:
.. code-block:: python
>>> import mirmap.library_link
>>> mim.libs = mirmap.library_link.LibraryLink('libs/compiled') # Change to the path where you unzipped the *.so files
>>> mim.dg_duplex
>>> mim.dg_open
>>> mim.prob_exact
>>> print
155 186
| |
ΔG duplex (kcal/mol) -13.50000
ΔG binding (kcal/mol) -11.91708
ΔG open (kcal/mol) 12.18059
AU content 0.64942
UTR position 166.00000
3' pairing 1.00000
Probability (Exact) 0.06799
Probability (Binomial) 0.03312
.. automodule:: mirmap
:member-order: groupwise
.. autoclass:: mirmap.library_link.LibraryLink
Copyright, License and Warranty
The miRmap library is:
.. container:: licenseblock
.. centered:: |mirmap_copyright|
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 3 as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program (in /LICENCE). If not, see `GNU Licenses <>`_.
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
python2 -m unittest discover -s tests
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import argparse
import itertools
import multiprocessing
import os
import socket
import sys
import cevbio.empty as shared
import cevbio.mylog as mylog
import mirmap
import mirmap.library_link
def predict_on_mim(args):
# Worker that evaluates one (miRNA, transcript) pair and returns its features.
#
# `args` is a single 2-tuple so the function can be used with map()/Pool.map:
# (mirna, transcript). Each element is indexed as [0] and [1]; main() passes
# dict items, so [0] is the identifier and [1] the sequence.
#
# NOTE(review): this listing has lost its indentation and apparently some
# lines/tokens (e.g. the right-hand sides of `mimset =` and `mod =` are
# incomplete, and no `else:` branches are visible). Restore them from the
# original file before relying on the control flow shown here.
mirna, transcript = args
mimset =[1], mirna[1])
# Attach the external C libraries only when main() loaded them.
if shared.libs:
mimset.libs = shared.libs
if len(mimset.end_sites) > 0:
# De novo features
# Rest of the features
# Defaults for the two evolutionary features, one value per target site:
# 0. for cons_blss and 1. for selec_phylops.
mimset.cons_blss = [0.] * len(mimset.end_sites)
mimset.selec_phylops = [1.] * len(mimset.end_sites)
# shared.aln_path is only set by main() when the --aln option was given.
if hasattr(shared, 'aln_path'):
# The alignment file is looked up as <aln_path>/<transcript id>.fa
aln_fname = os.path.join(shared.aln_path, '%s.fa'%(transcript[0]))
if os.path.exists(aln_fname):
if shared.mod_path:
# The model file is looked up as <mod_path>/<transcript id>.mod
mod_fname = os.path.join(shared.mod_path, '%s.mod'%(transcript[0]))
if os.path.exists(mod_fname):
with open(mod_fname) as modf:
mod =
# Extract the text that follows 'TREE: ' (6 characters) up to and
# including the next ';'.
start = mod.find('TREE: ') + 6
end = mod.find(';', start) + 1
tree = mod[start:end]
# NOTE(review): the two pairs of calls below presumably belong to
# different branches (tree taken from the model file vs. the
# 'species.tree' file with fitting_tree=True) — confirm against the
# original indentation.
mimset.eval_cons_bls(aln_fname=aln_fname, tree=tree, fitting_tree=False)
mimset.eval_selec_phylop(aln_fname=aln_fname, mod_fname=mod_fname)
mimset.eval_cons_bls(aln_fname=aln_fname, tree='species.tree', fitting_tree=True)
mimset.eval_selec_phylop(aln_fname=aln_fname, mod_fname=mod_fname)
# Result tuple: miRNA id, transcript id, then the per-site attributes of
# mimset (main() zips everything from index 2 onwards into one row per site).
return mirna[0], transcript[0], mimset.end_sites, mimset.tgs_aus, mimset.tgs_positions, mimset.tgs_pairing3ps, mimset.tgs_scores, mimset.dg_duplexs, mimset.dg_bindings, mimset.dg_opens, mimset.dg_totals, mimset.prob_exacts, mimset.prob_binomials, mimset.cons_blss, mimset.selec_phylops, mimset.scores
def main(argv=None):
# Command-line entry point: parse the options, load the miRNA and transcript
# sequences, run predict_on_mim() on every (miRNA, transcript) pair and write
# one tab-separated line per predicted target site.
#
# `argv` defaults to sys.argv; argv[0] is skipped when parsing.
#
# NOTE(review): this listing has lost its indentation and apparently some
# lines/tokens (`try:`, several `else:` lines and the call names in front of
# the message strings near the end). Restore them from the original file
# before relying on the control flow shown here.
# Parameters
if argv is None:
argv = sys.argv
parser = argparse.ArgumentParser(description='Predict miRNA targets.')
# miRNA input: either sequences on the command line (-m) or a Fasta file (-a).
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-m', '--mirna', dest='mirna_seqs', action='append', help='miRNA sequence')
parser.add_argument('-n', '--mirna-id', dest='mirna_ids', action='append', help='miRNA IDs')
group.add_argument('-a', '--mirna-fasta', dest='mirna_fname_fasta', action='store', help='miRNA Fasta file')
# Transcript input: either sequences on the command line (-t) or a Fasta file (-f).
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-t', '--transcript', dest='transcript_seqs', action='append', help='Transcript sequence')
parser.add_argument('-u', '--transcript-id', dest='transcript_ids', action='append', help='Transcript IDs')
group.add_argument('-f', '--transcript-fasta', dest='transcript_fname_fasta', action='store', help='Transcript Fasta file')
parser.add_argument('-l', '--library', dest='library_path', action='store', help='External C libraries path')
parser.add_argument('-s', '--aln', dest='aln_path', action='store', help='Multiple sequences alignment(s) path')
parser.add_argument('-d', '--mod', dest='mod_path', action='store', help='Model(s) path')
# NOTE(review): the help text of --output is the same as --workers below;
# looks like a copy-paste slip (the option is an output file name, '-' = stdout).
parser.add_argument('-o', '--output', dest='output_fname', action='store', default='-', help='Worker(s)')
parser.add_argument('-w', '--workers', dest='num_worker', action='store', type=int, default=1, help='Worker(s)')
args = parser.parse_args(argv[1:])
# Logging
logger = mylog.define_root_logger('mirmap', level='debug')
logger.debug('Starting on %s'%(socket.gethostname()))
# Paths
# Stored on the `shared` module so that predict_on_mim() can read them.
# NOTE(review): shared.mod_path is only assigned when --mod is given, while
# predict_on_mim() reads shared.mod_path without a hasattr() check — verify
# that cevbio.empty provides a default.
if args.aln_path:
shared.aln_path = args.aln_path
if args.mod_path:
shared.mod_path = args.mod_path
# Prediction defaults
# An optional `mirmap_defaults` module is imported from the current working
# directory; the `except ImportError` below implies a `try:` line that is
# missing from this listing.
if not os.getcwd() in sys.path:
sys.path.insert(1, os.getcwd())
import mirmap_defaults
logger.debug('Prediction defaults changed')
except ImportError:
logger.debug('Prediction defaults kept')
# Loading external C libraries
# NOTE(review): the `shared.libs = None` line is presumably the `else:` branch.
if args.library_path:
shared.libs = mirmap.library_link.LibraryLink(args.library_path)
shared.libs = None
# Reading sequences
if args.mirna_fname_fasta:
mirnas = mirmap.utils.load_fasta(args.mirna_fname_fasta)
if args.mirna_ids:
# Keep only the requested IDs. Deleting while looping over keys() relies on
# Python 2, where keys() returns a list copy.
for mid in mirnas.keys():
if mid not in args.mirna_ids:
del mirnas[mid]
# Sequences given on the command line: pair them with the given IDs by position.
if args.mirna_ids:
mirnas = {}
for i in range(len(args.mirna_ids)):
mirnas[args.mirna_ids[i]] = args.mirna_seqs[i]
# NOTE(review): presumably the `else:` branch — without IDs, sequences are
# numbered 1..N.
mirnas = dict(zip(range(1, len(args.mirna_seqs)+1), args.mirna_seqs))
if args.transcript_fname_fasta:
transcripts = mirmap.utils.load_fasta(args.transcript_fname_fasta)
if args.transcript_ids:
# Same filtering as for the miRNAs (Python 2 keys() list copy).
for tid in transcripts.keys():
if tid not in args.transcript_ids:
del transcripts[tid]
if args.transcript_ids:
transcripts = {}
for i in range(len(args.transcript_ids)):
transcripts[args.transcript_ids[i]] = args.transcript_seqs[i]
# NOTE(review): the next line looks like two statements fused by the
# extraction (the numbered-transcripts fallback and a log message).
transcripts = dict(zip(range(1, len(args.transcript_seqs)+1), args.transcript_seqs))'Starting predictions with %i miRNA(s) and %i transcript(s)'%(len(mirnas), len(transcripts)))
# Predictions
# Every (miRNA, transcript) combination is evaluated: in a process pool when
# more than one worker is requested, otherwise (presumably the missing
# `else:`) with the builtin map().
if args.num_worker > 1:
pool = multiprocessing.Pool(args.num_worker)
# get() is called with a very large timeout (1e+10) rather than no timeout.
tsp_results = pool.map_async(predict_on_mim, itertools.product(mirnas.items(), transcripts.items()), chunksize=5).get(1e+10)
tsp_results = map(predict_on_mim, itertools.product(mirnas.items(), transcripts.items()))
# Output
# '-' selects standard output; any other value is opened as a file for writing.
# NOTE(review): no close() of outf is visible in this listing.
if args.output_fname == '-':
outf = sys.stdout
outf = open(args.output_fname, 'w')
for mim in tsp_results:
if mim is not None:
# mim[2:] holds the per-feature lists; zip(*...) turns them into one tuple
# per target site. Indexing feats[imim] relies on Python 2 zip() returning
# a list.
feats = zip(*mim[2:])
for imim in range(len(mim[2])):
# Columns: miRNA id, transcript id, 1-based site index, then the features.
outf.write('\t'.join([str(i) for i in [mim[0], mim[1], imim+1] + list(feats[imim])]) + '\n')
# End
# mirnas.keys()[0] below also relies on Python 2 keys() returning a list.
if len(mirnas) == 1:'Predictions ready for miRNA %s'%(mirnas.keys()[0]))
else:'Predictions ready')
if __name__ == '__main__':
# -*- coding: utf-8 -*-
# Copyright (C) 2011-2012 Charles E. Vejnar
# This is free software, licensed under the GNU General Public License v3.
# See /LICENSE for more information.
""":class:`mm` and :class:`mmPP` base classes of :mod:`miRmap` that inherit their methods from all the modules. Each module define the methods for one category."""
import model
import prob_binomial
import report
import targetscan
import evolution