Commit c44989e4 authored by Mathieu Courcelles's avatar Mathieu Courcelles

Improvement of the 3D viewer:

-Added min histogram
-Changed the layout
-Fixed some bugs.
parent 2c1b800c
......@@ -23,3 +23,5 @@ site_media/FastaDB
site_media/RawDataset
site_media/FastaDB
CLMSpipeline_20141017.db
CLMSVault/settings/production.py
CLMSVault/settings/local.py
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>CLMSpipeline</name>
<name>CLMSVault</name>
<comment></comment>
<projects>
</projects>
......
......@@ -6,10 +6,10 @@
<key>DJANGO_MANAGE_LOCATION</key>
<value>manage.py</value>
<key>DJANGO_SETTINGS_MODULE</key>
<value>CLMSpipeline.settings.local</value>
<value>CLMSVault.settings.local</value>
</pydev_variables_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/CLMSpipeline</path>
<path>/CLMSVault</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
......
No preview for this file type
......@@ -321,7 +321,7 @@ class CLPeptideAdmin(admin.ModelAdmin):
queryset.query = pickle.loads(request.session.get('query_%s' % random))
clpeptide_set, alignments = compute_cl_distance(queryset,
pdb.file.file.name,
pdb,
form.cleaned_data['protein_identity'],
form.cleaned_data['peptide_identity'], )
......
# # Copyright 2013 Mathieu Courcelles
# # Copyright 2013-2014 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
......
......@@ -3,8 +3,13 @@
# Import standard libraries
import cPickle as pickle
import os
import time
from unipath import Path
import urllib2
# Import Django related libraries
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
......@@ -20,6 +25,15 @@ from .queryset_operation import clpeptide_set_2_protein_sequences
# Code from http://stackoverflow.com/questions/4529815/how-to-save-an-object-in-python
def save_object(obj, filename):
    """
    Serializes obj with pickle and writes it to the file at filename.

    The file is opened in binary write mode, so an existing file at
    that path is overwritten. The highest pickle protocol available
    to the running interpreter is used. Nothing is returned.
    """

    protocol = pickle.HIGHEST_PROTOCOL

    with open(filename, 'wb') as pickle_fh:
        pickle.dump(obj, pickle_fh, protocol)
def findPeptidePositionsInAligment(pep_seq, fasta_seq_aligned,
pdb_seq_aligned, link_index, peptide_identity):
"""
......@@ -138,7 +152,7 @@ def findPeptidePositionsInAligment(pep_seq, fasta_seq_aligned,
def crosslinkResidues(pep_seq, cl_position, alignments, peptide_identity):
def crosslinkResidues(pep_seq, cl_position, alignments, pdb, peptide_identity):
"""
Searches for a peptide sequence in alignment and then
return cross-linked position in PDB.
......@@ -148,8 +162,8 @@ def crosslinkResidues(pep_seq, cl_position, alignments, peptide_identity):
# Locate peptide in sequence alignments
for alignment in alignments:
chain = alignment['chain']
(pdb_seq, chain) = pdb.pdb_sequences[alignment['chain']]
fasta_seq_aligned = alignment['fasta_seq_aligned']
pdb_seq_aligned = alignment['pdb_seq_aligned']
......@@ -183,8 +197,7 @@ def crosslinkResidues(pep_seq, cl_position, alignments, peptide_identity):
if pep_aa != position['final_pep_seq'][pep_index]:
pdb_pep_seq_linked = pdb_seq_index
if pdb_aa == pep_aa:
if pep_index == 0:
......@@ -203,8 +216,8 @@ def crosslinkResidues(pep_seq, cl_position, alignments, peptide_identity):
pdb_seq_index = pdb_pep_seq_start
else:
pep_index = 0
pep_index = 0
if pep_index != 0:
pdb_seq_index = pdb_pep_seq_start
pep_index = 0
except KeyError:
......@@ -257,14 +270,14 @@ def retrievePDBfile(pdb_identifier):
return pdb_queryset[0]
def read_PDB_sequences(pdb_file):
def read_PDB_sequences(pdb):
"""
Opens specified PDB file and returns a dictionnary of with key=chain
and value=protein sequence string.
"""
parser = PDBParser()
structure = parser.get_structure('tmp', pdb_file)
structure = parser.get_structure('tmp', pdb.file.file.name)
pdb_sequences = dict()
......@@ -301,13 +314,13 @@ def read_PDB_sequences(pdb_file):
pass
pdb_sequences[chain] = sequence
pdb_sequences[str(chain.get_id())] = (sequence, chain)
return pdb_sequences
pdb.pdb_sequences = pdb_sequences
def sequences_alignment(fasta_sequences, pdb_sequences, protein_identity):
def sequences_alignment(fasta_sequences, pdb, protein_identity):
"""
Aligns protein sequences from FASTA against the PDB sequences.
Returns a dictionnary:
......@@ -315,49 +328,66 @@ def sequences_alignment(fasta_sequences, pdb_sequences, protein_identity):
Value: alignment
"""
alignments = dict()
alignments_dict = dict()
for (fasta_id, fasta_seq) in fasta_sequences.items():
alignments[fasta_id] = list()
for (chain, pdb_seq) in pdb_sequences.items():
# Skip empty pdb_sequence
if pdb_seq == '':
continue
# Do protein sequence aligment with FASTA sequence
alignment = pairwise2.align.localms(fasta_seq, pdb_seq, 2, -1, -1, -0.5)
fasta_seq_aligned = str(alignment[0][0])
pdb_seq_aligned = str(alignment[0][1])
# Check sequence identity
min_length = min(len(fasta_seq_aligned.replace('-', '')),
len(pdb_seq_aligned.replace('-', '')))
match_count = 0.0
for res1, res2 in zip(alignment[0][0], alignment[0][1]):
if res1 == res2 and res1 != '-' and res2 != '-':
match_count += 1
#print (res1, res2, match_count, min_length)
identity = match_count / min_length
# Keep aligment based on user protein identity threshold
if identity >= protein_identity:
alignments_list = list()
# Check cache for alignments
PROJECT_DIR = Path(__file__).ancestor(3)
pkl_file = PROJECT_DIR + '/site_media/pdb_alignments/%s-%s.pkl' % (fasta_id, pdb.pk)
if os.path.isfile(pkl_file):
with open(pkl_file, 'rb') as pickle_fh:
alignments_list = pickle.load(pickle_fh)
else:
# Do the alignments since it is not in the cache
for (chain_id, (pdb_seq, chain)) in pdb.pdb_sequences.items():
# Skip empty pdb_sequence
if pdb_seq == '':
continue
# Do protein sequence aligment with FASTA sequence
alignment = pairwise2.align.localms(fasta_seq, pdb_seq, 2, -1, -1, -0.5)
fasta_seq_aligned = str(alignment[0][0])
pdb_seq_aligned = str(alignment[0][1])
# Check sequence identity
min_length = min(len(fasta_seq_aligned.replace('-', '')),
len(pdb_seq_aligned.replace('-', '')))
match_count = 0.0
for res1, res2 in zip(alignment[0][0], alignment[0][1]):
if res1 == res2 and res1 != '-' and res2 != '-':
match_count += 1
#print (res1, res2, match_count, min_length)
identity = match_count / min_length
# Store aligment
alignment_dict = dict()
alignment_dict['chain'] = chain
alignment_dict['chain'] = chain_id
alignment_dict['fasta_seq_aligned'] = fasta_seq_aligned
alignment_dict['pdb_seq_aligned'] = pdb_seq_aligned
alignment_dict['identity'] = identity
alignments_list.append(alignment_dict)
alignments[fasta_id].append(alignment_dict)
return alignments
save_object(alignments_list, pkl_file)
# Keep aligment based on user protein identity threshold
alignments_dict[fasta_id] = [x for x in alignments_list if x['identity'] >= protein_identity ]
return alignments_dict
class CLDistance(object):
......@@ -411,7 +441,7 @@ class CLDistance(object):
def compute_cl_distance(clpeptide_set, pdb_file, protein_identity, peptide_identity, unique_key=None):
def compute_cl_distance(clpeptide_set, pdb, protein_identity, peptide_identity, unique_key=None):
"""
Measure distance of cross-linked residues and add it to clpep object.
"""
......@@ -422,13 +452,11 @@ def compute_cl_distance(clpeptide_set, pdb_file, protein_identity, peptide_ident
# Get original FASTA sequence for cross-links detected
fasta_sequences = clpeptide_set_2_protein_sequences(clpeptide_set)
# Read PDB file to get chain and protein sequences
pdb_sequences = read_PDB_sequences(pdb_file)
# Read PDB file to get chain and protein sequences
read_PDB_sequences(pdb)
# Do sequence alignments
alignments = sequences_alignment(fasta_sequences, pdb_sequences, protein_identity)
alignments = sequences_alignment(fasta_sequences, pdb, protein_identity)
# Iterate cross-linked peptide to get position in PDB
clpeptide_set_return = []
......@@ -438,24 +466,26 @@ def compute_cl_distance(clpeptide_set, pdb_file, protein_identity, peptide_ident
for clpep in clpeptide_set:
linked_residues_1 = []
if clpep.fs_prot1_id is not None:
linked_residues_1 = crosslinkResidues(clpep.peptide_wo_mod1,
clpep.pep1_link_pos,
alignments[clpep.fs_prot1_id.id], peptide_identity)
alignments[clpep.fs_prot1_id.id], pdb, peptide_identity)
linked_residues_2 = []
if clpep.fs_prot2_id is not None:
linked_residues_2 = crosslinkResidues(clpep.peptide_wo_mod2,
clpep.pep2_link_pos,
alignments[clpep.fs_prot2_id.id], peptide_identity)
alignments[clpep.fs_prot2_id.id], pdb, peptide_identity)
# Generates pair of residues and calculate distance
distance_set = set()
clpep.distances = []
# Cross-link mapped with both residues
for residue_1, identity_1 in linked_residues_1:
for residue_2, identity_2 in linked_residues_2:
......@@ -539,4 +569,5 @@ def compute_cl_distance(clpeptide_set, pdb_file, protein_identity, peptide_ident
else:
clpeptide_set_return.append(clpep)
return (clpeptide_set_return, alignments)
......@@ -49,7 +49,7 @@ CLMSVault is a web based software for storing, processing and visualizing protei
<strong>CLMSVault package (with source code) is available to download here:</strong><br /><br />
<ul>
<li>
2015/XX/XX - <a href="">CLMSVault_v1.zip</a> (available when manuscript published)
<a href="">CLMSVault_201X-XX-XX.zip</a> (available when manuscript published)
</li>
</ul>
......
......@@ -8,6 +8,8 @@
global_selected_series = '';
$(function () {
$('#container').width(Math.floor($('body').innerWidth()/3));
$('#container').highcharts({
chart: {
......@@ -80,7 +82,14 @@ $(function () {
},
series: [{
name: 'All',
data: [{% for val in bin_values%}
data: [{% for val in all_bin_values%}
{{val}},
{% endfor %}
]
},
{
name: 'Min',
data: [{% for val in min_bin_values%}
{{val}},
{% endfor %}
]
......
......@@ -3,6 +3,7 @@
# Import standard libraries
from math import floor
import numpy as np
# Import Django related libraries
......@@ -19,6 +20,7 @@ from Bio.PDB.PDBParser import PDBParser
# Import project libraries
from queryset_operation import clpeptide_set_2_protein_set
from models import (CLPeptide,
FastaDb_Sequence,
PDB,
QuantificationFC
)
......@@ -81,7 +83,7 @@ def jsmol_view(request, clpeptide_set, form=None):
ukey = dict()
ukey['proteinPpos'] = True
clpeptide_set, alignments = compute_cl_distance(clpeptide_set,
pdb.file.file.name,
pdb,
form.cleaned_data['protein_identity'],
form.cleaned_data['peptide_identity'],
unique_key=ukey)
......@@ -94,6 +96,7 @@ def jsmol_view(request, clpeptide_set, form=None):
distance_list = []
min_distance_list = []
# Quant
......@@ -135,20 +138,30 @@ def jsmol_view(request, clpeptide_set, form=None):
j = False;
min_distance = 1000000000000
for cldistance in clpep.distances:
if cldistance.distance != '':
j = True
distance_list.append(float(cldistance.distance))
if min_distance > float(cldistance.distance):
min_distance = float(cldistance.distance)
if min_distance != 1000000000000:
min_distance_list.append(min_distance)
cross_links.append([str(clpep.formated_url_jsmol()),
str(protein_1), str(protein_2),
cross_links.append('{pk: "%s", prot1: "%s", prot2: "%s", res1: "%s", res2: "%s", score: "%.2f", distance: "%s", ident1: "%s", ident2: "%s"}' % (clpep.pk,
str(protein_1),
str(protein_2),
cldistance.residue_1_str,
cldistance.residue_2_str,
cldistance.residue_2_str,
clpep.match_score,
cldistance.distance,
cldistance.identity_1,
cldistance.identity_2,
clpep.match_score,] )
))
if j:
mapped_cl_count += 1
......@@ -171,17 +184,33 @@ def jsmol_view(request, clpeptide_set, form=None):
# Histogram
bin_values, bin_labels = ([], [])
all_bin_values, all_bin_labels = ([], [])
floor_all_distance_list = [0,]
if len(distance_list) != 0:
bin_values, bin_labels = np.histogram(distance_list, bins=np.arange(min(distance_list),
max(distance_list) + 2,
floor_all_distance_list = [floor(x) for x in distance_list]
all_bin_values, all_bin_labels = np.histogram(floor_all_distance_list, bins=np.arange(min(floor_all_distance_list),
max(floor_all_distance_list) + 4,
2))
if len(min_distance_list) != 0:
floor_min_distance_list = [floor(x) for x in min_distance_list]
min_bin_values, min_bin_labels = np.histogram(floor_min_distance_list, bins=np.arange(min(floor_all_distance_list),
max(floor_all_distance_list) + 4,
2))
# Replace fasta_id with identifier
for key, value in alignments.items():
c = Context({'cross_links_array': mark_safe(repr(cross_links)),
seq = FastaDb_Sequence.objects.filter(pk=key)
identifier = seq[0].identifier
alignments[identifier] = alignments.pop(key)
# Ready
c = Context({'cross_links_array': mark_safe("[" + ','.join(cross_links) + "]"),
'pdb': pdb,
'cl_count': cl_count,
'mapped_cl_count': mapped_cl_count,
......@@ -189,11 +218,13 @@ def jsmol_view(request, clpeptide_set, form=None):
'molecules': molecules,
'chain_list': chain_list,
'alignments': alignments,
'bin_values': bin_values,
'bin_labels': bin_labels,
'all_bin_values': all_bin_values,
'min_bin_values': min_bin_values,
'bin_labels': all_bin_labels,
'color_scheme': form.cleaned_data['color_scheme'],
'fold_change_limit': limit,
'quant_set': quant_set,
'min_val': min(floor_all_distance_list),
})
return render_to_response('jsmol.html', c)
\ No newline at end of file
......@@ -96,6 +96,8 @@ export SECRET_KEY='***************************************'
python manage.py syncdb
python manage.py loaddata datadump.json
-Check that the Apache user can write recursively to /home/user/CLMSVault/site_media
-Restart Apache and MySQL servers
-Use your web browser to access CLMSVault at: http://clmsvault.yourhost.com
......@@ -111,6 +111,8 @@ Follow the standalone installation instruction with the following modifications:
python manage.py syncdb
python manage.py loaddata datadump.json
-Check that the Apache user can write recursively to C:/CLMSVault/site_media/
-Restart Apache and MySQL servers
-Use your web browser to access CLMSVault at: http://clmsvault.yourhost.com
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment