Commit c23231d5 authored by Mathieu Courcelles

Functional ProcessedDataset

parent b32e2f66
## Copyright 2013 Mathieu Courcelles
## Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard libraries
# Import Django related libraries
# Import project libraries
from CLMSpipeline.CLMSpipeline_app.models import ProcessedDataset
class DatasetProcessing:

    @staticmethod
    def process(instance):

        datasets = instance.datasets.all()

        # Transfer extra info
        fields = dict()
        fields['cross_linker'] = ''
        fields['instrument_name'] = ''
        fields['fasta_db'] = ''
        fields['search_algorithm'] = ''

        dataset_description = ''

        for dataset in datasets:

            dataset_description += dataset.description

            # Take care first of CLPeptides
            clpeptide_set = dataset.clpeptide_set.all()

            # Apply filter/exclude to CLPeptides
            if instance.clpeptidefilter:

                for fe in instance.clpeptidefilter.clpeptidefilterparam_set.all():

                    d = dict()
                    d[fe.field + '__' + fe.field_lookup] = fe.value

                    if fe.method == 'filter':
                        clpeptide_set = clpeptide_set.filter(**d)
                    else:
                        clpeptide_set = clpeptide_set.exclude(**d)

                # Determine false positive cutoff score
                min_score = 100000
                if instance.clpeptidefilter.fp_cutoff != None:

                    clpep_count = 0.0
                    clpep_decoy_count = 0.0

                    for clpep in clpeptide_set:

                        clpep_count += 1

                        if clpep.not_decoy == False:
                            clpep_decoy_count += 1

                        print clpep_decoy_count / clpep_count

                        if (clpep_decoy_count / clpep_count) <= instance.clpeptidefilter.fp_cutoff:
                            min_score = clpep.match_score
                        # else:
                        #     break

                    dataset_description += 'False positive cutoff (%s) at score %.2f.\n' % (instance.clpeptidefilter.fp_cutoff,
                                                                                            min_score)

                # Add peptides to the new dataset
                for clpep in clpeptide_set:

                    # Skip decoy if requested
                    if instance.clpeptidefilter.remove_decoy and clpep.not_decoy == False:
                        continue

                    # Limit false positives
                    if instance.clpeptidefilter.fp_cutoff != None and clpep.match_score < min_score:
                        break

                    # Add peptide to dataset
                    clpep.dataset.add(instance)
                    clpep.save()

            # Check extra info homogeneity
            for field in fields:
                if fields[field] == '':
                    fields[field] = dataset.__getattribute__(field)
                elif fields[field] != dataset.__getattribute__(field):
                    fields[field] = 'Mixed'

        # Update extra info in database
        fields['description'] = dataset_description
        ProcessedDataset.objects.filter(pk=instance.id).update(**fields)
\ No newline at end of file
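For reference, here is a minimal standalone sketch of the false-positive cutoff walk performed in DatasetProcessing.process(), run on plain (match_score, not_decoy) tuples instead of CLPeptide rows. The sample scores and the fdr_cutoff_score() helper are hypothetical, and the peptides are assumed to arrive sorted by descending match_score, which is what the early break in the add loop relies on.

def fdr_cutoff_score(peptides, fp_cutoff):
    """Return the lowest score at which the running decoy fraction
    (decoys seen / peptides seen) is still <= fp_cutoff."""
    clpep_count = 0.0
    clpep_decoy_count = 0.0
    min_score = 100000  # same sentinel value as in process()
    for match_score, not_decoy in peptides:
        clpep_count += 1
        if not not_decoy:
            clpep_decoy_count += 1
        if (clpep_decoy_count / clpep_count) <= fp_cutoff:
            min_score = match_score
    return min_score

# Hypothetical (match_score, not_decoy) pairs, sorted by descending score
sample = [(35.2, True), (30.1, True), (28.4, False),
          (25.0, True), (20.3, False), (18.7, False)]
print(fdr_cutoff_score(sample, 0.34))  # -> 25.0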
@@ -22,6 +22,7 @@ from django.dispatch import receiver
# Import project libraries
import CLMSpipeline.CLMSpipeline_app.models as CLMSpipelineModels
from CLMSpipeline.CLMSpipeline_app.parser.parser_generic import parser_generic
from CLMSpipeline.CLMSpipeline_app.DatasetProcessing import DatasetProcessing
import export
#from adminWidgets import *
@@ -52,7 +53,7 @@ class CLPeptideAdmin(admin.ModelAdmin):
    def getDataset(self, obj):
        return ''.join([dataset.formated_url() for dataset in obj.dataset.all()])
    getDataset.allow_tags = True
-    getDataset.short_description = 'Dataset'
+    getDataset.short_description = 'Datasets'
@@ -93,7 +94,13 @@ class CLPeptideFilterAdmin(admin.ModelAdmin):
    inlines = [ CLPeptideFilterAdminInline ]
    date_hierarchy = 'creation_date'
    list_display = ('name', 'description', 'creation_date')
    save_as = True
    search_fields = ('name', 'description')
@@ -149,12 +156,83 @@ class RawDatasetAdmin(admin.ModelAdmin):
class ProcessedDatasetAdmin(admin.ModelAdmin):
    actions = ['make_Xi_csv']
    date_hierarchy = 'creation_date'
    filter_horizontal = ('datasets',)
    list_display = ('name', 'prefix', 'getDataset', 'clpeptidefilter', 'cross_linker', 'instrument_name', 'fasta_db',
                    'search_algorithm', 'description',
                    'creation_date', 'get_CLPeptides_URL')
    list_filter = ('cross_linker', 'instrument_name', 'fasta_db', 'search_algorithm')
    readonly_fields = ('cross_linker', 'instrument_name', 'fasta_db', 'search_algorithm')
    search_fields = ('name', 'prefix', 'file', 'description', 'clpeptidefilter')

    def getDataset(self, obj):
        return ''.join([dataset.formated_url() for dataset in obj.datasets.all()])
    getDataset.allow_tags = True
    getDataset.short_description = 'Datasets'

    def get_readonly_fields(self, request, obj=None):
        readonly_fields = list(self.readonly_fields)
        if obj != None and obj.datasets.count() != 0:
            readonly_fields.extend(['datasets'])
            readonly_fields.extend(['clpeptidefilter'])
        return readonly_fields

    def get_CLPeptides_URL(self, obj):
        return '<a href="/admin/CLMSpipeline_app/clpeptide/?dataset__id__exact=%s">See</a>' % (obj.pk)
    get_CLPeptides_URL.allow_tags = True
    get_CLPeptides_URL.short_description = 'CLPeptides'

    def make_Xi_csv(self, request, queryset):
        from itertools import chain
        return export.make_Xi_csv(self, request, list(chain(*[d.clpeptide_set.all() for d in queryset])), 'Dataset_CLPeptide')
    make_Xi_csv.short_description = 'Export as Xi CSV'

    def save_model(self, request, obj, form, change):
        # Need to save m2m before launching the processing
        # Advice from http://makkalot-opensource.blogspot.ca/2009/01/django-admin-manytomany-behaviour.html
        super(ProcessedDatasetAdmin, self).save_model(request, obj, form, change)
        form.save_m2m()
        obj.save()

    @staticmethod
    @receiver(post_save, sender=CLMSpipelineModels.ProcessedDataset)
    def process_file(sender, instance, created, **kwargs):
        DatasetProcessing.process(instance)
## Register admin panels
admin.site.register(CLMSpipelineModels.RawDataset, RawDatasetAdmin)
-admin.site.register(CLMSpipelineModels.ProcessedDataset)
+admin.site.register(CLMSpipelineModels.ProcessedDataset, ProcessedDatasetAdmin)
admin.site.register(CLMSpipelineModels.CrossLinker)
admin.site.register(CLMSpipelineModels.FastaDB)
admin.site.register(CLMSpipelineModels.Instrument)
admin.site.register(CLMSpipelineModels.CLPeptide, CLPeptideAdmin)
admin.site.register(CLMSpipelineModels.searchAlgorithm)
admin.site.register(CLMSpipelineModels.CLPeptideFilter, CLPeptideFilterAdmin)
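A small aside on make_Xi_csv() above: each selected ProcessedDataset contributes its own clpeptide_set, and itertools.chain flattens those per-dataset querysets into a single list before handing them to the exporter. A minimal sketch with plain lists standing in for the querysets (the peptide names are hypothetical):

from itertools import chain

per_dataset_peptides = [['pepA', 'pepB'], ['pepC'], ['pepD', 'pepE']]
all_peptides = list(chain(*per_dataset_peptides))
print(all_peptides)  # ['pepA', 'pepB', 'pepC', 'pepD', 'pepE']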
@@ -124,13 +124,13 @@ class Dataset(models.Model, AdminURLMixin):
    prefix = models.CharField(max_length=25,
                              help_text='Short name that will be appended in comparison.')

-    cross_linker = models.ForeignKey(CrossLinker)
+    cross_linker = models.ForeignKey(CrossLinker, null=True)

-    instrument_name = models.ForeignKey(Instrument)
+    instrument_name = models.ForeignKey(Instrument, null=True)

-    fasta_db = models.ForeignKey(FastaDB)
+    fasta_db = models.ForeignKey(FastaDB, null=True)

-    search_algorithm = models.ForeignKey(searchAlgorithm)
+    search_algorithm = models.ForeignKey(searchAlgorithm, null=True)

    description = models.TextField('Detailed description', blank=True)
@@ -141,14 +141,15 @@ class Dataset(models.Model, AdminURLMixin):
    def formated_url(self):
-        if self.processed_datasets.count() == 0:
-            return '<a href="%s">%s</a>' % (self.rawdataset.get_admin_url(), self.pk)
-        else:
-            return '<a href="%s">%s</a>' % (self.processeddataset.get_admin_url(), self.pk)
+        try:
+            return '<a href="%s">%s</a><br />' % (self.rawdataset.get_admin_url(), self.pk)
+        except Dataset.DoesNotExist:
+            return '<a href="%s">%s</a><br />' % (self.processeddataset.get_admin_url(), self)

    def __unicode__(self):
-        return str(self.pk)
+        return ('[%s] %s') % (self.pk, self.name)
@@ -207,7 +208,7 @@ def save_file(sender, instance, created, **kwargs):
@receiver(pre_delete, sender = RawDataset)
-def remove_CLPeptide(sender, instance, **kwargs):
+def remove_CLPeptideRD(sender, instance, **kwargs):
    # Remove CLpeptides not linked to any dataset after deletion
    for clpep in instance.clpeptide_set.all():
@@ -280,15 +281,15 @@ class CLPeptide(models.Model):
    notes = models.CharField(max_length=100)

    LINK_TYPE_CHOICES = (
-        (1, 'Inter-protein'),
-        (2, 'Intra-protein'),
-        (3, 'Intra-peptide'),
-        (4, 'Dead-end'),
+        ('Inter-protein', 'Inter-protein'),
+        ('Intra-protein', 'Intra-protein'),
+        ('Intra-peptide', 'Intra-peptide'),
+        ('Dead-end', 'Dead-end'),
    )
-    link_type = models.IntegerField(choices=LINK_TYPE_CHOICES)
+    link_type = models.CharField(max_length=50, choices=LINK_TYPE_CHOICES)

-    cross_link = models.BooleanField()
+    cross_link = models.BooleanField('Inter-peptide cross-link')

    not_decoy = models.BooleanField()
@@ -311,24 +312,26 @@ class CLPeptide(models.Model):
        providing this information.
        """

-        self.link_type = 1
-        self.cross_link = False
+        self.link_type = 'Inter-protein'
+        self.cross_link = True

        if self.peptide_wo_mod1 == self.peptide_wo_mod2:
-            self.link_type = 1
+            self.link_type = 'Inter-protein'
            self.cross_link = True
        elif self.display_protein1 == self.display_protein2:
-            self.link_type = 2
+            self.link_type = 'Intra-protein'
            self.cross_link = True
        else:
            if self.pep2_link_pos == -1:
-                self.link_type = 4
+                self.link_type = 'Dead-end'
                self.cross_link = False
            elif self.pep2_link_pos > -1 and self.peptide_wo_mod2 == '-':
-                self.link_type = 3
+                self.link_type = 'Intra-peptide'
                self.cross_link = False
@@ -352,6 +355,15 @@ class CLPeptideFilter(models.Model):
    description = models.TextField('Detailed description', blank=True)
    fp_cutoff = models.FloatField('False positive cutoff', help_text='Range from 0 to 1. Group datasets before applying this filter.',
                                  blank=True, null=True)
    remove_decoy = models.BooleanField('Remove decoy hits')

    def __unicode__(self):
        return ('[%s] %s') % (self.pk, self.name)
class CLPeptideFilterParam(models.Model):
@@ -359,8 +371,8 @@ class CLPeptideFilterParam(models.Model):
    clpeptidefilter = models.ForeignKey(CLPeptideFilter)

    METHOD_CHOICES = (
-        ('Exclude', 'Exclude'),
-        ('Filter', 'Filter'),
+        ('exclude', 'Exclude'),
+        ('filter', 'Filter'),
    )
@@ -369,21 +381,21 @@ class CLPeptideFilterParam(models.Model):
    field = models.CharField(max_length = 100, choices = [(field.name, field.name) for field in CLPeptide._meta.fields])

    LOOKUP_CHOICES = (
-        ('Exact match', 'exact'),
-        ('Exact match case insentive', 'iexact'),
-        ('Contains', 'contains'),
-        ('Contains case insentive', 'contains'),
-        ('Greater than', 'gt'),
-        ('Greater than or equal to', 'gte'),
-        ('Less than', 'lt'),
-        ('Less than or equal to', 'lte'),
-        ('Starts-with', 'startswith'),
-        ('Starts-with case insensitive', 'istartswith'),
-        ('Ends-with', 'endswith'),
-        ('Ends-with case insensitive', 'iendswith'),
-        ('Is null', 'isnull'),
-        ('Regular expression match', 'regex'),
-        ('Regular expression match case insensitive', 'iregex'),
+        ('exact', 'Exact match'),
+        ('iexact', 'Exact match case insensitive'),
+        ('contains', 'Contains'),
+        ('icontains', 'Contains case insensitive'),
+        ('gt', 'Greater than'),
+        ('gte', 'Greater than or equal to'),
+        ('lt', 'Less than'),
+        ('lte', 'Less than or equal to'),
+        ('startswith', 'Starts-with'),
+        ('istartswith', 'Starts-with case insensitive'),
+        ('endswith', 'Ends-with'),
+        ('iendswith', 'Ends-with case insensitive'),
+        ('isnull', 'Is null'),
+        ('regex', 'Regular expression match'),
+        ('iregex', 'Regular expression match case insensitive'),
    )
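Swapping the tuple order in LOOKUP_CHOICES (and METHOD_CHOICES) means the stored value is now the actual Django lookup suffix, which is what DatasetProcessing.process() concatenates into a queryset keyword argument. A minimal sketch of that construction, using a hypothetical field/lookup/value combination:

field, field_lookup, value = 'match_score', 'gte', '10'
kwargs = {field + '__' + field_lookup: value}
print(kwargs)  # {'match_score__gte': '10'}
# In process() this dict is applied as clpeptide_set.filter(**kwargs)
# or clpeptide_set.exclude(**kwargs), depending on the param's method.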
@@ -402,6 +414,13 @@ class ProcessedDataset(Dataset):
    datasets = models.ManyToManyField(Dataset, related_name='processed_datasets')
-    clpeptidefilter = models.ForeignKey(CLPeptideFilter)
+    clpeptidefilter = models.ForeignKey(CLPeptideFilter, blank=True, null=True)


@receiver(pre_delete, sender = ProcessedDataset)
def remove_CLPeptidePD(sender, instance, **kwargs):
    # Remove CLpeptides not linked to any dataset after deletion
    for clpep in instance.clpeptide_set.all():
        if clpep.dataset.count() == 1:
            clpep.delete()
\ No newline at end of file