Commit c23231d5 authored by Mathieu Courcelles's avatar Mathieu Courcelles

Functional ProcessedDataset

parent b32e2f66
## Copyright 2013 Mathieu Courcelles
## Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard libraries
# Import Django related libraries
# Import project libraries
from CLMSpipeline.CLMSpipeline_app.models import ProcessedDataset
class DatasetProcessing:
    """Fill a ProcessedDataset with the CLPeptides of its source datasets."""

    @staticmethod
    def _apply_filter_params(clpeptide_set, clpeptidefilter):
        """Return clpeptide_set narrowed by every parameter of the filter.

        Each parameter is turned into a ``<field>__<lookup>=value`` keyword
        and applied with ``filter()`` when its method is 'filter', and with
        ``exclude()`` for any other method.
        """
        for param in clpeptidefilter.clpeptidefilterparam_set.all():
            lookup = {param.field + '__' + param.field_lookup: param.value}
            if param.method == 'filter':
                clpeptide_set = clpeptide_set.filter(**lookup)
            else:
                clpeptide_set = clpeptide_set.exclude(**lookup)
        return clpeptide_set

    @staticmethod
    def _fp_min_score(clpeptide_set, fp_cutoff, default_score):
        """Return the score at which the false positive cutoff is reached.

        Walks all peptides while tracking the running fraction of decoy hits
        (decoys seen so far / peptides seen so far). The returned value is
        the match score of the last peptide for which that fraction is still
        at or below fp_cutoff, or default_score when no peptide qualifies.
        """
        # NOTE(review): this is only meaningful if clpeptide_set is iterated
        # by decreasing match_score -- confirm the model's default ordering.
        min_score = default_score
        total = 0.0
        decoys = 0.0
        for clpep in clpeptide_set:
            total += 1
            if not clpep.not_decoy:
                decoys += 1
            if (decoys / total) <= fp_cutoff:
                min_score = clpep.match_score
        return min_score

    @staticmethod
    def process(instance):
        """Populate the processed dataset `instance` from its source datasets.

        For every dataset in ``instance.datasets``:
          * its CLPeptides are narrowed with the parameters of
            ``instance.clpeptidefilter`` (when a filter is set);
          * a minimum score is derived from the filter's false positive
            cutoff (when one is set) and noted in the description;
          * the retained peptides are linked to `instance`;
          * cross_linker, instrument_name, fasta_db and search_algorithm are
            compared with the other datasets; a field whose values differ
            is recorded as 'Mixed'.

        The collected fields and the concatenated description are written
        with a queryset ``update()`` on the row whose pk is ``instance.id``.
        """
        # The filter is optional on the model (blank/null), so every access
        # to it has to tolerate None: no filter means "keep everything".
        clpeptidefilter = instance.clpeptidefilter
        has_filter = clpeptidefilter is not None
        fp_cutoff = clpeptidefilter.fp_cutoff if has_filter else None
        remove_decoy = bool(has_filter and clpeptidefilter.remove_decoy)

        # Transfer extra info
        fields = dict.fromkeys(
            ('cross_linker', 'instrument_name', 'fasta_db',
             'search_algorithm'),
            '')
        description_parts = []

        for dataset in instance.datasets.all():

            description_parts.append(dataset.description)

            # Take care first of CLPeptides
            clpeptide_set = dataset.clpeptide_set.all()

            # Apply filter/exclude to CLPeptides
            if has_filter:
                clpeptide_set = DatasetProcessing._apply_filter_params(
                    clpeptide_set, clpeptidefilter)

            # Determine false positive cutoff score
            min_score = 100000
            if fp_cutoff is not None:
                min_score = DatasetProcessing._fp_min_score(
                    clpeptide_set, fp_cutoff, min_score)
                description_parts.append(
                    'False positive cutoff (%s) at score %.2f.\n'
                    % (fp_cutoff, min_score))

            # Add peptides to the new dataset
            for clpep in clpeptide_set:

                # Skip decoy if requested
                if remove_decoy and not clpep.not_decoy:
                    continue

                # Limit false positive: stop at the first peptide scoring
                # below the cutoff score (relies on the same ordering
                # assumption as _fp_min_score).
                if fp_cutoff is not None and clpep.match_score < min_score:
                    break

                # Add peptide to dataset
                clpep.dataset.add(instance)
                clpep.save()

            # Check extra info homogeneity
            for field in fields:
                value = getattr(dataset, field)
                if fields[field] == '':
                    fields[field] = value
                elif fields[field] != value:
                    fields[field] = 'Mixed'

        # Update extra info in database.
        # update() is used instead of save() so that no post_save signal is
        # emitted for this write.
        # NOTE(review): the extra info fields may still hold '' (no source
        # dataset) or 'Mixed' here -- verify these values are acceptable for
        # the corresponding model columns.
        fields['description'] = ''.join(description_parts)
        ProcessedDataset.objects.filter(pk=instance.id).update(**fields)
\ No newline at end of file
...@@ -22,6 +22,7 @@ from django.dispatch import receiver ...@@ -22,6 +22,7 @@ from django.dispatch import receiver
# Import project libraries # Import project libraries
import CLMSpipeline.CLMSpipeline_app.models as CLMSpipelineModels import CLMSpipeline.CLMSpipeline_app.models as CLMSpipelineModels
from CLMSpipeline.CLMSpipeline_app.parser.parser_generic import parser_generic from CLMSpipeline.CLMSpipeline_app.parser.parser_generic import parser_generic
from CLMSpipeline.CLMSpipeline_app.DatasetProcessing import DatasetProcessing
import export import export
#from adminWidgets import * #from adminWidgets import *
...@@ -52,7 +53,7 @@ class CLPeptideAdmin(admin.ModelAdmin): ...@@ -52,7 +53,7 @@ class CLPeptideAdmin(admin.ModelAdmin):
def getDataset(self, obj): def getDataset(self, obj):
return ''.join([dataset.formated_url() for dataset in obj.dataset.all()]) return ''.join([dataset.formated_url() for dataset in obj.dataset.all()])
getDataset.allow_tags = True getDataset.allow_tags = True
getDataset.short_description = 'Dataset' getDataset.short_description = 'Datasets'
...@@ -92,8 +93,14 @@ class CLPeptideFilterAdminInline(admin.TabularInline): ...@@ -92,8 +93,14 @@ class CLPeptideFilterAdminInline(admin.TabularInline):
class CLPeptideFilterAdmin(admin.ModelAdmin): class CLPeptideFilterAdmin(admin.ModelAdmin):
inlines = [ CLPeptideFilterAdminInline ] inlines = [ CLPeptideFilterAdminInline ]
date_hierarchy = 'creation_date'
list_display = ('name', 'description', 'creation_date')
save_as = True
search_fields = ('name', 'description')
...@@ -147,14 +154,85 @@ class RawDatasetAdmin(admin.ModelAdmin): ...@@ -147,14 +154,85 @@ class RawDatasetAdmin(admin.ModelAdmin):
if instance.parsing_status == False and created == False: if instance.parsing_status == False and created == False:
instance.parsing_log = parser_generic.parseResults(instance) instance.parsing_log = parser_generic.parseResults(instance)
class ProcessedDatasetAdmin(admin.ModelAdmin):
    """Admin panel for ProcessedDataset.

    Saving a ProcessedDataset from this panel stores its many-to-many
    selection and then re-saves the object, so that the post_save handler
    (process_file) runs DatasetProcessing with the selection in place.
    """

    actions = ['make_Xi_csv']
    date_hierarchy = 'creation_date'
    filter_horizontal = ('datasets',)
    list_display = ('name', 'prefix', 'getDataset', 'clpeptidefilter',
                    'cross_linker', 'instrument_name', 'fasta_db',
                    'search_algorithm', 'description',
                    'creation_date', 'get_CLPeptides_URL')
    list_filter = ('cross_linker', 'instrument_name', 'fasta_db',
                   'search_algorithm')
    readonly_fields = ('cross_linker', 'instrument_name', 'fasta_db',
                       'search_algorithm')
    # A ForeignKey cannot be searched directly; follow the relation to a
    # text field of CLPeptideFilter instead.
    # NOTE(review): 'file' is kept from the original -- confirm that
    # ProcessedDataset actually has such a field.
    search_fields = ('name', 'prefix', 'file', 'description',
                     'clpeptidefilter__name')

    def getDataset(self, obj):
        """Return the concatenated HTML links of the source datasets."""
        return ''.join([dataset.formated_url()
                        for dataset in obj.datasets.all()])
    getDataset.allow_tags = True
    getDataset.short_description = 'Datasets'

    def get_readonly_fields(self, request, obj=None):
        """Return the read-only fields for the add/change form.

        Once the object exists and has at least one source dataset, the
        'datasets' and 'clpeptidefilter' inputs are made read-only too.
        """
        readonly_fields = list(self.readonly_fields)
        if obj is not None and obj.datasets.count() != 0:
            readonly_fields.extend(['datasets', 'clpeptidefilter'])
        return readonly_fields

    def get_CLPeptides_URL(self, obj):
        """Return an HTML link to the CLPeptide list filtered on obj."""
        return '<a href="/admin/CLMSpipeline_app/clpeptide/?dataset__id__exact=%s">See</a>' % (obj.pk,)
    get_CLPeptides_URL.allow_tags = True
    get_CLPeptides_URL.short_description = 'CLPeptides'

    def make_Xi_csv(self, request, queryset):
        """Admin action: export the CLPeptides of the selected datasets."""
        from itertools import chain
        clpeptides = list(chain.from_iterable(
            d.clpeptide_set.all() for d in queryset))
        return export.make_Xi_csv(self, request, clpeptides,
                                  'Dataset_CLPeptide')
    make_Xi_csv.short_description = 'Export as Xi CSV'

    def save_model(self, request, obj, form, change):
        """Save obj, then its m2m data, then obj again."""
        # Need to save m2m before launching the processing
        # Advice from http://makkalot-opensource.blogspot.ca/2009/01/django-admin-manytomany-behaviour.html
        super(ProcessedDatasetAdmin, self).save_model(request, obj, form,
                                                      change)
        form.save_m2m()
        # Second save: emits post_save again now that the m2m rows exist.
        obj.save()

    @staticmethod
    @receiver(post_save, sender=CLMSpipelineModels.ProcessedDataset)
    def process_file(sender, instance, created, **kwargs):
        """post_save handler: run DatasetProcessing on the saved instance."""
        DatasetProcessing.process(instance)
## Register admin panels ## Register admin panels
admin.site.register(CLMSpipelineModels.RawDataset, RawDatasetAdmin) admin.site.register(CLMSpipelineModels.RawDataset, RawDatasetAdmin)
admin.site.register(CLMSpipelineModels.ProcessedDataset) admin.site.register(CLMSpipelineModels.ProcessedDataset, ProcessedDatasetAdmin)
admin.site.register(CLMSpipelineModels.CrossLinker) admin.site.register(CLMSpipelineModels.CrossLinker)
admin.site.register(CLMSpipelineModels.FastaDB) admin.site.register(CLMSpipelineModels.FastaDB)
admin.site.register(CLMSpipelineModels.Instrument) admin.site.register(CLMSpipelineModels.Instrument)
admin.site.register(CLMSpipelineModels.CLPeptide, CLPeptideAdmin) admin.site.register(CLMSpipelineModels.CLPeptide, CLPeptideAdmin)
admin.site.register(CLMSpipelineModels.searchAlgorithm) admin.site.register(CLMSpipelineModels.searchAlgorithm)
admin.site.register(CLMSpipelineModels.CLPeptideFilter, CLPeptideFilterAdmin) admin.site.register(CLMSpipelineModels.CLPeptideFilter, CLPeptideFilterAdmin)
...@@ -124,13 +124,13 @@ class Dataset(models.Model, AdminURLMixin): ...@@ -124,13 +124,13 @@ class Dataset(models.Model, AdminURLMixin):
prefix = models.CharField(max_length=25, prefix = models.CharField(max_length=25,
help_text='Short name that will be appended in comparison.') help_text='Short name that will be appended in comparison.')
cross_linker = models.ForeignKey(CrossLinker) cross_linker = models.ForeignKey(CrossLinker, null=True)
instrument_name = models.ForeignKey(Instrument) instrument_name = models.ForeignKey(Instrument, null=True)
fasta_db = models.ForeignKey(FastaDB) fasta_db = models.ForeignKey(FastaDB, null=True)
search_algorithm = models.ForeignKey(searchAlgorithm) search_algorithm = models.ForeignKey(searchAlgorithm, null=True)
description = models.TextField('Detailed description', blank=True) description = models.TextField('Detailed description', blank=True)
...@@ -141,14 +141,15 @@ class Dataset(models.Model, AdminURLMixin): ...@@ -141,14 +141,15 @@ class Dataset(models.Model, AdminURLMixin):
def formated_url(self): def formated_url(self):
if self.processed_datasets.count() == 0:
return '<a href="%s">%s</a>' % (self.rawdataset.get_admin_url(), self.pk) try:
else: return '<a href="%s">%s</a><br />' % (self.rawdataset.get_admin_url(), self.pk)
return '<a href="%s">%s</a>' % (self.processeddataset.get_admin_url(), self.pk) except Dataset.DoesNotExist:
return '<a href="%s">%s</a><br />' % (self.processeddataset.get_admin_url(), self)
def __unicode__(self): def __unicode__(self):
return str(self.pk) return ('[%s] %s') % (self.pk, self.name)
...@@ -207,7 +208,7 @@ def save_file(sender, instance, created, **kwargs): ...@@ -207,7 +208,7 @@ def save_file(sender, instance, created, **kwargs):
@receiver(pre_delete, sender = RawDataset) @receiver(pre_delete, sender = RawDataset)
def remove_CLPeptide(sender, instance, **kwargs): def remove_CLPeptideRD(sender, instance, **kwargs):
# Remove CLpeptides not linked to any dataset after deletion # Remove CLpeptides not linked to any dataset after deletion
for clpep in instance.clpeptide_set.all(): for clpep in instance.clpeptide_set.all():
...@@ -280,15 +281,15 @@ class CLPeptide(models.Model): ...@@ -280,15 +281,15 @@ class CLPeptide(models.Model):
notes = models.CharField(max_length=100) notes = models.CharField(max_length=100)
LINK_TYPE_CHOICES = ( LINK_TYPE_CHOICES = (
(1, 'Inter-protein'), ('Inter-protein', 'Inter-protein'),
(2, 'Intra-protein'), ('Intra-protein', 'Intra-protein'),
(3, 'Intra-peptide'), ('Intra-peptide', 'Intra-peptide'),
(4, 'Dead-end'), ('Dead-end', 'Dead-end'),
) )
link_type = models.IntegerField(choices=LINK_TYPE_CHOICES) link_type = models.CharField(max_length=50, choices=LINK_TYPE_CHOICES)
cross_link = models.BooleanField() cross_link = models.BooleanField('Inter-peptide cross-link')
not_decoy = models.BooleanField() not_decoy = models.BooleanField()
...@@ -311,24 +312,26 @@ class CLPeptide(models.Model): ...@@ -311,24 +312,26 @@ class CLPeptide(models.Model):
providing this information. providing this information.
""" """
self.link_type = 1 self.link_type = 'Inter-protein'
self.cross_link = False self.cross_link = True
if self.peptide_wo_mod1 == self.peptide_wo_mod2: if self.peptide_wo_mod1 == self.peptide_wo_mod2:
self.link_type = 1 self.link_type = 'Inter-protein'
self.cross_link = True self.cross_link = True
elif self.display_protein1 == self.display_protein2: elif self.display_protein1 == self.display_protein2:
self.link_type = 2 self.link_type = 'Intra-protein'
self.cross_link = True self.cross_link = True
else: else:
if self.pep2_link_pos == -1: if self.pep2_link_pos == -1:
self.link_type = 4 self.link_type = 'Dead-end'
self.cross_link = False
elif self.pep2_link_pos > -1 and self.peptide_wo_mod2 == '-': elif self.pep2_link_pos > -1 and self.peptide_wo_mod2 == '-':
self.link_type = 3 self.link_type = 'Intra-peptide'
self.cross_link = False
...@@ -351,6 +354,15 @@ class CLPeptideFilter(models.Model): ...@@ -351,6 +354,15 @@ class CLPeptideFilter(models.Model):
name = models.CharField(max_length=100) name = models.CharField(max_length=100)
description = models.TextField('Detailed description', blank=True) description = models.TextField('Detailed description', blank=True)
fp_cutoff = models.FloatField('False positive cutoff', help_text='Range from 0 to 1. Group datasets before applying this filter.',
blank=True, null=True)
remove_decoy = models.BooleanField('Remove decoy hits')
def __unicode__(self):
return ('[%s] %s') % (self.pk, self.name)
...@@ -359,8 +371,8 @@ class CLPeptideFilterParam(models.Model): ...@@ -359,8 +371,8 @@ class CLPeptideFilterParam(models.Model):
clpeptidefilter = models.ForeignKey(CLPeptideFilter) clpeptidefilter = models.ForeignKey(CLPeptideFilter)
METHOD_CHOICES = ( METHOD_CHOICES = (
('Exclude', 'Exclude'), ('exclude', 'Exclude'),
('Filter', 'Filter'), ('filter', 'Filter'),
) )
...@@ -369,21 +381,21 @@ class CLPeptideFilterParam(models.Model): ...@@ -369,21 +381,21 @@ class CLPeptideFilterParam(models.Model):
field = models.CharField(max_length = 100, choices = [(field.name, field.name) for field in CLPeptide._meta.fields]) field = models.CharField(max_length = 100, choices = [(field.name, field.name) for field in CLPeptide._meta.fields])
LOOKUP_CHOICES = ( LOOKUP_CHOICES = (
('Exact match', 'exact'), ('exact', 'Exact match'),
('Exact match case insentive', 'iexact'), ('iexact', 'Exact match case insentive'),
('Contains', 'contains'), ('contains', 'Contains'),
('Contains case insentive', 'contains'), ('icontains', 'Contains case insentive'),
('Greater than', 'gt'), ('gt', 'Greater than'),
('Greater than or equal to', 'gte'), ('gte', 'Greater than or equal to'),
('Less than', 'lt'), ('lt', 'Less than'),
('Less than or equal to', 'lte'), ('lte', 'Less than or equal to' ),
('Starts-with', 'startswith'), ('startswith', 'Starts-with' ),
('Starts-with case insensitive', 'istartswith'), ('istartswith', 'Starts-with case insensitive'),
('Ends-with', 'endswith'), ('endswith', 'Ends-with'),
('Ends-with case insensitive', 'iendswith'), ('iendswith', 'Ends-with case insensitive'),
('Is null', 'isnull'), ('isnull', 'Is null'),
('Regular expression match', 'regex'), ('regex', 'Regular expression match'),
('Regular expression match case insensitive', 'iregex'), ('iregex', 'Regular expression match case insensitive'),
) )
...@@ -402,6 +414,13 @@ class ProcessedDataset(Dataset): ...@@ -402,6 +414,13 @@ class ProcessedDataset(Dataset):
datasets = models.ManyToManyField(Dataset, related_name='processed_datasets') datasets = models.ManyToManyField(Dataset, related_name='processed_datasets')
clpeptidefilter = models.ForeignKey(CLPeptideFilter) clpeptidefilter = models.ForeignKey(CLPeptideFilter, blank=True, null=True)
@receiver(pre_delete, sender=ProcessedDataset)
def remove_CLPeptidePD(sender, instance, **kwargs):
    """pre_delete handler: drop CLPeptides linked to exactly one dataset.

    Every CLPeptide of the ProcessedDataset about to be deleted is checked;
    a peptide whose dataset relation holds exactly one entry is deleted,
    all others are left alone.
    """
    for peptide in instance.clpeptide_set.all():
        dataset_links = peptide.dataset.count()
        if dataset_links != 1:
            # Not linked to exactly one dataset: keep the peptide.
            continue
        peptide.delete()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment