Commit 1e0b1757 authored by Mathieu Courcelles's avatar Mathieu Courcelles

-Functional stats

-Unique peptide filter
-Added Project
parent 310a18ea
......@@ -60,7 +60,7 @@ class DatasetProcessing:
# Determine false positive cutoff score
min_score = 100000;
if instance.clpeptidefilter.fp_cutoff != None:
if instance.clpeptidefilter != None and instance.clpeptidefilter.fp_cutoff != None:
clpep_count = 0.0;
clpep_decoy_count = 0.0;
......@@ -74,28 +74,66 @@ class DatasetProcessing:
if (clpep_decoy_count / clpep_count) <= instance.clpeptidefilter.fp_cutoff:
min_score = clpep.match_score
#else:
# break
dataset_description += 'False positive cutoff (%s) at score %.2f.\n' \
% (instance.clpeptidefilter.fp_cutoff, min_score)
# Unique peptide
unique_msrun_pep = dict()
unique_key = dict()
if instance.clpeptidefilter != None:
for key in instance.clpeptidefilter.unique_key.split('-'):
unique_key[key] = True;
# Add peptides to the new dataset
for clpep in clpeptide_set:
# Skip decoy if requested
if instance.clpeptidefilter.remove_decoy and clpep.not_decoy == False:
if instance.clpeptidefilter != None and instance.clpeptidefilter.remove_decoy and clpep.not_decoy == False:
continue
# Test peptide uniqueness
msrun = clpep.run_name
key = ''
if instance.clpeptidefilter != None and instance.clpeptidefilter.unique_key != None:
if instance.clpeptidefilter.unique_in == 'dataset':
msrun = 'Dataset'
# Skip peptide if already seen
print unique_key
key = clpep.uniqueKey(unique_key)
print key
if msrun in unique_msrun_pep and key in unique_msrun_pep[msrun]:
continue
# Limit false positives
if instance.clpeptidefilter.fp_cutoff != None and clpep.match_score < min_score:
if instance.clpeptidefilter != None and instance.clpeptidefilter.fp_cutoff != None and clpep.match_score < min_score:
break
# Add peptide to dataset
clpep.dataset.add(instance)
clpep.save()
# Add to unique dict
if instance.clpeptidefilter != None and instance.clpeptidefilter.unique_key != None:
if not msrun in unique_msrun_pep:
unique_msrun_pep[msrun] = dict()
unique_msrun_pep[msrun][key] = True
# Check extra info homogeneity
for field in fields:
if fields[field] == '':
......
......@@ -32,7 +32,7 @@ class CLPeptideAdmin(admin.ModelAdmin):
actions = ['make_Xi_csv']
list_display = ('pk', 'getDataset', 'run_name', 'scan_number',
list_display = ('pk', 'run_name', 'scan_number',
'precursor_mz_2d', 'precursor_charge',
'match_score_2d', 'error_2d',
'spectrum_intensity_coverage_2d',
......@@ -41,9 +41,9 @@ class CLPeptideAdmin(admin.ModelAdmin):
'peptide_position1', 'pep1_link_pos',
'display_protein2', 'peptide2',
'peptide_position2', 'pep2_link_pos',
'link_type', 'cross_link', 'autovalidated', 'not_decoy')
'link_type', 'autovalidated_f', 'not_decoy_f')
list_filter = ('dataset', 'precursor_charge',
list_filter = ('dataset__project', 'dataset', 'precursor_charge',
'link_type', 'cross_link', 'autovalidated',
'not_decoy', 'rejected' )
......@@ -52,6 +52,25 @@ class CLPeptideAdmin(admin.ModelAdmin):
'display_protein2', 'peptide2', 'peptide_wo_mod2')
def autovalidated_f(self, obj):
    """
    Expose the peptide's 'autovalidated' flag under a compact column
    header in the admin change list.
    """
    flag = obj.autovalidated
    return flag
# Column metadata read by the Django admin.
autovalidated_f.boolean = True
autovalidated_f.admin_order_field = 'autovalidated'
autovalidated_f.short_description = 'AV'
def not_decoy_f(self, obj):
    """
    Expose the peptide's 'not_decoy' flag under a compact column
    header in the admin change list.
    """
    flag = obj.not_decoy
    return flag
# Column metadata read by the Django admin.
not_decoy_f.boolean = True
not_decoy_f.admin_order_field = 'not_decoy'
not_decoy_f.short_description = 'ND'
def error_2d(self, obj):
"""
Format the 'error' field with 2 digits.
......@@ -65,7 +84,7 @@ class CLPeptideAdmin(admin.ModelAdmin):
"""
Return HTML link for all datasets that the peptides belongs.
"""
return ''.join([dataset.formated_url() for dataset in obj.dataset.all()])
return ''.join([dataset.formated_url_short() for dataset in obj.dataset.all()])
getDataset.allow_tags = True
getDataset.short_description = 'Datasets'
......@@ -143,16 +162,16 @@ class RawDatasetAdmin(admin.ModelAdmin):
"""
actions = ['make_Xi_csv']
actions = ['dataset_stats_csv', 'make_Xi_csv']
date_hierarchy = 'creation_date'
list_display = ('name', 'prefix', 'file', 'cross_linker',
list_display = ('pk', 'name', 'prefix', 'getProject', 'file', 'cross_linker',
'instrument_name', 'fasta_db',
'search_algorithm', 'description', 'parsing_status',
'creation_date', 'get_CLPeptides_URL')
list_filter = ('cross_linker', 'instrument_name',
list_filter = ('project', 'cross_linker', 'instrument_name',
'fasta_db', 'search_algorithm')
readonly_fields = ('parsing_log', 'parsing_status')
......@@ -161,6 +180,17 @@ class RawDatasetAdmin(admin.ModelAdmin):
def dataset_stats_csv(self, request, queryset):
    """
    Admin action: export cross-linked peptide statistics of the selected
    datasets as a CSV file.

    The work is delegated to export.dataset_stats_csv; this wrapper only
    supplies the table name used to build the download file name.
    """
    table_name = 'RawDataset_Statistics'
    return export.dataset_stats_csv(self, request, queryset, table_name)
dataset_stats_csv.short_description = 'Export dataset statistics to CSV'
def get_CLPeptides_URL(self, obj):
"""
Returns HTML link to see cross-linked peptides in the dataset.
......@@ -172,6 +202,15 @@ class RawDatasetAdmin(admin.ModelAdmin):
get_CLPeptides_URL.short_description = 'CLPeptides'
def getProject(self, obj):
    """
    Render the project attached to this dataset as an HTML link.
    """
    project = obj.project
    return project.formated_url()
# Column metadata read by the Django admin.
getProject.admin_order_field = 'project'
getProject.short_description = 'Project'
getProject.allow_tags = True
def get_readonly_fields(self, request, obj=None):
"""
This method sets some fields read only after dataset creation and
......@@ -219,18 +258,18 @@ class ProcessedDatasetAdmin(admin.ModelAdmin):
"""
actions = ['make_Xi_csv']
actions = ['dataset_stats_csv', 'make_Xi_csv']
date_hierarchy = 'creation_date'
filter_horizontal = ('datasets',)
list_display = ('name', 'prefix', 'getDataset', 'getFilter',
list_display = ('pk', 'name', 'prefix', 'getProject', 'getDataset', 'getFilter',
'cross_linker', 'instrument_name', 'fasta_db',
'search_algorithm', 'description',
'creation_date', 'get_CLPeptides_URL')
list_filter = ('clpeptidefilter__name', 'cross_linker',
list_filter = ('project', 'clpeptidefilter__name', 'cross_linker',
'instrument_name', 'fasta_db', 'search_algorithm')
readonly_fields = ('cross_linker', 'instrument_name',
......@@ -240,6 +279,17 @@ class ProcessedDatasetAdmin(admin.ModelAdmin):
def dataset_stats_csv(self, request, queryset):
    """
    Admin action: export cross-linked peptide statistics of the selected
    datasets as a CSV file.

    The work is delegated to export.dataset_stats_csv; this wrapper only
    supplies the table name used to build the download file name.
    """
    table_name = 'ProcessedDataset_Statistics'
    return export.dataset_stats_csv(self, request, queryset, table_name)
dataset_stats_csv.short_description = 'Export dataset statistics to CSV'
def get_CLPeptides_URL(self, obj):
"""
Returns HTML link to see cross-linked peptides in the dataset.
......@@ -258,15 +308,30 @@ class ProcessedDatasetAdmin(admin.ModelAdmin):
return ''.join([dataset.formated_url() for dataset in obj.datasets.all()])
getDataset.allow_tags = True
getDataset.short_description = 'Datasets'
getDataset.admin_order_field = 'datasets'
def getFilter(self, obj):
    """
    Returns HTML link to the filter applied to create this dataset.

    Returns an empty string when the dataset has no filter attached, so
    the admin change list can still render the row.
    """
    clpfilter = obj.clpeptidefilter
    # The None check must come before any attribute access on the filter;
    # 'is None' avoids going through a model's custom equality.
    if clpfilter is None:
        return ''
    return clpfilter.formated_url()
getFilter.allow_tags = True
getFilter.short_description = 'Filter'
getFilter.admin_order_field = 'clpeptidefilter'
def getProject(self, obj):
    """
    Render the project attached to this dataset as an HTML link.
    """
    project = obj.project
    return project.formated_url()
# Column metadata read by the Django admin.
getProject.admin_order_field = 'project'
getProject.short_description = 'Project'
getProject.allow_tags = True
def get_readonly_fields(self, request, obj=None):
......@@ -321,4 +386,5 @@ admin.site.register(CLMSpipelineModels.FastaDB)
admin.site.register(CLMSpipelineModels.Instrument)
admin.site.register(CLMSpipelineModels.CLPeptide, CLPeptideAdmin)
admin.site.register(CLMSpipelineModels.searchAlgorithm)
admin.site.register(CLMSpipelineModels.Project)
admin.site.register(CLMSpipelineModels.CLPeptideFilter, CLPeptideFilterAdmin)
......@@ -17,6 +17,108 @@ from django.http import HttpResponse
def dataset_stats_csv(self, request, queryset, tableName):
    """
    Write dataset statistics about cross-linked peptides to CSV file.

    For every dataset in `queryset` one total row (Run column 'dataset')
    is produced, followed by one row per MS run name in first-seen order.
    When the dataset has a peptide filter, its `unique_in` setting decides
    which rows are written: 'dataset' keeps only the total row, 'msrun'
    keeps only the per-run rows.

    `self` is unused; it is accepted so admin actions can forward it.
    `tableName` is the prefix of the download file name.
    Returns the HttpResponse holding the CSV attachment.
    """

    response = HttpResponse(mimetype='text/csv')
    # NOTE(review): the raw query string is copied into the header value
    # unquoted -- confirm this is acceptable for the expected filters.
    response['Content-Disposition'] = 'attachment; filename=' + tableName + \
        '_' + request.META['QUERY_STRING'] + '.csv'

    writer = csv.writer(response, quoting=csv.QUOTE_ALL)

    # Link types that have their own counter column.
    link_types = ('Inter-protein', 'Intra-protein', 'Intra-peptide',
                  'Dead-end')

    fields = OrderedDict([('Dataset', ''),
                          ('Run', ''),
                          ('PSM', 0),
                          ('Inter-protein', 0),
                          ('Intra-protein', 0),
                          ('Intra-peptide', 0),
                          ('Dead-end', 0),
                          ('Min. match score', 10000000),
                          ('Max. match score', 0),
                          ('Autovalidated', 0),
                          ('Decoy', 0),
                          ('False positive ratio', '-'),
                          ('z2', 0),
                          ('z3', 0),
                          ('z4', 0),
                          ('z5', 0),
                          ('z6', 0),
                          ('z7', 0),
                          ('z8', 0),
                          ])

    writer.writerow(list(fields.keys()))

    # Iterates through datasets
    for dataset in queryset:

        label = '[%s] %s' % (dataset.pk, dataset.name)

        # The dataset total is kept apart from the per-run rows so that a
        # run whose name equals the dataset name cannot be merged into it.
        total = fields.copy()
        total['Dataset'] = label
        total['Run'] = 'dataset'
        runs = OrderedDict()

        # Iterates over Clpeptides
        for clpep in dataset.clpeptide_set.all():

            run = runs.get(clpep.run_name)
            if run is None:
                run = fields.copy()
                run['Dataset'] = label
                run['Run'] = clpep.run_name
                runs[clpep.run_name] = run

            # '%s' formatting works whether the charge is stored as a
            # number or as text.
            charge_key = 'z%s' % clpep.precursor_charge

            for row in (total, run):
                row['PSM'] += 1

                # Values without a dedicated column are still counted as
                # PSM but must not abort the whole export.
                if clpep.link_type in link_types:
                    row[clpep.link_type] += 1
                if charge_key in fields:
                    row[charge_key] += 1

                if clpep.match_score < row['Min. match score']:
                    row['Min. match score'] = clpep.match_score
                if clpep.match_score > row['Max. match score']:
                    row['Max. match score'] = clpep.match_score

                if clpep.autovalidated:
                    row['Autovalidated'] += 1
                if not clpep.not_decoy:
                    row['Decoy'] += 1

        # Only datasets carrying a peptide filter restrict the rows.
        unique_in = None
        if hasattr(dataset, 'clpeptidefilter') and dataset.clpeptidefilter is not None:
            unique_in = dataset.clpeptidefilter.unique_in

        for row in [total] + list(runs.values()):

            is_total = row is total

            # Skip MS run stats if unique filter applied to whole dataset
            if unique_in == 'dataset' and not is_total:
                continue
            # Skip the dataset total if uniqueness was applied per MS run
            if unique_in == 'msrun' and is_total:
                continue

            if row['PSM'] != 0:
                row['False positive ratio'] = '%.2f' \
                    % (float(row['Decoy']) / float(row['PSM']))

            writer.writerow(list(row.values()))

    return response
def make_Xi_csv(self, request, queryset, tableName):
"""
Export selected objects as CSV file
......@@ -46,6 +148,7 @@ def make_Xi_csv(self, request, queryset, tableName):
('validated','validated'),
('rejected','rejected'),
('notes','notes'),
])
response = HttpResponse(mimetype='text/csv')
......@@ -73,4 +176,7 @@ def make_Xi_csv(self, request, queryset, tableName):
writer.writerow( columns )
return response
\ No newline at end of file
return response
......@@ -108,6 +108,25 @@ class searchAlgorithm(models.Model):
ordering = ['name']
class Project(models.Model, AdminURLMixin):
    """
    This class groups datasets.
    """

    # Display name of the project; must be unique.
    name = models.CharField(max_length=250, unique=True)

    def formated_url(self):
        """
        Returns HTML link for the admin panel.

        The link text is the project's string form ('[pk] name'). It is
        HTML-escaped because the name is free text and the result is
        rendered as raw HTML by admin columns flagged with allow_tags.
        """
        # Function-scope import: brings 'escape' into scope without
        # touching the module's import block.
        from django.utils.html import escape

        return '<a href="%s">%s</a><br />' % (self.get_admin_url(),
                                               escape(self))

    def __unicode__(self):
        """
        Returns the project as '[pk] name'.
        """
        return '[%s] %s' % (self.pk, self.name)
class Dataset(models.Model, AdminURLMixin):
"""
......@@ -122,6 +141,8 @@ class Dataset(models.Model, AdminURLMixin):
help_text='Short name that will be appended in \
comparison.')
project = models.ForeignKey(Project)
cross_linker = models.ForeignKey(CrossLinker, null=True)
instrument_name = models.ForeignKey(Instrument, null=True)
......@@ -144,13 +165,23 @@ class Dataset(models.Model, AdminURLMixin):
"""
try:
return '<a href="%s">%s</a><br />' % (self.rawdataset.get_admin_url(), self.pk)
return '<a href="%s">%s</a><br />' % (self.rawdataset.get_admin_url(), self)
except Dataset.DoesNotExist:
return '<a href="%s">%s</a><br />' % (self.processeddataset.get_admin_url(), self)
def formated_url_short(self):
    """
    Build the compact '[pk]' HTML link shown in the CLPeptide admin panel.

    The link targets the raw dataset admin page; when no raw dataset is
    attached (Dataset.DoesNotExist) the processed dataset page is used.
    """
    try:
        admin_url = self.rawdataset.get_admin_url()
    except Dataset.DoesNotExist:
        admin_url = self.processeddataset.get_admin_url()
    return '<a href="%s">[%s]</a><br />' % (admin_url, self.pk)
def __unicode__(self):
return ('%s') % (self.pk)
return ('[%s] %s') % (self.pk, self.name)
......@@ -348,6 +379,44 @@ class CLPeptide(models.Model):
else:
self.not_decoy = True
def uniqueKey(self, params):
    """
    Returns a unique key that describes the cross-linked peptide.

    `params` is a dict whose keys select the components of the key:
      'sequence'      both peptide sequences without modifications
      'sequenceMods'  both peptide sequences with modifications
      'sequenceCpos'  both sequences with their cross-link position
      'proteinPpos'   also accepted as the pair 'protein' + 'ppos', which
                      is what splitting the 'protein-ppos' choice on '-'
                      yields
      'charge'        the precursor charge

    Each peptide pair is sorted so the key does not depend on which
    peptide is listed first. Components are joined with '-'; an empty
    string is returned when no known selector is present.
    """

    def linked_pair():
        # '%s' formatting tolerates link positions stored as numbers.
        return sorted(['%s-%s' % (self.peptide_wo_mod1, self.pep1_link_pos),
                       '%s-%s' % (self.peptide_wo_mod2, self.pep2_link_pos)])

    key = list()

    if 'sequence' in params:
        key += sorted([self.peptide_wo_mod1, self.peptide_wo_mod2])

    if 'sequenceMods' in params:
        key += sorted([self.peptide1, self.peptide2])

    if 'sequenceCpos' in params:
        key += linked_pair()

    if 'proteinPpos' in params or ('protein' in params and 'ppos' in params):
        # NOTE(review): this component is built from peptide sequences and
        # link positions, exactly like 'sequenceCpos' -- confirm which
        # protein fields were intended here.
        key += linked_pair()

    if 'charge' in params:
        # Append as a single element: extending the list with the value
        # would split a text charge into characters or fail on a number.
        key.append('%s' % self.precursor_charge)

    return '-'.join(key)
class CLPeptideFilter(models.Model, AdminURLMixin):
......@@ -369,6 +438,30 @@ class CLPeptideFilter(models.Model, AdminURLMixin):
remove_decoy = models.BooleanField('Remove decoy hits')
UIN_CHOICES = (
('dataset', 'Dataset'),
('msrun', 'MS run'),
)
unique_in = models.CharField('Unique peptide in', max_length = 20,
choices = UIN_CHOICES,
blank=True, null=True)
UKEY_CHOICES = (
('sequence', 'Peptide sequences'),
('sequence-charge', 'Peptide sequences & charge'),
('sequenceMods', 'Peptide sequences & mods'),
('sequenceCpos', 'Peptide sequences & cross-link positions'),
('protein-ppos', 'Proteins & cross-link positions'),
)
unique_key = models.CharField('Unique peptide key', max_length = 20,
choices = UKEY_CHOICES,
blank=True, null=True)
def formated_url(self):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment