Commit 448dedd0 authored by Mathieu Courcelles

Adds data set import for XlinkX

parent 33ca7199
Pipeline #9340838 canceled with stage
No preview for this file type
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    """
    Schema migration following ``0011_auto_20170316_0911``.

    Adds four nullable float columns (``n_score_a``, ``n_score_a_MS2_MS3``,
    ``n_score_b``, ``n_score_b_MS2_MS3``) to ``clpeptide`` and redefines the
    choices of ``clpeptidefilterparam.field`` so they include those columns.
    """

    dependencies = [
        ('CLMSVault_app', '0011_auto_20170316_0911'),
    ]

    operations = [
        # New nullable score columns on clpeptide.
        migrations.AddField(
            model_name='clpeptide',
            name='n_score_a',
            field=models.FloatField(blank=True, null=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='clpeptide',
            name='n_score_a_MS2_MS3',
            field=models.FloatField(blank=True, null=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='clpeptide',
            name='n_score_b',
            field=models.FloatField(blank=True, null=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='clpeptide',
            name='n_score_b_MS2_MS3',
            field=models.FloatField(blank=True, null=True),
            preserve_default=True,
        ),
        # Filter parameter choices, extended with the four score columns
        # (last four entries).
        migrations.AlterField(
            model_name='clpeptidefilterparam',
            name='field',
            field=models.CharField(
                max_length=100,
                choices=[
                    ('id', 'id'),
                    (b'run_name', b'run_name'),
                    (b'scan_number', b'scan_number'),
                    (b'precursor_mz', b'precursor_mz'),
                    (b'precursor_charge', b'precursor_charge'),
                    (b'precursor_intensity', b'precursor_intensity'),
                    (b'rank', b'rank'),
                    (b'match_score', b'match_score'),
                    (b'spectrum_intensity_coverage', b'spectrum_intensity_coverage'),
                    (b'total_fragment_matches', b'total_fragment_matches'),
                    (b'delta', b'delta'),
                    (b'error', b'error'),
                    (b'peptide1', b'peptide1'),
                    (b'peptide_wo_mod1', b'peptide_wo_mod1'),
                    (b'display_protein1', b'display_protein1'),
                    (b'fs_prot1_id', b'fs_prot1_id'),
                    (b'peptide_position1', b'peptide_position1'),
                    (b'pep1_link_pos', b'pep1_link_pos'),
                    (b'peptide2', b'peptide2'),
                    (b'peptide_wo_mod2', b'peptide_wo_mod2'),
                    (b'display_protein2', b'display_protein2'),
                    (b'fs_prot2_id', b'fs_prot2_id'),
                    (b'peptide_position2', b'peptide_position2'),
                    (b'pep2_link_pos', b'pep2_link_pos'),
                    (b'autovalidated', b'autovalidated'),
                    (b'validated', b'validated'),
                    (b'rejected', b'rejected'),
                    (b'notes', b'notes'),
                    (b'link_type', b'link_type'),
                    (b'cross_link', b'cross_link'),
                    (b'not_decoy', b'not_decoy'),
                    (b'scan_file_index', b'scan_file_index'),
                    (b'retention_time', b'retention_time'),
                    (b'n_score_a', b'n_score_a'),
                    (b'n_score_a_MS2_MS3', b'n_score_a_MS2_MS3'),
                    (b'n_score_b', b'n_score_b'),
                    (b'n_score_b_MS2_MS3', b'n_score_b_MS2_MS3'),
                ],
            ),
        ),
    ]
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    """
    Schema migration following ``0012_auto_20170626_1047``.

    Adds the nullable integer column ``spectra_num`` to ``clpeptide`` and
    redefines the choices of ``clpeptidefilterparam.field`` so they include
    that column.
    """

    dependencies = [
        ('CLMSVault_app', '0012_auto_20170626_1047'),
    ]

    operations = [
        # New nullable integer column on clpeptide.
        migrations.AddField(
            model_name='clpeptide',
            name='spectra_num',
            field=models.IntegerField(blank=True, null=True),
            preserve_default=True,
        ),
        # Filter parameter choices, extended with spectra_num (last entry).
        migrations.AlterField(
            model_name='clpeptidefilterparam',
            name='field',
            field=models.CharField(
                max_length=100,
                choices=[
                    ('id', 'id'),
                    (b'run_name', b'run_name'),
                    (b'scan_number', b'scan_number'),
                    (b'precursor_mz', b'precursor_mz'),
                    (b'precursor_charge', b'precursor_charge'),
                    (b'precursor_intensity', b'precursor_intensity'),
                    (b'rank', b'rank'),
                    (b'match_score', b'match_score'),
                    (b'spectrum_intensity_coverage', b'spectrum_intensity_coverage'),
                    (b'total_fragment_matches', b'total_fragment_matches'),
                    (b'delta', b'delta'),
                    (b'error', b'error'),
                    (b'peptide1', b'peptide1'),
                    (b'peptide_wo_mod1', b'peptide_wo_mod1'),
                    (b'display_protein1', b'display_protein1'),
                    (b'fs_prot1_id', b'fs_prot1_id'),
                    (b'peptide_position1', b'peptide_position1'),
                    (b'pep1_link_pos', b'pep1_link_pos'),
                    (b'peptide2', b'peptide2'),
                    (b'peptide_wo_mod2', b'peptide_wo_mod2'),
                    (b'display_protein2', b'display_protein2'),
                    (b'fs_prot2_id', b'fs_prot2_id'),
                    (b'peptide_position2', b'peptide_position2'),
                    (b'pep2_link_pos', b'pep2_link_pos'),
                    (b'autovalidated', b'autovalidated'),
                    (b'validated', b'validated'),
                    (b'rejected', b'rejected'),
                    (b'notes', b'notes'),
                    (b'link_type', b'link_type'),
                    (b'cross_link', b'cross_link'),
                    (b'not_decoy', b'not_decoy'),
                    (b'scan_file_index', b'scan_file_index'),
                    (b'retention_time', b'retention_time'),
                    (b'n_score_a', b'n_score_a'),
                    (b'n_score_a_MS2_MS3', b'n_score_a_MS2_MS3'),
                    (b'n_score_b', b'n_score_b'),
                    (b'n_score_b_MS2_MS3', b'n_score_b_MS2_MS3'),
                    (b'spectra_num', b'spectra_num'),
                ],
            ),
        ),
    ]
......@@ -470,8 +470,19 @@ class CLPeptide(AdminURLMixin, models.Model):
scan_file_index = models.BigIntegerField(default=-1)
retention_time = models.FloatField(default=0)
# Fields for XlinkX
n_score_a = models.FloatField(blank=True, null=True)
n_score_a_MS2_MS3 = models.FloatField(blank=True, null=True)
n_score_b = models.FloatField(blank=True, null=True)
n_score_b_MS2_MS3 = models.FloatField(blank=True, null=True)
spectra_num = models.IntegerField(blank=True, null=True)
class Meta:
ordering = ['-match_score']
......
"""
Copyright 2013-2017 Mathieu Courcelles
CAPA - Center for Advanced Proteomics Analyses
Mike Tyers's lab
Pierre Thibault's lab
IRIC - Universite de Montreal
"""
# Import standard libraries
import csv
from math import log10
import re
# Import Django related libraries
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
class XlinkXParser:
    """
    Parser for XlinkX results.

    ``parseResults`` is the entry point: it validates the header row of the
    results file, converts every data row into a ``CLPeptide`` and records
    the parsing outcome on the ``RawDataset`` row.
    """

    # Column names that must open the first row of a XlinkX results file.
    _HEADER_TEMPLATE = (
        'index,file_name,scan,charge,precursor_mass,spectra_num,peptide_a,xl_a,pep_pos_a,'
        'protein_a,mass_a,n_score_a,n_score_a_MS2_MS3,peptide_b,xl_b,pep_pos_b,protein_b,'
        'mass_b,n_score_b,n_score_b_MS2_MS3,protein_a_b'
    )

    # Mass added per charge when converting the precursor mass to m/z.
    # NOTE(review): the value matches the proton mass in Da - confirm.
    _CHARGE_CARRIER_MASS = 1.00727646677

    # Placeholder stored in CLPeptide fields that are not read from the file.
    _NOT_AVAILABLE = -1

    # Pattern removing a leading '>' from protein names.
    _LEADING_MARKER = '(^>)'

    @staticmethod
    def _isMinusOne(value):
        """
        Return True when ``value`` (text or number) is the integer -1.

        CSV cells are read as text, so a direct ``== -1`` comparison against
        them is always False; the value is parsed first. Values that cannot
        be parsed as an integer are treated as "not -1".
        """
        try:
            return int(value) == -1
        except (TypeError, ValueError):
            return False

    @staticmethod
    def _buildFields(row):
        """
        Translate one row of the XlinkX CSV into ``CLPeptide`` keyword
        arguments.

        :param row: mapping of column name to cell text, as produced by
                    ``csv.DictReader``.
        :return: dict of field names and values for ``CLPeptide(**fields)``.
        :raises KeyError: when an expected column is missing from ``row``.
        :raises ValueError: when a numeric cell cannot be converted, or when
                            the product of the two MS2/MS3 scores is not
                            positive (``log10`` domain error).
        """
        charge = float(row['charge'])
        missing = XlinkXParser._NOT_AVAILABLE

        fields = dict()

        # Spectrum identification
        fields['run_name'] = row['file_name']
        fields['scan_number'] = row['scan']
        # NOTE(review): '%4f' means "minimum width 4, six decimals"; if four
        # decimals were intended this should be '%.4f' - confirm before
        # changing, stored values would differ.
        fields['precursor_mz'] = '%4f' % (
            (float(row['precursor_mass']) + charge * XlinkXParser._CHARGE_CARRIER_MASS) / charge)
        fields['precursor_charge'] = row['charge']
        fields['precursor_intensity'] = missing
        fields['rank'] = missing
        fields['spectra_num'] = row['spectra_num']

        # Scores: match_score is -log10 of the product of both MS2/MS3 scores.
        fields['match_score'] = log10(
            float(row['n_score_a_MS2_MS3']) * float(row['n_score_b_MS2_MS3'])) * -1.0
        fields['n_score_a'] = row['n_score_a']
        fields['n_score_a_MS2_MS3'] = row['n_score_a_MS2_MS3']
        fields['n_score_b'] = row['n_score_b']
        fields['n_score_b_MS2_MS3'] = row['n_score_b_MS2_MS3']
        fields['spectrum_intensity_coverage'] = missing
        fields['total_fragment_matches'] = missing
        fields['delta'] = missing
        fields['error'] = missing

        # Peptides and link positions
        fields['peptide1'] = row['peptide_a']
        fields['peptide_wo_mod1'] = row['peptide_a']  # Need example
        fields['pep1_link_pos'] = int(row['xl_a'])
        fields['peptide_position1'] = row['pep_pos_a']
        fields['peptide2'] = row['peptide_b']
        fields['peptide_wo_mod2'] = row['peptide_b']  # Need example
        fields['pep2_link_pos'] = int(row['xl_b'])
        fields['peptide_position2'] = row['pep_pos_b']

        # Protein names, without the leading '>'
        fields['display_protein1'] = re.sub(
            XlinkXParser._LEADING_MARKER, '', row['protein_a'])
        fields['display_protein2'] = re.sub(
            XlinkXParser._LEADING_MARKER, '', row['protein_b'])

        # When the second peptide has a link position but no peptide
        # position, reuse the position of the first peptide. The position is
        # CSV text, so it is parsed before being compared with -1 (comparing
        # the raw text with the integer never matched).
        if (XlinkXParser._isMinusOne(fields['peptide_position2'])
                and fields['pep2_link_pos'] != -1):
            fields['peptide_position2'] = fields['peptide_position1']

        # Validation flags
        fields['autovalidated'] = False
        fields['validated'] = ''
        fields['rejected'] = False
        fields['notes'] = ''

        # A row is a decoy as soon as one protein name mentions 'decoy'.
        fields['not_decoy'] = not (
            'decoy' in row['protein_a'] or 'decoy' in row['protein_b'])

        return fields

    @staticmethod
    def parseResults(dataset):
        """
        This method parses XlinkX results file provided by the data set.
        Success or failure of this procedure is noted in the data set object.

        :param dataset: data set whose ``file.path`` is the results file and
                        whose ``fasta_db.pk`` selects the sequences used to
                        match protein names.

        A bad header row is recorded as a parsing failure on the data set.
        Any other error (for instance a malformed numeric cell) is not
        caught here and propagates to the caller.
        """
        # Prepare sequenceMatcher
        sM = sequencesMatcher(dataset.fasta_db.pk)

        try:
            # Binary mode is kept from the original code (presumably for the
            # Python 2 csv module - confirm before porting).
            with open(dataset.file.path, 'rb') as f:
                # Check header to validate file format
                line = f.readline().rstrip('\n\r')
                if not line.startswith(XlinkXParser._HEADER_TEMPLATE):
                    raise InvalidFileFormatException(
                        'Uploaded extra file is not recognized as a XlinkX results file (bad file header row 1)')

                # Rewind so that DictReader reads the header row itself
                f.seek(0)
                reader = csv.DictReader(f, delimiter=',')

                # Iterate through lines and create clPeptide
                for row in reader:
                    clpep = CLPeptide(**XlinkXParser._buildFields(row))

                    # Match protein sequences
                    clpep.fs_prot1_id = sM.sequencePk(clpep.display_protein1)
                    clpep.fs_prot2_id = sM.sequencePk(clpep.display_protein2)
                    clpep.guessLinkType()

                    # Save object to db
                    clpep.save()
                    clpep.dataset.add(dataset.pk)

            # Record the successful parsing on the data set
            RawDataset.objects.filter(pk=dataset.id).update(
                parsing_log='Ok', parsing_status=True)

        except InvalidFileFormatException as e:
            # Record the failure and its reason on the data set
            RawDataset.objects.filter(pk=dataset.id).update(
                parsing_status=False, parsing_log='Error: ' + e.value)
# # Copyright 2013-2015 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
"""
Copyright 2013-2017 Mathieu Courcelles
CAPA - Center for Advanced Proteomics Analyses
Mike Tyers's lab
Pierre Thibault's lab
IRIC - Universite de Montreal
"""
# Import standard libraries
# Import Django related libraries
from django.db import transaction
......@@ -18,7 +20,7 @@ from FastaParser import FastaParser
from mascotParser import MascotParser
from QuantFC_Parser import QuantFC_Parser
from KojakParser import KojakParser
from XlinkXParser import XlinkXParser
......@@ -41,6 +43,7 @@ class parser_generic:
dispatch['pLink'] = pLinkParser
dispatch['Xi'] = XiParser
dispatch['xQuest'] = xQuestParser
dispatch['XlinkX'] = XlinkXParser
dispatch[instance.search_algorithm.name].parseResults(instance)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment