Commit 54011a34 authored by Mathieu Courcelles

Migrates to Python 3.6 and Django 1.11

parent 8c30bca4
Pipeline #14893669 (#) passed with stage
in 2 minutes and 16 seconds
No preview for this file type
......@@ -20,10 +20,10 @@ from .models import (CLPeptide,
QuantificationFC,
)
from queryset_operation import (dataset_set_2_clpeptide_set,
from .queryset_operation import (dataset_set_2_clpeptide_set,
)
import export
import CLMSVault.CLMSVault_app.export as export
......
This diff is collapsed.
......@@ -16,9 +16,9 @@ from django.http import HttpResponse
from django.template import loader, Context
# Import project libraries
from parser.exception import InvalidFileFormatException
from pdb_structure import compute_cl_distance
from queryset_operation import dataset_set_2_clpeptide_set
from .parser.exception import InvalidFileFormatException
from .pdb_structure import compute_cl_distance
from .queryset_operation import dataset_set_2_clpeptide_set
def clpep_header():
......
# # Copyright 2013-2014 Mathieu Courcelles
# # Copyright 2013-2017 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard librariesdjang
import csv
# Import standard libraries
import re
# Import Django related libraries
......@@ -11,167 +10,151 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class DXmsms14N15NParser:
    """
    Parser for DXmsms14N15N results.
    """

    @staticmethod
    def parseResults(dataset):
        """
        Parse the DXmsms14N15N results file attached to ``dataset``.

        One CLPeptide is created, saved and linked to ``dataset`` for every
        distinct scan number found in the results table.  Success or failure
        of this procedure is recorded on the RawDataset row
        (``parsing_status`` / ``parsing_log``).

        :param dataset: object providing ``file.path``, ``fasta_db.pk`` and
                        ``id`` (these are the only attributes read here).
        """

        # Prepare sequenceMatcher
        sM = sequencesMatcher(dataset.fasta_db.pk)

        bad_format = ('Uploaded extra file is not recognized as a '
                      '14N15N DX MSMS Match results file (%s)')

        # Decoy protein names: 'R' followed only by digits
        decoy_pattern = re.compile(r'^R\d+$')

        try:
            # Text mode is required on Python 3: every line is compared to and
            # split with str values, which fails on the bytes a 'rb' handle
            # returns.
            # NOTE(review): relies on the platform default encoding - confirm
            # the encoding of uploaded result files.
            with open(dataset.file.path, 'r') as f:

                # Check header to validate file format
                line = f.readline().rstrip('\n\r')
                header_template = 'Program Version:\t14N15N DX MSMS Match ESI Sorting Mass List and MGF 20141007KM'

                if header_template != line:
                    raise InvalidFileFormatException(
                        bad_format % 'bad file header')

                # Get run name from MGF file: skip two lines, then keep the
                # last backslash-separated component of the fourth line
                # without its final four characters.
                f.readline()
                f.readline()
                line = f.readline().rstrip('\n\r')
                run_name = line.split('\\')[-1][0:-4]

                # Search for protein sequences until the line starting with
                # 'BEGIN'
                protein_counter = 0
                protein_dict = dict()

                while not line.startswith('BEGIN'):
                    raw_line = f.readline()

                    # readline() returns '' forever at end of file; without
                    # this guard a file lacking a BEGIN line loops endlessly.
                    if not raw_line:
                        raise InvalidFileFormatException(
                            bad_format % 'missing BEGIN section')

                    line = raw_line.rstrip('\n\r')

                    # Read protein entries (numbered from 1 in file order)
                    if line.startswith('>'):
                        protein_counter += 1
                        protein_dict[protein_counter] = line[1:]

                # Iterate through lines and create CLPeptide; the first empty
                # line (or end of file) stops the loop.
                seen_scans = set()
                line = f.readline().rstrip('\n\r')

                while line:

                    values = line.split('\t')

                    if len(values) != 49:
                        raise InvalidFileFormatException(
                            bad_format % 'bad number of fields')

                    # Prepare next line (done first so that 'continue' below
                    # still advances through the file)
                    line = f.readline().rstrip('\n\r')

                    # Skip redundant MS/MS entries
                    scan_number = values[7]
                    if scan_number in seen_scans:
                        continue
                    seen_scans.add(scan_number)

                    # Empty cells are treated as zero
                    if values[26] == '':
                        values[26] = 0

                    if values[31] == '':
                        values[31] = 0

                    # Format each field to the right format for the data model
                    fields = dict()
                    fields['run_name'] = run_name
                    fields['scan_number'] = scan_number
                    fields['precursor_mz'] = values[3]
                    fields['precursor_charge'] = values[5]
                    fields['precursor_intensity'] = values[1]
                    fields['rank'] = values[13]
                    fields['spectrum_intensity_coverage'] = float(values[31]) / 100.0
                    fields['total_fragment_matches'] = values[30]
                    fields['match_score'] = float(values[30]) * float(values[31])
                    fields['delta'] = 0
                    fields['error'] = values[15]
                    fields['peptide1'] = values[21]
                    fields['peptide_wo_mod1'] = values[21]
                    fields['pep1_link_pos'] = int(values[19]) - int(values[17]) + 1
                    fields['peptide_position1'] = int(values[17])
                    fields['peptide2'] = values[28]
                    fields['peptide_wo_mod2'] = values[28]
                    fields['pep2_link_pos'] = int(values[26]) - int(values[24]) + 1
                    fields['peptide_position2'] = int(values[24])
                    fields['display_protein1'] = protein_dict[int(values[16])]
                    fields['display_protein2'] = protein_dict[int(values[23])]
                    fields['autovalidated'] = False
                    fields['validated'] = ''
                    fields['rejected'] = False
                    fields['notes'] = ''

                    # An entry is flagged as decoy when either protein name
                    # matches the decoy pattern
                    fields['not_decoy'] = not (
                        decoy_pattern.match(fields['display_protein1']) or
                        decoy_pattern.match(fields['display_protein2']))

                    # Create the CLPeptide object
                    clpep = CLPeptide(**fields)
                    clpep.guessLinkType()

                    # Match protein sequences
                    clpep.fs_prot1_id = sM.sequencePk(clpep.display_protein1)
                    clpep.fs_prot2_id = sM.sequencePk(clpep.display_protein2)

                    # Save object to db
                    clpep.save()

                    # Link object to dataset
                    clpep.dataset.add(dataset)

                # Record the successful parsing on the dataset
                RawDataset.objects.filter(pk=dataset.id).update(
                    parsing_log='Ok', parsing_status=True)

        except InvalidFileFormatException as e:
            RawDataset.objects.filter(pk=dataset.id).update(
                parsing_status=False, parsing_log='Error: ' + e.value)
......@@ -12,8 +12,8 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class KojakParser:
......
# # Copyright 2013-2015 Mathieu Courcelles
# # Copyright 2013-2017 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard librariesdjang
# Import standard libraries
import csv
import re
# Import Django related libraries
......@@ -15,112 +12,100 @@ from ..models import (CLPeptide,
Quantification,
QuantificationFC
)
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
class QuantFC_Parser:
    """
    Parser for Quantification fold change results.
    """

    @staticmethod
    def _saveFoldChange(quantification, clpeptide, fold_change):
        """
        Create and save one QuantificationFC linking ``clpeptide`` to
        ``quantification`` with the given ``fold_change`` value.
        """
        quantificationFC = QuantificationFC()
        quantificationFC.quantification = quantification
        quantificationFC.clpeptide = clpeptide
        quantificationFC.fold_change = fold_change

        # Save object to db
        quantificationFC.save()

    @staticmethod
    def parseResults(quantification):
        """
        Parse the fold change CSV file attached to ``quantification``.

        When ``quantification.file_header`` is 'CF' the rows are keyed by
        'CLPeptideId' (several ids may be joined with '-'); when it is 'FSF'
        the rows are keyed by 'File' and 'ScanNumber'.  A QuantificationFC is
        saved for every row key that matches a CLPeptide.  Keys without a
        match are collected in the parsing log.

        Success or failure of this procedure is recorded on the
        Quantification row (``parsing_status`` / ``parsing_log``).

        :param quantification: object providing ``file.path``,
                               ``file_header`` and ``id``.
        """

        try:
            # The csv module needs a text-mode handle on Python 3 (a 'rb'
            # handle makes the reader raise "iterator should return strings,
            # not bytes"); newline='' is the mode recommended by the csv
            # documentation.
            with open(quantification.file.path, 'r', newline='') as f:

                # Create CSV reader
                reader = csv.DictReader(f)

                # Check header to validate file format
                if quantification.file_header == 'CF':
                    header_template = '"CLPeptideId","FoldChange"'
                else:
                    header_template = '"File","ScanNumber","FoldChange"'

                # fieldnames is None for an empty file; treat it as an empty
                # header so it is reported as a bad file header below.
                header_file = '"' + '","'.join(reader.fieldnames or []) + '"'

                if not header_file.startswith(header_template):
                    raise InvalidFileFormatException(header_file +
                                                     'Uploaded extra file is not recognized as a '
                                                     'Quantification fold change results file (bad file header)')

                # Keys found in the file without a matching CLPeptide
                unmatched = []

                # Iterate through lines and create QuantificationFC
                if quantification.file_header == 'CF':

                    for row in reader:

                        # Split CLPeptideId
                        for clpep_id in row['CLPeptideId'].split('-'):

                            # Match clpep_id to database
                            queryset = CLPeptide.objects.filter(pk=clpep_id)

                            if queryset:
                                QuantFC_Parser._saveFoldChange(
                                    quantification, queryset[0],
                                    row['FoldChange'])
                            else:
                                unmatched.append(clpep_id)

                elif quantification.file_header == 'FSF':

                    for row in reader:

                        # Match run name and scan number to database
                        queryset = CLPeptide.objects.filter(run_name=row['File'],
                                                            scan_number=row['ScanNumber'])

                        if queryset:
                            QuantFC_Parser._saveFoldChange(
                                quantification, queryset[0],
                                row['FoldChange'])
                        else:
                            unmatched.append(row['File'] + ':' + row['ScanNumber'])

                # Every unmatched key is followed by '-' in the log; the log
                # is 'Ok' when everything matched.
                id_no_match = ''.join(key + '-' for key in unmatched)

                if id_no_match == '':
                    id_no_match = 'Ok'

                # Record the parsing outcome on the quantification
                Quantification.objects.filter(pk=quantification.id).update(
                    parsing_log=id_no_match,
                    parsing_status=True
                )

        except InvalidFileFormatException as e:
            Quantification.objects.filter(pk=quantification.id).update(
                parsing_status=False, parsing_log='Error: ' + e.value)
# # Copyright 2013-2015 Mathieu Courcelles
# # Copyright 2013-2017 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard librariesdjang
# Import standard libraries
import csv
import re
......@@ -13,40 +10,35 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class XiParser:
"""
Parser for Xi results.
"""
@staticmethod
def parseResults(dataset):
"""
This method parses Xi results file provided by the dataset.
Success or failure of this procedure is noted in the dataset object.
"""
# Prepare sequenceMatcher
sM = sequencesMatcher(dataset.fasta_db.pk)
try:
with open(dataset.file.path, 'rb') as f:
# Skip line to header
filters_string = f.readline()
f.readline()
# Create CSV reader
reader = csv.DictReader(f)
# Check header to validate file format
header_template = ('"run_name","scan_number","precursor_mz",'
'"precursor_charge","precursor_intensity",'
......@@ -57,79 +49,74 @@ class XiParser:
'"peptide_position2","pep2_link_pos",'
'"autovalidated","validated","rejected",'
'"notes",')
header_file = '"' + '","'.join(name for name in reader.fieldnames) + '",'
if header_template != header_file:
raise InvalidFileFormatException(
'Uploaded extra file is not recognized as a '
'Xi results file (bad file header)')
'Uploaded extra file is not recognized as a '
'Xi results file (bad file header)')
# Iterate through lines and create CLPeptide
for row in reader:
# Create the CLPeptide object
row['total_fragment_matches'] = int(float(row['total_fragment_matches']))
row['total_fragment_matches'] = int(float(row['total_fragment_matches']))
clpep = CLPeptide(**row)
# Add manual fields
clpep.peptide_wo_mod1 = re.sub("[a-z0-9_]", "", clpep.peptide1)
clpep.peptide_wo_mod2 = re.sub("[a-z0-9_]", "", clpep.peptide2)
if clpep.pep1_link_pos != '':
clpep.pep1_link_pos = int(clpep.pep1_link_pos) + 1
else:
clpep.pep1_link_pos = -1
if clpep.pep2_link_pos != '':
clpep.pep2_link_pos = int(clpep.pep2_link_pos) + 1
else:
clpep.pep2_link_pos = -1
if clpep.peptide_position1 != '':
clpep.peptide_position1 = int(clpep.peptide_position1) + 1
else:
clpep.peptide_position1 = -1
if clpep.peptide_position2 != '':
clpep.peptide_position2 = int(clpep.peptide_position2) + 1
else:
clpep.peptide_position2 = -1
clpep.peptide2 = '-'
clpep.peptide_wo_mod2 = '-'
clpep.guessLinkType()
clpep.fixAutovalidated()
clpep.isDecoy()
# Fix very large delta
if float(clpep.delta) > 100000:
clpep.delta = 100000
# Match protein sequences
clpep.fs_prot1_id = sM.sequencePk(clpep.display_protein1)
clpep.fs_prot2_id = sM.sequencePk(clpep.display_protein2)
# Save object to db
clpep.save()
# Link object to dataset
clpep.dataset.add(dataset)
# Append filter string to dataset description
RawDataset.objects.filter(pk=dataset.id).update(
description = dataset.description + filters_string,
parsing_log = 'Ok',
parsing_status = True
description=dataset.description + filters_string,
parsing_log='Ok',
parsing_status=True
)
except InvalidFileFormatException as e:
RawDataset.objects.filter(pk=dataset.id).update(
parsing_status = False, parsing_log = 'Error: ' + e.value)
parsing_status=False, parsing_log='Error: ' + e.value)
......@@ -16,9 +16,8 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class XlinkXParser:
......@@ -26,7 +25,6 @@ class XlinkXParser:
Parser for XlinkX results.
"""
@staticmethod
def parseResults(dataset):
"""
......@@ -66,12 +64,13 @@ class XlinkXParser:
fields['run_name'] = row['file_name']
fields['scan_number'] = row['scan']
charge = float(row['charge'])
fields['precursor_mz'] = '%4f' % ((float(row['precursor_mass']) + charge * 1.00727646677)/charge)
fields['precursor_mz'] = '%4f' % ((float(row['precursor_mass']) + charge * 1.00727646677) / charge)
fields['precursor_charge'] = row['charge']
fields['precursor_intensity'] = -1
fields['rank'] = -1
fields['spectra_num'] = row['spectra_num']
fields['match_score'] = log10(float(row['n_score_a_MS2_MS3']) * float(row['n_score_b_MS2_MS3'])) * -1.0
fields['match_score'] = log10(
float(row['n_score_a_MS2_MS3']) * float(row['n_score_b_MS2_MS3'])) * -1.0
fields['n_score_a'] = row['n_score_a']
fields['n_score_a_MS2_MS3'] = row['n_score_a_MS2_MS3']
fields['n_score_b'] = row['n_score_b']
......
# Copyright 2013 Mathieu Courcelles
# Copyright 2013-2017 Mathieu Courcelles
# Mike Tyers's lab / IRIC / Universite de Montreal
......
......@@ -15,9 +15,8 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class MascotParser:
......@@ -25,7 +24,6 @@ class MascotParser:
Parser for Mascot results.
"""
@staticmethod
def formatPeptideSequence(pep_seq, mods, pep_var_mod_pos, cross_linker):
"""
......@@ -63,7 +61,6 @@ class MascotParser:
return ''.join(peptide), linker_pos
@staticmethod
def parseResults(dataset):
"""
......
# # Copyright 2013 Mathieu Courcelles
# # Copyright 2013-2017 Mathieu Courcelles
# # Mike Tyers's lab / IRIC / Universite de Montreal
# Import standard librariesdjang
# Import standard libraries
import csv
import re
......@@ -10,29 +10,26 @@ import re
# Import project libraries
from ..models import CLPeptide
from ..models import RawDataset
from exception import InvalidFileFormatException
from sequences import sequencesMatcher
from .exception import InvalidFileFormatException
from .sequences import sequencesMatcher
class pLinkParser:
"""
Parser for pLink results.
"""
@staticmethod
def formatPeptideSequence(peptide):
    """
    Rewrite a pLink peptide sequence with the Xi notation for modifications.

    Every 'C' found in ``peptide`` is replaced by 'Cm' and the resulting
    string is returned.
    """

    # Cysteine carries a fixed modification, written 'Cm' in Xi format
    cysteine = re.compile('C')
    return cysteine.sub('Cm', peptide)
@staticmethod
def parseResults(dataset):
......@@ -40,64 +37,64 @@ class pLinkParser:
This method parses pLink results file provided by the dataset.
Success or failure of this procedure is noted in the dataset object.
"""
# Prepare sequenceMatcher
sM = sequencesMatcher(dataset.fasta_db.pk)