Commit 6daafcc8 authored by Lars Yunker
Browse files

Merge branch '45-support-autoconversion-of-d-files' into 'master'

Resolve "support autoconversion of d files"

Closes #45

See merge request !41
parents 4d31af0f 3e47fcba
Pipeline #188935588 passed with stages
in 3 minutes and 1 second
__version__ = '2.2.1'
__version__ = '2.2.2'
......@@ -132,6 +132,7 @@ class IPMolecule(Molecule):
self.ipmethod = ipmethod
self._spectrum_raw = None # spectrum object holder
self._raw = None # raw isotope pattern
self._calculated_bounds = None # calculated bounds for the instance
self.bar_isotope_pattern = [[], []]
self.criticalerror = criticalerror
self.decpl = decpl
......@@ -420,6 +421,8 @@ class IPMolecule(Molecule):
'1116.14036964': {'bounds': (1115.9557393427547, 1116.3249999321531)}}
"""
if self._calculated_bounds is not None:
return self._calculated_bounds
logger.info('calculating bounds from simulated gaussian isotope pattern')
threshold = threshold * max(self.bar_isotope_pattern[1])
tempip = [[], []]
......@@ -436,10 +439,13 @@ class IPMolecule(Molecule):
out = [stats.norm.interval(conf, tempip[0][0], scale=self.sigma)[0],
stats.norm.interval(conf, tempip[0][-1], scale=self.sigma)[1]]
logger.debug(f'caclulated bounds: {out[0]:.3f}-{out[1]:.3f}')
self._calculated_bounds = out
return out
def _calculate_ips(self):
"""Call to calculate isotope patterns based on the specified parameters"""
# reset calculated bounds
self._calculated_bounds = None
# generates the raw isotope pattern (charge of 1)
if self.ipmethod == 'combinatorics':
calculator = isotope_pattern_combinatoric
......
......@@ -3,6 +3,8 @@ import pathlib
import subprocess
import sys
from typing import Union
from .logging import logger
......@@ -14,13 +16,13 @@ def file_present(filepath):
return tf
def pw_convert(filename,
def pw_convert(filename: Union[str, pathlib.Path],
bit=64,
compression=True,
gzip=True,
verbose=True,
out_directory=None
):
) -> pathlib.Path:
"""
Runs msconvert.exe from ProteoWizard to convert Waters .RAW format to .mzXML
which can then be parsed by python.
......@@ -80,7 +82,8 @@ def pw_convert(filename,
'Please ensure that ProteoWizard is installed in either:\n'
'c:\\program files\\proteowizard\nor\nc:\\program files (x86)\\proteowizard')
filename = pathlib.Path(filename)
if type(filename) is str:
filename = pathlib.Path(filename)
if out_directory is None:
out_directory = filename.parent
......@@ -98,12 +101,12 @@ def pw_convert(filename,
'--verbose' if verbose else '', # verbose mode
])
out_exten = f'.mzML{".gz" if gzip else ""}'
logger.info(f'Generating mzML file from {filename}')
subprocess.call(callstring)
logger.info(f'Generating mzML file from "{filename}"')
code = subprocess.call(callstring) # todo check that this correctly raises
if code != 0:
raise ValueError(f'an error was encountered converting the file "{filename}"')
logger.info('conversion DONE')
filename.with_suffix(f'.mzML{".gz" if gzip else ""}')
return filename.with_suffix(f'.mzML{".gz" if gzip else ""}')
def fix_extension(fn):
......
......@@ -2,8 +2,9 @@
Data structure for working with mzML files
"""
import gzip
import sys
import warnings
import pathlib
from random import random
from typing import Generator, List, MutableMapping, Union
from xml.etree import ElementTree
......@@ -210,10 +211,10 @@ class mzML(object):
self.gzip_file = gzip_file
self.obo = obo
self.filename = self.check_for_file(filename)
self.filename: pathlib.Path = self.check_for_file(filename)
logger.info(f'Loading {self.filename} into memory')
if self.filename.lower().endswith('.mzml.gz'): # if mzml is gzipped
if self.filename.suffix == '.gz': # if mzml is gzipped
handle = gzip.open(self.filename) # unzip the file
else:
handle = self.filename
......@@ -272,7 +273,7 @@ class mzML(object):
def __repr__(self):
"""The representation that is returned"""
return "%s('%s')" % (self.__class__.__name__, self.filename)
return "%s('%s')" % (self.__class__.__name__, self.filename.parts[-1])
def __len__(self):
return self.nscans
......@@ -532,40 +533,32 @@ class mzML(object):
res = [y for y in res if y is not None] # removes None values (below S/N)
return sum(res) / len(res) # return average
def check_for_file(self, fn):
def check_for_file(self, fn: Union[str, pathlib.Path]) -> pathlib.Path:
"""checks for the mzML file in the working directory and converts it if necessary"""
# cast path-like to string to enable extension check
if type(fn) is not str:
fn = str(fn)
valid = [ # supported extensions
'.raw',
'.mzml.gz',
'.mzml',
if type(fn) is str:
fn = pathlib.Path(fn)
target_suffixes = [
'.mzML',
'.gz',
]
if fn.lower().endswith('.raw') is True: # extension is raw
if file_present(fn[:-4] + '.mzML.gz') is True: # if corresponding gzipped mzml is present
return fn[:-4] + '.mzML.gz'
if file_present(fn[:-4] + '.mzML') is True: # if corresponding mzml is present
return fn[:-4] + '.mzML'
# otherwise convert and return mzml
return pw_convert(fn, self.precision, self.compression, self.gzip_file, verbose=self.verbose)
elif file_present(fn) is True: # if the specified file is present
for exten in valid: # checks for supported extensions
if fn.lower().endswith(exten) is True:
return fn
# otherwise asks user whether to continue
if input(
'The extension of the supplied filename "%s" is unexpected and may not be supported.\n'
'Do you wish to proceed with file loading? [Y/N] ' % fn).lower() in ['y', 'yes']:
for ind, suffix in enumerate(target_suffixes):
# if file already has correct suffixes
if fn.suffixes == target_suffixes[:ind + 1]:
return fn
else:
sys.exit('The user cancelled mzML loading.')
else:
fn = fix_extension(fn) # try to fix extension
if fn.lower().endswith('.raw') is True: # convert if only raw file is found
return pw_convert(fn, self.precision, self.compression, self.gzip_file, verbose=self.verbose)
return fn
# check for presence of this suffix
with_suffixes = fn.with_suffix("".join(target_suffixes[:ind + 1]))
if with_suffixes.is_file():
return with_suffixes
# otherwise return converted extension
return pw_convert(
fn,
self.precision,
self.compression,
self.gzip_file,
verbose=self.verbose,
)
def _ensure_ftt_extracted(fn):
"""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment