Source code for modifinder.classes.Spectrum

import json
from modifinder.utilities.gnps_types import adduct_mapping
import modifinder.utilities.general_utils as general_utils
from modifinder import convert
import numpy as np
import bisect
import uuid

class Spectrum:
    """A class to represent a spectrum.

    Parameters
    ----------
    mz: list
        A list of m/z values.
    intensity: list
        A list of intensity values.
    precursor_mz: float
        The precursor m/z value.
    precursor_charge: int
        The precursor charge.
    adduct: str
        The adduct.
    adduct_mass: float
        The adduct mass. Read-only; it is derived from ``adduct``.
    ms_level: int
        The ms level.
    instrument: str, optional
        The instrument used.
    ms_mass_analyzer: str, optional
        The mass analyzer used.
    ms_dissociation_method: str, optional
        The dissociation method used.
    spectrum_id: str, optional
        The spectrum id. If not provided, it is generated by ``update``.
    peak_fragments_map: dict, optional
        A dictionary mapping peaks to fragments.
    """

    def __init__(self, incoming_data=None, normalize_peaks=True, **kwargs):
        """Constructor for the Spectrum class.

        The spectrum can be initialized with a dictionary of data or with
        the individual values.

        Parameters
        ----------
        incoming_data : Input data (optional, default is None).
            The data to initialize the Spectrum object. The data can be a
            dictionary, a usi, or a Spectrum object.
        normalize_peaks : bool, default is True.
            If True, the intensity of the peaks will be normalized at
            initialization.
        **kwargs
            Forwarded to :meth:`update`.
        """
        # Start from the same blank state that ``clear`` produces.
        self.clear()

        # A completely empty call leaves the object blank (no id is generated).
        if incoming_data is None and not kwargs:
            return

        if incoming_data is not None:
            convert.to_spectrum(incoming_data, self)

        self.update(normalize_peaks=normalize_peaks, **kwargs)

    @property
    def adduct(self):
        """The adduct of the spectrum."""
        return self._adduct

    @adduct.setter
    def adduct(self, value):
        # Translate through ``adduct_mapping`` when the value is a known key,
        # otherwise keep it unchanged, then refresh the derived mass.
        self._adduct = adduct_mapping.get(value, value)
        if self._adduct is not None:
            self._adduct_mass = general_utils.get_adduct_mass(self._adduct)
        else:
            self._adduct_mass = None

    @property
    def adduct_mass(self):
        """The adduct mass computed when the adduct was last assigned."""
        return self._adduct_mass

    def _has_peaks(self):
        """Return True when both mz and intensity are set and non-empty."""
        return (
            self.mz is not None
            and self.intensity is not None
            and len(self.mz) > 0
            and len(self.intensity) > 0
        )

    @staticmethod
    def _json_default(value):
        """Fallback serializer so numpy arrays and scalars can be dumped to JSON."""
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def update(self, peaks=None, peaks_json=None, mz=None, intensity=None,
               precursor_mz=None, precursor_charge=None, _adduct=None,
               adduct=None, adduct_mass=None, ms_level=None, instrument=None,
               ms_mass_analyzer=None, ms_dissociation_method=None,
               spectrum_id=None, normalize_peaks=False,
               ratio_to_base_peak=None, remove_large_peaks=False,
               keep_top_k=None, peak_fragments_map: dict = None, **kwargs):
        """Update the Spectrum object with the given values.

        Any argument left as None keeps the current value of the
        corresponding attribute.

        Parameters
        ----------
        peaks : list
            A list of peaks in the form of
            [[mz1, intensity1], [mz2, intensity2], ...].
        peaks_json : str
            A json string of peaks; when given it replaces ``peaks``.
        mz : list
            A list of m/z values (takes precedence over ``peaks``).
        intensity : list
            A list of intensity values (takes precedence over ``peaks``).
        precursor_mz : float
            The precursor m/z value.
        precursor_charge : int
            The precursor charge.
        _adduct : str
            The adduct; applied before ``adduct``.
        adduct : str
            The adduct.
        adduct_mass : float
            Accepted for compatibility but not used; the mass is derived
            from the adduct.
        ms_level : int
            The ms level.
        instrument : str
            The instrument used.
        ms_mass_analyzer : str
            The mass analyzer used.
        ms_dissociation_method : str
            The dissociation method used.
        spectrum_id : str
            The spectrum id. A uuid4 string is generated if the spectrum
            still has no id at the end of the update.
        normalize_peaks : bool
            If True, the intensity of the peaks will be normalized.
        ratio_to_base_peak : float
            If None, no filtering is done; if a float, removes all the peaks
            with intensity less than ratio times the base peak.
        remove_large_peaks : bool
            If True, remove the peaks at or above 0.99 times the precursor
            m/z value.
        keep_top_k : int
            If not None, only keep the top k peaks.
        peak_fragments_map : dict
            A dictionary mapping peaks to fragments.
        **kwargs
            Ignored.
        """
        if peaks_json is not None:
            peaks = json.loads(peaks_json)

        if peaks is not None:
            self.mz = [peak[0] for peak in peaks]
            self.intensity = [peak[1] for peak in peaks]

        self.mz = np.array(mz) if mz is not None else self.mz
        self.intensity = np.array(intensity) if intensity is not None else self.intensity
        self.precursor_mz = float(precursor_mz) if precursor_mz is not None else self.precursor_mz
        self.precursor_charge = (
            int(float(precursor_charge)) if precursor_charge is not None else self.precursor_charge
        )
        # Both assignments always go through the setter, so the adduct mass is
        # recomputed even when no new adduct is supplied.
        self.adduct = _adduct if _adduct is not None else self.adduct
        self.adduct = adduct if adduct is not None else self.adduct
        self.ms_level = int(ms_level) if ms_level is not None else self.ms_level
        self.instrument = instrument if instrument is not None else self.instrument
        self.ms_mass_analyzer = (
            ms_mass_analyzer if ms_mass_analyzer is not None else self.ms_mass_analyzer
        )
        self.ms_dissociation_method = (
            ms_dissociation_method
            if ms_dissociation_method is not None
            else self.ms_dissociation_method
        )
        self.spectrum_id = spectrum_id if spectrum_id is not None else self.spectrum_id
        self.peak_fragments_map = (
            peak_fragments_map if peak_fragments_map is not None else self.peak_fragments_map
        )

        # Keep peaks ordered by m/z (get_peak_indexes relies on bisect).
        # Skipped for an empty/missing peak list, where zip(*[]) cannot be
        # unpacked into two names.
        if self._has_peaks():
            self.mz, self.intensity = zip(*sorted(zip(self.mz, self.intensity)))

        # The peak filters are guarded individually, but normalization is only
        # meaningful once intensities exist.
        if normalize_peaks and self.intensity is not None:
            self.normalize_peaks()

        if ratio_to_base_peak is not None:
            self.remove_small_peaks(ratio_to_base_peak)

        if remove_large_peaks:
            self.remove_larger_than_precursor_peaks()

        if keep_top_k is not None:
            self.keep_top_k(keep_top_k)

        if self.spectrum_id is None:
            self.spectrum_id = str(uuid.uuid4())

    def __str__(self):
        """Return the non-None attributes as an indented JSON string."""
        # Build a filtered copy: deleting keys from ``self.__dict__`` itself
        # would remove the attributes from the instance.
        visible = {
            key: value for key, value in self.__dict__.items() if value is not None
        }
        return json.dumps(visible, indent=4, default=self._json_default)

    def clear(self):
        """Clear the Spectrum object."""
        self.mz = None
        self.intensity = None
        self.precursor_mz = None
        self.precursor_charge = None
        self._adduct = None
        self._adduct_mass = None
        self.ms_level = None
        self.instrument = None
        self.ms_mass_analyzer = None
        self.ms_dissociation_method = None
        self.spectrum_id = None
        self.peak_fragments_map = {}

    def copy(self):
        """Return a copy of the Spectrum object."""
        copied_spectrum = Spectrum()
        convert.to_spectrum(self, use_object=copied_spectrum, needs_parse=False)
        return copied_spectrum

    def normalize_peaks(self, change_self=True):
        """l2 Normalize the intensity of the Spectrum object.

        Intensities are left unchanged when there are none or when their
        l2 norm is zero (dividing would only produce NaN values).

        Parameters
        ----------
        change_self : bool, default is True
            If True, the intensity of the Spectrum object will be normalized
            in place. If False, a new Spectrum object with the normalized
            intensity will be returned.

        Returns
        -------
        None
            If change_self is True.
        Spectrum
            If change_self is False, a new Spectrum object with the
            normalized intensity.
        """
        if self.intensity is None:
            new_intensity = None
        else:
            l2_norm = np.linalg.norm(self.intensity)
            if l2_norm == 0:
                new_intensity = list(self.intensity)
            else:
                new_intensity = [value / l2_norm for value in self.intensity]

        if change_self:
            self.intensity = new_intensity
        else:
            new_spectrum = self.copy()
            new_spectrum.intensity = new_intensity
            return new_spectrum

    def remove_small_peaks(self, ratio_to_base_peak: float = 0.01, change_self=True):
        """Remove peaks with intensity lower than a given ratio to the base peak.

        Parameters
        ----------
        ratio_to_base_peak : float (0, 1), default is 0.01
            The ratio to the base peak.
        change_self : bool, default is True
            If True, the peaks with intensity lower than the given ratio will
            be removed in place. If False, a new Spectrum object with the
            peaks removed will be returned.

        Returns
        -------
        map from old index to new index
            If change_self is True. The map is empty when the spectrum has
            no peaks.
        (Spectrum, map from old index to new index)
            If change_self is False.
        """
        if not self._has_peaks():
            # Nothing to filter; max() of an empty sequence would raise.
            if change_self:
                return {}
            return self.copy(), {}

        threshold = ratio_to_base_peak * max(self.intensity)
        new_mz = []
        new_intensity = []
        index_mapping = {}
        for index, intensity in enumerate(self.intensity):
            if intensity >= threshold:
                index_mapping[index] = len(new_mz)
                new_mz.append(self.mz[index])
                new_intensity.append(intensity)

        if change_self:
            self.mz = new_mz
            self.intensity = new_intensity
            return index_mapping

        new_spectrum = self.copy()
        new_spectrum.mz = new_mz
        new_spectrum.intensity = new_intensity
        return new_spectrum, index_mapping

    def keep_top_k(self, k: int = 100, change_self: bool = True):
        """Keep only the top k peaks in the Spectrum object.

        The kept peaks are stored ordered by m/z, and the returned map gives
        each kept peak's position in that m/z-ordered result.

        Parameters
        ----------
        k : int, default is 100
            The number of peaks to keep.
        change_self : bool, default is True
            If True, only the top k peaks will be kept in place. If False, a
            new Spectrum object with only the top k peaks will be returned.

        Returns
        -------
        map from old index to new index
            If change_self is True. The map is empty when the spectrum has
            no peaks.
        (Spectrum, map from old index to new index)
            If change_self is False.
        """
        if not self._has_peaks():
            if change_self:
                return {}
            return self.copy(), {}

        top_k_indices = np.argsort(self.intensity)[::-1][:k]
        # Order the survivors by m/z *before* numbering them, so the mapping
        # reflects where each peak really ends up.
        kept = sorted(
            (int(index) for index in top_k_indices),
            key=lambda index: (self.mz[index], self.intensity[index], index),
        )
        new_mz = tuple(self.mz[index] for index in kept)
        new_intensity = tuple(self.intensity[index] for index in kept)
        index_mapping = {old_index: new_index for new_index, old_index in enumerate(kept)}

        if change_self:
            self.mz = new_mz
            self.intensity = new_intensity
            return index_mapping

        new_spectrum = self.copy()
        new_spectrum.mz = new_mz
        new_spectrum.intensity = new_intensity
        return new_spectrum, index_mapping

    def remove_larger_than_precursor_peaks(self, change_self: bool = True):
        """Remove peaks that are larger than the precursor m/z value.

        Only peaks with m/z strictly below 0.99 times the precursor m/z
        are kept.

        Parameters
        ----------
        change_self : bool, default is True
            If True, the peaks will be removed in place. If False, a new
            Spectrum object with the peaks removed will be returned.

        Returns
        -------
        None
            If change_self is True.
        Spectrum
            If change_self is False.

        Raises
        ------
        ValueError
            If the spectrum has peaks but no precursor m/z.
        """
        new_mz = []
        new_intensity = []
        if self._has_peaks():
            if self.precursor_mz is None:
                raise ValueError(
                    "precursor_mz must be set to remove peaks larger than the precursor"
                )
            cutoff = self.precursor_mz * 0.99
            for mz, intensity in zip(self.mz, self.intensity):
                if mz < cutoff:
                    new_mz.append(mz)
                    new_intensity.append(intensity)
        else:
            # No peaks to filter: keep whatever is currently stored.
            new_mz = self.mz
            new_intensity = self.intensity

        if change_self:
            self.mz = new_mz
            self.intensity = new_intensity
        else:
            new_spectrum = self.copy()
            new_spectrum.mz = new_mz
            new_spectrum.intensity = new_intensity
            return new_spectrum

    def get_peak_indexes(self, mz, mz_tolerance=0.02, ppm_tolerance=40.0, **kwargs):
        """Get the indexes of the peaks within the given m/z tolerance.

        The search window is the tighter of the absolute and the ppm
        tolerance. The stored m/z values are searched with ``bisect``, so
        they are expected to be sorted in ascending order.

        Parameters
        ----------
        mz : float
            The m/z value of the peak.
        mz_tolerance : float, optional
            The m/z tolerance for the peak, default is 0.02.
        ppm_tolerance : float, optional
            The ppm tolerance for the peak, default is 40.0.
        **kwargs
            Ignored.

        Returns
        -------
        list
            The indexes of the peaks within the given m/z tolerance; empty
            when the spectrum has no m/z values.
        """
        if self.mz is None:
            return []

        ppm_window = mz * ppm_tolerance / 1e6
        min_range = max(mz - mz_tolerance, mz - ppm_window)
        max_range = min(mz + mz_tolerance, mz + ppm_window)

        # Leftmost position where min_range could be inserted.
        left_index = bisect.bisect_left(self.mz, min_range)
        # Rightmost position where max_range could be inserted.
        right_index = bisect.bisect_right(self.mz, max_range)

        # Indices in [left_index, right_index).
        return list(range(left_index, right_index))