# Source code for modifinder.utilities.gnps_types

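"""
Spectrum record type and GNPS normalisation tables.

Defines the ``SpectrumTuple`` record, helpers that convert between peak lists
and ``SpectrumTuple`` instances, and lookup tables that map adduct spellings
and GNPS metadata keys onto one canonical form each.
"""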

import collections
import json
from dataclasses import dataclass, field
from typing import List, Tuple

# Immutable record describing one spectrum: the precursor's m/z and charge,
# plus two parallel sequences holding the fragment m/z values and their
# intensities (entry i of ``mz`` pairs with entry i of ``intensity``).
SpectrumTuple = collections.namedtuple(
    "SpectrumTuple",
    ("precursor_mz", "precursor_charge", "mz", "intensity"),
)


def convert_to_SpectrumTuple(peaks, precursor_mz, precursor_charge):
    """Build a ``SpectrumTuple`` from a list of ``(mz, intensity)`` pairs.

    Args:
        peaks: Sized collection of two-element lists or tuples, each holding
            an m/z value followed by its intensity. May be ``None``.
        precursor_mz: Precursor m/z; converted with ``float``.
        precursor_charge: Precursor charge; converted with ``int``.

    Returns:
        A ``SpectrumTuple`` whose ``mz`` and ``intensity`` fields are lists,
        or ``None`` when ``peaks`` is ``None`` or empty.

    Raises:
        ValueError: If any entry of ``peaks`` is not a list/tuple of length 2.
    """
    if peaks is None:
        return None
    # Explicit length test (rather than truthiness) so that any sized
    # container is handled the same way.
    if len(peaks) == 0:
        return None

    for pair in peaks:
        if not isinstance(pair, (list, tuple)) or len(pair) != 2:
            raise ValueError("Peaks must be a list of (mz, intensity) pairs")

    # Every entry is a validated pair, so transposing yields exactly two
    # columns: all m/z values, then all intensities.
    mz_column, intensity_column = (list(column) for column in zip(*peaks))

    parent_mz = float(precursor_mz)
    charge = int(precursor_charge)
    return SpectrumTuple(
        precursor_mz=parent_mz,
        precursor_charge=charge,
        mz=mz_column,
        intensity=intensity_column,
    )
def convert_to_SpectrumTuple_seprated(mz, intensity, precursor_mz, precursor_charge):
    """Build a ``SpectrumTuple`` from separate m/z and intensity sequences.

    Args:
        mz: Sized iterable of fragment m/z values. May be ``None``.
        intensity: Sized iterable of intensities, parallel to ``mz``.
            May be ``None``.
        precursor_mz: Precursor m/z; converted with ``float``.
        precursor_charge: Precursor charge; converted with ``int``.

    Returns:
        A ``SpectrumTuple`` holding list copies of ``mz`` and ``intensity``,
        or ``None`` when either input is ``None`` or ``mz`` is empty.

    Raises:
        ValueError: If ``mz`` and ``intensity`` differ in length.
    """
    if mz is None or intensity is None:
        return None

    peak_count = len(mz)
    if peak_count == 0:
        return None
    if peak_count != len(intensity):
        raise ValueError("mz and intensity must have the same length")

    parent_mz = float(precursor_mz)
    charge = int(precursor_charge)
    # Copy into fresh lists so the record does not alias the caller's data.
    mz_values = list(mz)
    intensity_values = list(intensity)
    return SpectrumTuple(
        precursor_mz=parent_mz,
        precursor_charge=charge,
        mz=mz_values,
        intensity=intensity_values,
    )
def Convert_SpectrumTuple_to_peaks(spectrum):
    """Turn a spectrum back into a list of ``(mz, intensity)`` pairs.

    Args:
        spectrum: Object exposing parallel ``mz`` and ``intensity``
            iterables (for example a ``SpectrumTuple``), or ``None``.

    Returns:
        A list of ``(mz, intensity)`` tuples, paired position by position.
        ``None`` yields an empty list: the ``convert_to_SpectrumTuple``
        helpers in this module return ``None`` for empty peak data, so the
        inverse conversion accepts that value instead of raising
        ``AttributeError``.
    """
    if spectrum is None:
        return []
    return list(zip(spectrum.mz, spectrum.intensity))
def convert_to_universal_key(key: str) -> str:
    """Map a metadata key onto its canonical name.

    The key is lower-cased and its spaces are replaced with underscores; the
    result is then looked up in ``gnps_keys_mapping``.

    Args:
        key: Metadata key as it appears in the input record.

    Returns:
        The mapped canonical key, or the normalised key itself when the
        mapping has no entry for it.
    """
    normalized = key.lower().replace(" ", "_")
    return gnps_keys_mapping.get(normalized, normalized)
# Lookup table from the adduct spellings found in input data to a single
# canonical bracketed form ("[<species>]<charge>"). Canonical forms map to
# themselves. Entry order is kept as originally written.
adduct_mapping = {
    "M+H": "[M+H]+",
    "[M+H]": "[M+H]+",
    "[M+H]+": "[M+H]+",
    "M+H]": "[M+H]+",
    "M+Na": "[M+Na]+",
    "[M+Na]": "[M+Na]+",
    "[M+Na]+": "[M+Na]+",
    "2M+Na": "[2M+Na]+",
    "M2+Na": "[2M+Na]+",
    "[2M+Na]+": "[2M+Na]+",
    "[2M+Na]": "[2M+Na]+",
    "M+K": "[M+K]+",
    "[M+K]": "[M+K]+",
    "[M+K]+": "[M+K]+",
    "[2M+K]+": "[2M+K]+",
    "2M+K": "[2M+K]+",
    "[2M+K]": "[2M+K]+",
    "M+H-H20": "[M-H2O+H]+",
    "M+H-H2O": "[M-H2O+H]+",
    "[M-H2O+H]+": "[M-H2O+H]+",
    "M-H20+H": "[M-H2O+H]+",
    "[M+H-H2O]+": "[M-H2O+H]+",
    "M-H2O+H": "[M-H2O+H]+",
    "M+H-2H2O": "[M-2H2O+H]+",
    "M-2H2O+H": "[M-2H2O+H]+",
    "[M-2H2O+H]+": "[M-2H2O+H]+",
    "M-2(H2O)+H": "[M-2H2O+H]+",
    "2M+Na-2H": "[2M-2H+Na]-",
    "2M-2H+Na": "[2M-2H+Na]-",
    "M-H": "[M-H]-",
    "[M-H]": "[M-H]-",
    "[M-H]-": "[M-H]-",
    "M-H-": "[M-H]-",
    "M-H1": "[M-H]-",
    "3M+Na": "[3M+Na]+",
    "[3M+Na]+": "[3M+Na]+",
    "[M]+": "[M]+",
    "M+": "[M]+",
    "M-e": "[M]+",
    "M2+H": "[2M+H]+",
    "2M+H": "[2M+H]+",
    "[2M+H]+": "[2M+H]+",
    "[2M+H]": "[2M+H]+",
    "[M+2H]": "[M+2H]2+",
    "[M+2H]2+": "[M+2H]2+",
    "M+2H]": "[M+2H]2+",
    "M+2H+2": "[M+2H]2+",
    "M+2H": "[M+2H]2+",
    "M+acetate": "[M+CH3COOH-H]-",
    "M+CH3COOH-H": "[M+CH3COOH-H]-",
    "M+CH3COO": "[M+CH3COOH-H]-",
    "M+ACN+H": "[M+CH3CN+H]+",
    "[M+ACN+H]+": "[M+CH3CN+H]+",
    "[M+H+CH3CN]": "[M+CH3CN+H]+",
    "M+2Na": "[M+2Na]2+",
    "M+2Na]": "[M+2Na]2+",
    "M+HCOO": "[M+HCOOH-H]-",
    "[M-H+HCOOH]": "[M+HCOOH-H]-",
    "M+FA-H": "[M+HCOOH-H]-",
    "M+formate": "[M+HCOOH-H]-",
    "[M+H+HCOOH]": "[M+HCOOH-H]-",
    "2M+FA-H": "[2M+HCOOH-H]-",
    "[2M-H+HCOOH]": "[2M+HCOOH-H]-",
    "M+NH4": "[M+NH3+H]+",
    "[M+NH4]+": "[M+NH3+H]+",
    "[M+NH4]": "[M+NH3+H]+",
    "2M+Hac-H": "[2M+CH3COOH-H]-",
    "2M-H": "[2M-H]-",
    "[2M-H]": "[2M-H]-",
    "2M+NH4": "[2M+NH3+H]+",
    "[2M+NH4]+": "[2M+NH3+H]+",
    "[2M+NH4]": "[2M+NH3+H]+",
    "[2M+Ca]2+": "[2M+Ca]2+",
    "[M+Ca]2+": "[M+Ca]2+",
    "[3M+Ca]2+": "[3M+Ca]2+",
    "[2M+Ca-H]+": "[2M-H+Ca]+",
    "[2M-H2O+H]+": "[2M-H2O+H]+",
    "[4M+Ca]2+": "[4M+Ca]2+",
    "[3M+NH4]+": "[3M+NH3+H]+",
    "3M+NH4": "[3M+NH3+H]+",
    "[2M-2H2O+H]+": "[2M-2H2O+H]+",
    "[M+ACN+NH4]+": "[M+CH3CN+NH3+H]+",
    "[5M+Ca]2+": "[5M+Ca]2+",
    "[3M+K]+": "[3M+K]+",
    # NOTE(review): the key carries charge "+" but the canonical value says
    # "2+", unlike the parallel "[2M+Ca-H]+" -> "[2M-H+Ca]+" entry above.
    # Left unchanged here; confirm which charge is intended.
    "[3M+Ca-H]+": "[3M-H+Ca]2+",
    "[M-H+2Na]+": "[M-H+2Na]+",
    "M-H+2Na": "[M-H+2Na]+",
    "[M-3H2O+H]+": "[M-3H2O+H]+",
    "M-3H2O+H": "[M-3H2O+H]+",
    "[M-3H2O+2H]2+": "[M-3H2O+2H]2+",
    "[M-2H2O+2H]2+": "[M-2H2O+2H]2+",
    "[M-4H2O+H]+": "[M-4H2O+H]+",
    "[M-5H2O+H]+": "[M-5H2O+H]+",
    "[M+Ca-H]+": "[M+Ca-H]+",
    "[2M-H+2Na]+": "[2M-H+2Na]+",
    "[2M-3H2O+H]+": "[2M-3H2O+H]+",
    "[M+H+Na]2+": "[M+Na+H]2+",
    "[M-2H2O+NH4]+": "[M-2H2O+NH3+H]+",
    "[2M-2H+Na]": "[2M-2H+Na]-",
    "[M-H+CH3OH]": "[M+CH3OH-H]-",
    "M+MeOH-H": "[M+CH3OH-H]-",
    "M-H2O-H": "[M-H2O-H]-",
    "[M-H-H2O]": "[M-H2O-H]-",
    "M+Cl-": "[M+Cl]-",
    "M+Cl": "[M+Cl]-",
    "[M+Cl]": "[M+Cl]-",
    "M+K-2H": "[M-2H+K]-",
    "[M-2H+K]": "[M-2H+K]-",
    "M-2H]": "[M-2H]2-",
    "M-2H": "[M-2H]2-",
    "M-2H-": "[M-2H]2-",
    "M+Na-2H": "[M-2H+Na]-",
    "[M-2H+Na]": "[M-2H+Na]-",
    "M+Br": "[M+Br]-",
    "3M-H": "[3M-H]-",
    "[3M-H]": "[3M-H]-",
    "[M+H+CH3OH]": "[M+CH3OH+H]+",
    "M+CH3OH+H": "[M+CH3OH+H]+",
    "[2M+H+CH3CN]": "[2M+CH3CN+H]+",
    "M-CO2-H": "[M-CO2-H]-",
    "[2M-2H+K]": "[2M-2H+K]-",
    "2M+K-2H": "[2M-2H+K]-",
    "[M+Na+CH3CN]": "[M+CH3CN+Na]+",
    "M-H2+H": "[M-H2+H]-",
    "M-H+Cl]": "[M-H+Cl]2-",
    "M-H+Cl": "[M-H+Cl]2-",
    "3M+H": "[3M+H]+",
    "[3M+H]": "[3M+H]+",
    "M+H-NH3": "[M-NH3+H]+",
    "M-NH3+H": "[M-NH3+H]+",
    "M-H+C2H2O": "[M+C2H2O-H]-",
    "M+H-C2H2O": "[M+C2H2O-H]-",
    "M-H+CH2O2": "[M+CH2O2-H]-",
    "M+CH2O2-H": "[M+CH2O2-H]-",
    "M+TFA-H": "[M+C2HF3O2-H]-",
    "M-C2HF3O2-H": "[M+C2HF3O2-H]-",
    "[M]1+": "[M]1+",
}

# Lookup table from normalised (lower-case, underscore-separated) metadata
# keys to the canonical key names. Keys without an entry are expected to be
# passed through unchanged by the caller's ``.get(key, key)`` lookup.
gnps_keys_mapping = {
    # precursor
    "precursor_mz": "precursor_mz",
    # charge
    "precursor_charge": "precursor_charge",
    "charge": "precursor_charge",
    # smiles
    "smiles": "smiles",
    "smile": "smiles",
    # adduct
    "adduct": "adduct",
    # peaks
    "peaks": "peaks",
    # instrument
    "instrument": "instrument",
    # name
    "name": "name",
    "compound_name": "name",
    # spectrum_id
    "spectrum_id": "spectrum_id",
    "spectrumid": "spectrum_id",
    # exact mass
    "exact_mass": "exact_mass",
    "exactmass": "exact_mass",
    # mz
    "fragment_mz": "mz",
    "mz": "mz",
    "mzs": "mz",
    # intensity
    "fragment_intensities": "intensity",
    "intensities": "intensity",
}