"""
"""
import collections
import json
from dataclasses import dataclass, field
from typing import List, Tuple
# Immutable record describing one spectrum: the precursor m/z and charge plus
# two fields, ``mz`` and ``intensity``, that hold the fragment peak values.
SpectrumTuple = collections.namedtuple(
    typename="SpectrumTuple",
    field_names=("precursor_mz", "precursor_charge", "mz", "intensity"),
)
def convert_to_SpectrumTuple(peaks, precursor_mz, precursor_charge):
    """Build a ``SpectrumTuple`` from a collection of ``(mz, intensity)`` pairs.

    Args:
        peaks: Sized collection of peaks. Every peak must be a ``list`` or
            ``tuple`` holding exactly two items, ``(mz, intensity)``.
        precursor_mz: Precursor m/z; converted with ``float()``.
        precursor_charge: Precursor charge; converted with ``int()``.

    Returns:
        A ``SpectrumTuple`` whose ``mz`` and ``intensity`` fields are parallel
        lists in the same order as ``peaks``, or ``None`` when ``peaks`` is
        ``None`` or empty.

    Raises:
        ValueError: If any peak is not a two-item ``list`` or ``tuple``.
    """
    if peaks is None or len(peaks) == 0:
        return None

    # Validate every peak up front so a malformed entry is reported as a
    # ValueError instead of an IndexError/TypeError during the split below.
    if not all(
        isinstance(peak, (list, tuple)) and len(peak) == 2 for peak in peaks
    ):
        raise ValueError("Peaks must be a list of (mz, intensity) pairs")

    mz = [peak[0] for peak in peaks]
    intensity = [peak[1] for peak in peaks]
    return SpectrumTuple(
        precursor_mz=float(precursor_mz),
        precursor_charge=int(precursor_charge),
        mz=mz,
        intensity=intensity,
    )
def convert_to_SpectrumTuple_seprated(mz, intensity, precursor_mz, precursor_charge):
    """Build a ``SpectrumTuple`` from separate m/z and intensity sequences.

    Args:
        mz: Iterable of fragment m/z values.
        intensity: Iterable of fragment intensities, parallel to ``mz``.
        precursor_mz: Precursor m/z; converted with ``float()``.
        precursor_charge: Precursor charge; converted with ``int()``.

    Returns:
        A ``SpectrumTuple`` whose ``mz`` and ``intensity`` fields are new
        lists, or ``None`` when either input is ``None`` or ``mz`` is empty.

    Raises:
        ValueError: If ``mz`` and ``intensity`` have different lengths.
    """
    if mz is None or intensity is None:
        return None

    # Materialise once so unsized iterables (generators, iterators) are
    # accepted and the length checks below look at the stored data.
    mz = list(mz)
    if not mz:
        return None

    intensity = list(intensity)
    if len(mz) != len(intensity):
        raise ValueError("mz and intensity must have the same length")

    return SpectrumTuple(
        precursor_mz=float(precursor_mz),
        precursor_charge=int(precursor_charge),
        mz=mz,
        intensity=intensity,
    )
def Convert_SpectrumTuple_to_peaks(spectrum):
    """Convert a ``SpectrumTuple`` back into a list of ``(mz, intensity)`` pairs.

    Args:
        spectrum: Object exposing parallel ``mz`` and ``intensity`` sequences
            (typically a ``SpectrumTuple``), or ``None``.

    Returns:
        A list of ``(mz, intensity)`` tuples in the original peak order.
        ``None`` yields an empty list, because the converters in this module
        return ``None`` for empty input.
    """
    if spectrum is None:
        return []
    return list(zip(spectrum.mz, spectrum.intensity))
def convert_to_universal_key(key: str) -> str:
    """Normalize a metadata key name to its canonical form.

    The key is lower-cased and its spaces are replaced with underscores; the
    result is then looked up in ``gnps_keys_mapping``.

    Args:
        key: Raw key name.

    Returns:
        The mapped canonical key, or the normalized (lower-case, underscored)
        key itself when the mapping has no entry for it.
    """
    normalized = key.lower().replace(" ", "_")
    return gnps_keys_mapping.get(normalized, normalized)
# Lookup table from adduct spellings (missing brackets, missing charge,
# reordered terms, common typos such as "H20") to a single bracketed form
# that ends with the charge.
adduct_mapping = {
    'M+H': '[M+H]+',
    '[M+H]': '[M+H]+',
    '[M+H]+': '[M+H]+',
    'M+H]': '[M+H]+',
    'M+Na': '[M+Na]+',
    '[M+Na]': '[M+Na]+',
    '[M+Na]+': '[M+Na]+',
    '2M+Na': '[2M+Na]+',
    'M2+Na': '[2M+Na]+',
    '[2M+Na]+': '[2M+Na]+',
    '[2M+Na]': '[2M+Na]+',
    'M+K': '[M+K]+',
    '[M+K]': '[M+K]+',
    '[M+K]+': '[M+K]+',
    '[2M+K]+': '[2M+K]+',
    '2M+K': '[2M+K]+',
    '[2M+K]': '[2M+K]+',
    'M+H-H20': '[M-H2O+H]+',
    'M+H-H2O': '[M-H2O+H]+',
    '[M-H2O+H]+': '[M-H2O+H]+',
    'M-H20+H': '[M-H2O+H]+',
    '[M+H-H2O]+': '[M-H2O+H]+',
    'M-H2O+H': '[M-H2O+H]+',
    'M+H-2H2O': '[M-2H2O+H]+',
    'M-2H2O+H': '[M-2H2O+H]+',
    '[M-2H2O+H]+': '[M-2H2O+H]+',
    'M-2(H2O)+H': '[M-2H2O+H]+',
    '2M+Na-2H': '[2M-2H+Na]-',
    '2M-2H+Na': '[2M-2H+Na]-',
    'M-H': '[M-H]-',
    '[M-H]': '[M-H]-',
    '[M-H]-': '[M-H]-',
    'M-H-': '[M-H]-',
    'M-H1': '[M-H]-',
    '3M+Na': '[3M+Na]+',
    '[3M+Na]+': '[3M+Na]+',
    '[M]+': '[M]+',
    'M+': '[M]+',
    'M-e': '[M]+',
    'M2+H': '[2M+H]+',
    '2M+H': '[2M+H]+',
    '[2M+H]+': '[2M+H]+',
    '[2M+H]': '[2M+H]+',
    '[M+2H]': '[M+2H]2+',
    '[M+2H]2+': '[M+2H]2+',
    'M+2H]': '[M+2H]2+',
    'M+2H+2': '[M+2H]2+',
    'M+2H': '[M+2H]2+',
    'M+acetate': '[M+CH3COOH-H]-',
    'M+CH3COOH-H': '[M+CH3COOH-H]-',
    'M+CH3COO': '[M+CH3COOH-H]-',
    'M+ACN+H': '[M+CH3CN+H]+',
    '[M+ACN+H]+': '[M+CH3CN+H]+',
    '[M+H+CH3CN]': '[M+CH3CN+H]+',
    'M+2Na': '[M+2Na]2+',
    'M+2Na]': '[M+2Na]2+',
    'M+HCOO': '[M+HCOOH-H]-',
    '[M-H+HCOOH]': '[M+HCOOH-H]-',
    'M+FA-H': '[M+HCOOH-H]-',
    'M+formate': '[M+HCOOH-H]-',
    # NOTE(review): key reads "+H" but maps to the "-H" form - confirm intended.
    '[M+H+HCOOH]': '[M+HCOOH-H]-',
    '2M+FA-H': '[2M+HCOOH-H]-',
    '[2M-H+HCOOH]': '[2M+HCOOH-H]-',
    'M+NH4': '[M+NH3+H]+',
    '[M+NH4]+': '[M+NH3+H]+',
    '[M+NH4]': '[M+NH3+H]+',
    '2M+Hac-H': '[2M+CH3COOH-H]-',
    '2M-H': '[2M-H]-',
    '[2M-H]': '[2M-H]-',
    '2M+NH4': '[2M+NH3+H]+',
    '[2M+NH4]+': '[2M+NH3+H]+',
    '[2M+NH4]': '[2M+NH3+H]+',
    '[2M+Ca]2+': '[2M+Ca]2+',
    '[M+Ca]2+': '[M+Ca]2+',
    '[3M+Ca]2+': '[3M+Ca]2+',
    '[2M+Ca-H]+': '[2M-H+Ca]+',
    '[2M-H2O+H]+': '[2M-H2O+H]+',
    '[4M+Ca]2+': '[4M+Ca]2+',
    '[3M+NH4]+': '[3M+NH3+H]+',
    '3M+NH4': '[3M+NH3+H]+',
    '[2M-2H2O+H]+': '[2M-2H2O+H]+',
    '[M+ACN+NH4]+': '[M+CH3CN+NH3+H]+',
    '[5M+Ca]2+': '[5M+Ca]2+',
    '[3M+K]+': '[3M+K]+',
    # Keeps the single positive charge stated in the key, matching the
    # '[2M+Ca-H]+' entry above (previously mapped to a "2+" form).
    '[3M+Ca-H]+': '[3M-H+Ca]+',
    '[M-H+2Na]+': '[M-H+2Na]+',
    'M-H+2Na': '[M-H+2Na]+',
    '[M-3H2O+H]+': '[M-3H2O+H]+',
    'M-3H2O+H': '[M-3H2O+H]+',
    '[M-3H2O+2H]2+': '[M-3H2O+2H]2+',
    '[M-2H2O+2H]2+': '[M-2H2O+2H]2+',
    '[M-4H2O+H]+': '[M-4H2O+H]+',
    '[M-5H2O+H]+': '[M-5H2O+H]+',
    '[M+Ca-H]+': '[M+Ca-H]+',
    '[2M-H+2Na]+': '[2M-H+2Na]+',
    '[2M-3H2O+H]+': '[2M-3H2O+H]+',
    '[M+H+Na]2+': '[M+Na+H]2+',
    '[M-2H2O+NH4]+': '[M-2H2O+NH3+H]+',
    '[2M-2H+Na]': '[2M-2H+Na]-',
    '[M-H+CH3OH]': '[M+CH3OH-H]-',
    'M+MeOH-H': '[M+CH3OH-H]-',
    'M-H2O-H': '[M-H2O-H]-',
    '[M-H-H2O]': '[M-H2O-H]-',
    'M+Cl-': '[M+Cl]-',
    'M+Cl': '[M+Cl]-',
    '[M+Cl]': '[M+Cl]-',
    'M+K-2H': '[M-2H+K]-',
    '[M-2H+K]': '[M-2H+K]-',
    'M-2H]': '[M-2H]2-',
    'M-2H': '[M-2H]2-',
    'M-2H-': '[M-2H]2-',
    'M+Na-2H': '[M-2H+Na]-',
    '[M-2H+Na]': '[M-2H+Na]-',
    'M+Br': '[M+Br]-',
    '3M-H': '[3M-H]-',
    '[3M-H]': '[3M-H]-',
    '[M+H+CH3OH]': '[M+CH3OH+H]+',
    'M+CH3OH+H': '[M+CH3OH+H]+',
    '[2M+H+CH3CN]': '[2M+CH3CN+H]+',
    'M-CO2-H': '[M-CO2-H]-',
    '[2M-2H+K]': '[2M-2H+K]-',
    '2M+K-2H': '[2M-2H+K]-',
    '[M+Na+CH3CN]': '[M+CH3CN+Na]+',
    'M-H2+H': '[M-H2+H]-',
    'M-H+Cl]': '[M-H+Cl]2-',
    'M-H+Cl': '[M-H+Cl]2-',
    '3M+H': '[3M+H]+',
    '[3M+H]': '[3M+H]+',
    'M+H-NH3': '[M-NH3+H]+',
    'M-NH3+H': '[M-NH3+H]+',
    'M-H+C2H2O': '[M+C2H2O-H]-',
    # NOTE(review): key reads "+H-C2H2O" but maps to "+C2H2O-H" - confirm intended.
    'M+H-C2H2O': '[M+C2H2O-H]-',
    'M-H+CH2O2': '[M+CH2O2-H]-',
    'M+CH2O2-H': '[M+CH2O2-H]-',
    'M+TFA-H': '[M+C2HF3O2-H]-',
    # NOTE(review): key reads "-C2HF3O2" but maps to "+C2HF3O2" - confirm intended.
    'M-C2HF3O2-H': '[M+C2HF3O2-H]-',
    # NOTE(review): kept distinct from '[M]+' as in the original table - confirm.
    '[M]1+': '[M]1+',
}
# Maps lower-case, underscore-separated metadata key names to the canonical
# key name used for them. Each group below lists one canonical name followed
# by every spelling that resolves to it; insertion order follows the groups.
gnps_keys_mapping = {
    alias: canonical
    for canonical, aliases in (
        ("precursor_mz", ("precursor_mz",)),
        ("precursor_charge", ("precursor_charge", "charge")),
        ("smiles", ("smiles", "smile")),
        ("adduct", ("adduct",)),
        ("peaks", ("peaks",)),
        ("instrument", ("instrument",)),
        ("name", ("name", "compound_name")),
        ("spectrum_id", ("spectrum_id", "spectrumid")),
        ("exact_mass", ("exact_mass", "exactmass")),
        ("mz", ("fragment_mz", "mz", "mzs")),
        ("intensity", ("fragment_intensities", "intensities")),
    )
    for alias in aliases
}