Source code for modifinder.engines.annotation.MAGMaAnnotationEngine

from modifinder.engines.Abtracts import AnnotationEngine
from modifinder.classes.Compound import Compound
from modifinder.classes.EdgeDetail import EdgeDetail, MatchType
from typing import List, Tuple
from modifinder.engines.annotation.magma import (
    fragmentation_py,
    rdkit_engine as rdkit_engine,
)
from modifinder.utilities.general_utils import mims, is_submolecule
from modifinder.utilities.mol_utils import get_modification_nodes
import networkx as nx
from rdkit import Chem
from msbuddy import assign_subformula


[docs]class MAGMaAnnotationEngine(AnnotationEngine):
    def __init__(self, breaks = 2, max_water_losses = 2, ionisation_mode = 0, mz_tolerance = 0.1, ppm_tolerance = 40, **kwargs):
        """
        Initializes the MAGMa annotation engine

        Parameters:
            :kwargs: additional arguments
        """
        self.breaks = breaks
        self.max_water_losses = max_water_losses
        self.ionisation_mode = ionisation_mode
        self.mz_tolerance = mz_tolerance
        self.ppm_tolerance = ppm_tolerance
        self.args = kwargs


[docs]    def annotate(self, network: nx.DiGraph, annotate_all: bool = False, **kwargs):
        """
        Annotates the network using the MAGMa model

        Parameters:
            :network (Network): the network to be annotated
            :annotate_all (bool): whether to annotate all compounds or only the ones that have not been annotated yet
            :kwargs: additional arguments
        """
        for node in network.nodes:
            compound = network.nodes[node]["compound"]
            if compound is not None and compound.is_known:
                if annotate_all or (compound.spectrum.peak_fragments_map is None or len(compound.spectrum.peak_fragments_map) == 0):
                    self.annotate_single(compound, modify_compound=True, **kwargs)
        
        # refine by helpers
        for edge in network.edges:
            edge_detail = network.edges[edge]["edgedetail"]
            if edge_detail is not None:
                first_compound = network.nodes[edge[0]]["compound"]
                second_compound = network.nodes[edge[1]]["compound"]
                if first_compound.is_known and second_compound.is_known:
                    try:
                        self.refine_annotations_by_helper(network.nodes[edge[0]]["compound"], network.nodes[edge[1]]["compound"], edge_detail, modify_compound = True)
                        self.refine_annotations_by_helper(network.nodes[edge[1]]["compound"], network.nodes[edge[0]]["compound"], edge_detail, modify_compound = True)
                    except Exception as e:
                        pass


[docs]    def annotate_single(
        self, compound: Compound, modify_compound: bool = True, **kwargs
    ) -> List[Tuple[int, List[str]]]:
        """
        provides annotation for the peaks in a single compound

        Parameters:
            :compound (Compound): the compound to be annotated
            :modify_compound (bool): whether to modify the passed compound with the annotations
            :kwargs: additional arguments

        Returns:
            :Mapping[int, List[str]]: a dictionary with the indices of the peaks as keys and the list of annotations as values
        """

        if "breaks" not in kwargs:
            kwargs["breaks"] = self.breaks
        if "mz_tolerance" not in kwargs:
            kwargs["mz_tolerance"] = self.mz_tolerance
        if "ppm_tolerance" not in kwargs:
            kwargs["ppm_tolerance"] = self.ppm_tolerance

        for item in ["breaks", "mz_tolerance", "ppm_tolerance"]:
            if item not in kwargs or kwargs[item] is None:
                raise ValueError(
                    f"Missing parameters for MAGMa annotation engine {item}"
                )

        fragmentation_instance = fragmentation_py.FragmentEngine(
            compound.structure, kwargs["breaks"], max_water_losses=self.max_water_losses, ionisation_mode=self.ionisation_mode, skip_fragmentation=False, molcharge=compound.spectrum.precursor_charge
        )
        fragmentation_instance.generate_fragments()

        # generate peak to fragment map
        base_precision = 1 + kwargs["ppm_tolerance"] / 1000000
        peak_fragments_map = [set() for i in range(len(compound.spectrum.mz))]
        for i in range(len(compound.spectrum.mz)):
            search_weight = compound.spectrum.mz[i] - compound.spectrum.adduct_mass
            annotations = fragmentation_instance.find_fragments(
                search_weight, 0.1, base_precision, kwargs["mz_tolerance"]
            )
            for annotation in annotations:
                peak_fragments_map[i].add(annotation[0])

        # refine by formula
        # peak_fragments_map = self.refine_annotations_by_formula(compound, peak_fragments_map, modify_compound = False)
        
        if modify_compound:
            compound.spectrum.peak_fragments_map = peak_fragments_map

        return peak_fragments_map


[docs]    def get_fragment_info(self, Compound: Compound, fragment):
        """
        converts a fragment to a SMILES string

        Parameters
        ----------
            Compound (Compound): the compound
            fragment: the fragment representation

        Returns
        -------
            tuple(atomlist -> List[int], edge_list -> List[Tuple[int, int]], formula -> str, smiles -> str): a tuple containing the atom list, the edge list, the formula and the SMILES string
        """
        
        atomstring = ""
        atomlist = []
        edgeList = []
        elements = dict([(e, 0) for e in mims.keys()])
        natoms = Compound.structure.GetNumAtoms()
        for atom in range(natoms):
            if 1 << atom & fragment:
                atomstring += "," + str(atom)
                atomlist.append(atom)
                elements[rdkit_engine.GetAtomSymbol(Compound.structure, atom)] += 1
                elements["H"] += rdkit_engine.GetAtomHs(Compound.structure, atom)

        for bond in Compound.structure.GetBonds():
            if bond.GetBeginAtomIdx() in atomlist and bond.GetEndAtomIdx() in atomlist:
                edgeList.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))

        formula = ""
        for el in mims.keys():
            nel = elements[el]
            if nel > 0:
                formula += el
            if nel > 1:
                formula += str(nel)

        return (
            atomlist,
            edgeList,
            formula,
            rdkit_engine.FragmentToSmiles(Compound.structure, atomlist),
        )
    
    
[docs]    def refine_annotations_by_formula(self, compound: Compound, peak_fragments_map, modify_compound: bool = True):
        """
        Refines the annotations of a compound by using the formula from msbuddy

        See Also
        --------
        `MSBuddy <https://github.com/Philipbear/msbuddy>`_
        
        
        Parameters
        ----------
            compound (Compound): the compound
            
        """
        
        structure = compound.structure
        spectrum = compound.spectrum
        if structure is None:
            return peak_fragments_map
        
        main_compound_formula = Chem.rdMolDescriptors.CalcMolFormula(structure)
        peak_mz = spectrum.mz
        
        if len(peak_mz) == 0:
            return peak_fragments_map
        
        subformla_list = assign_subformula(peak_mz,
                                        precursor_formula=main_compound_formula, adduct=spectrum.adduct,
                                        ms2_tol=self.ppm_tolerance, ppm=True, dbe_cutoff=-1.0)
        
        new_peak_fragments_map = [set() for i in range(len(peak_mz))]
        for i in range(len(self.peaks)):
            possibilites = set()
            for subformula in subformla_list[i].subform_list:
                formula = subformula.formula
                # formula = utils.remove_adduct_from_formula(formula, self.Adduct)
                
                # find the fragments that contains the formula
                for frag_id in peak_fragments_map[i]:
                    molSubFormula = self.get_fragment_info(compound, frag_id)[2]
                    if is_submolecule(molSubFormula, formula, self.args["formula_ignore_H"]) and is_submolecule(formula, molSubFormula, self.args["formula_ignore_H"]):
                        possibilites.add(frag_id)
            
            if len(possibilites) > 0:
                new_peak_fragments_map[i] = possibilites
        
        if modify_compound:
            compound.spectrum.peak_fragments_map = new_peak_fragments_map
        
        return new_peak_fragments_map
    
    
[docs]    def refine_annotations_by_helper(self, main_compound: Compound, helper_compound: Compound, edgeDetail: EdgeDetail, modify_compound: bool = True):
        """
        Refines the annotations of a compound by using the helper compound
        """
        if not modify_compound:
            main_compound = main_compound.copy()
            
        if helper_compound.exact_mass < main_compound.exact_mass:
            modification_site = get_modification_nodes(main_compound.structure, helper_compound.structure, True)
            shifted_peaks = [match.second_peak_index for match in edgeDetail.matches if match.match_type == MatchType.shifted]
            sub_match_indices = main_compound.structure.GetSubstructMatch(helper_compound.structure)
            mapping = dict()
            for i, atom in enumerate(sub_match_indices):
                mapping[i] = atom
            unshifted = [(match.second_peak_index, match.first_peak_index) for match in edgeDetail.matches if match.match_type == MatchType.unshifted]
        else:
            modification_site = get_modification_nodes(helper_compound.structure, main_compound.structure, False)
            shifted_peaks = [match.first_peak_index for match in edgeDetail.matches if match.match_type == MatchType.shifted]
            sub_match_indices = helper_compound.structure.GetSubstructMatch(main_compound.structure)
            mapping = dict()
            for i, atom in enumerate(sub_match_indices):
                mapping[atom] = i
            unshifted = [(match.first_peak_index, match.second_peak_index) for match in edgeDetail.matches if match.match_type == MatchType.unshifted]
        
        main_compound.filter_fragments_by_atoms(modification_site, shifted_peaks)
        
        # filter unshifted peaks by intersection
        
        for peak in unshifted:
            helper_peak_fragment_map = set()
            for fragment in helper_compound.spectrum.peak_fragments_map[peak[1]]:
                new_fragment = 0
                for i in range(len(helper_compound.structure.GetAtoms())):
                    if 1 << i & fragment:
                        if i not in mapping:
                            new_fragment = -1
                            break
                        else:
                            new_fragment += 1 << mapping[i]
                if new_fragment != -1:
                    helper_peak_fragment_map.add(new_fragment)
            
            main_compound.spectrum.peak_fragments_map[peak[0]] = main_compound.spectrum.peak_fragments_map[peak[0]].intersection(helper_peak_fragment_map)
        
        return main_compound.spectrum.peak_fragments_map