Source code for modifinder.utilities.network

"""
GNPS Utils - Network Module

This module provides functionality to connect to GNPS and retrieve data.

Author: Shahneh
"""

import json
import requests
from modifinder.utilities.gnps_types import *
from modifinder.utilities.general_utils import parse_data_to_universal
from modifinder.exceptions import ModiFinderNetworkError

def usi_to_accession(usi: str) -> str:
    """
    Get the accession number from a USI

    If the text "accession" occurs in the given string, the part after the
    last ":" is returned; any other string is returned unchanged (it is
    treated as already being an accession).

    param usi: str
    return: str if found, None otherwise (i.e. when usi is not a string)
    """
    # Check the type first: a substring test on a non-string (None, int, ...)
    # would raise TypeError instead of yielding the documented None.
    if not isinstance(usi, str):
        return None

    if "accession" in usi:
        return usi.split(":")[-1]

    return usi
def accession_to_usi(accession: str) -> str:
    """
    Build the USI that refers to a GNPS library accession id

    param accession: str - accession id placed in the last USI field
    return: str - USI of the form mzspec:GNPS:GNPS-LIBRARY:accession:<accession>
    """
    collection = "GNPS-LIBRARY"
    usi_template = "mzspec:GNPS:{}:accession:{}"
    return usi_template.format(collection, accession)
def get_data(identifier: str) -> dict:
    """
    Get data from GNPS, either from USI or Accession.

    If the identifier is an accession, or a USI that contains an accession,
    the full library entry is requested from GNPS. If it is a USI without an
    accession, only partial data (ms2 data) is retrieved.

    param identifier: str - USI or Accession
    return: dict - dictionary of data, always including the keys 'usi' and 'id'
    raises ModiFinderNetworkError: if the identifier is empty or not a string,
        or if the GNPS library request fails or does not return valid JSON
    """
    if not identifier:
        raise ModiFinderNetworkError("No identifier provided")

    if not isinstance(identifier, str):
        raise ModiFinderNetworkError("Identifier must be a string")

    data = dict()
    if _is_usi(identifier):
        if _is_known(identifier):
            # The USI points at a library entry: continue with its accession.
            identifier = usi_to_accession(identifier)
        else:
            # No library entry behind this USI: only the spectrum is available.
            data = _get_partial_data(identifier)
            data['usi'] = identifier
            data['id'] = identifier
            data = parse_data_to_universal(data)
            return data

    link = "https://external.gnps2.org/gnpsspectrum?SpectrumID={}".format(identifier)
    try:
        # NOTE(review): this request has no timeout and can block indefinitely
        # if the server stalls; consider passing one after agreeing on a value.
        res = requests.get(link)
        parsed = res.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures; a bare except would also
        # have trapped KeyboardInterrupt / SystemExit.
        raise ModiFinderNetworkError(
            "Error in retrieving data from GNPS for identifier: {}, link: {}".format(identifier, link)
        ) from err

    # Each section of the response is optional; merge whatever is present.
    try:
        data.update(parsed['annotations'][0])
    except (KeyError, IndexError):
        # Missing key, or an empty annotations list.
        pass

    try:
        data.update(parsed['spectruminfo'])
    except KeyError:
        pass

    try:
        data['comments'] = parsed['comments']
    except KeyError:
        pass

    data = parse_data_to_universal(data)
    data['usi'] = accession_to_usi(identifier)
    data['id'] = identifier
    return data
def get_matched_peaks(identifier1: str, identifier2: str) -> dict:
    """
    Run the gnps modified cosine matching algorithm on two spectra and
    return the matched peaks

    Identifiers that are not USIs are converted to USIs before the request.

    param identifier1: str - USI or Accession
    param identifier2: str - USI or Accession
    return: dict - dictionary of matched peaks (the decoded JSON response)
    """
    # Normalise both identifiers to USIs, first identifier first.
    first_usi, second_usi = (
        ident if _is_usi(ident) else accession_to_usi(ident)
        for ident in (identifier1, identifier2)
    )

    query = {
        'usi1': first_usi,
        'usi2': second_usi,
        'mz_min': 'None',
        'mz_max': 'None',
        'cosine': 'shifted',
        'mz_tolerance': '0.1',
        'grid': 'True',
    }
    endpoint = 'https://metabolomics-usi.gnps2.org/json/mirror/'
    response = requests.get(endpoint, params=query, timeout=5)
    return json.loads(response.text)
def _get_partial_data(identifier: str) -> dict:
    """
    Get partial data (ms2 data) from USI

    param identifier: str - USI
    return: dict - the decoded response after parse_data_to_universal; per the
        original author it is expected to carry precursor_mz,
        precursor_charge, mz: list, intensity: list (not verifiable from this
        module alone)
    raises ModiFinderNetworkError: if the request fails or the response is not
        valid JSON
    """
    base_url = 'https://metabolomics-usi.gnps2.org/json/'
    # Human-readable link, used only in the error message.
    url = base_url + "?usi1=" + identifier
    try:
        # Pass the USI through params so characters such as '&', '#', '+' or
        # spaces are URL-encoded instead of corrupting the query string.
        r = requests.get(base_url, params={'usi1': identifier})
        data = json.loads(r.text)
    except (requests.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures. The package's network
        # error is raised for consistency with get_data.
        raise ModiFinderNetworkError(
            "Error in retrieving data from GNPS for identifier: {}, link: {}".format(identifier, url)
        ) from err

    data = parse_data_to_universal(data)
    return data


def _is_usi(identifier: str) -> bool:
    """
    Check if the identifier is a USI

    The check is a plain substring test for "mzspec" anywhere in the string.

    param identifier: str
    return: bool
    """
    return "mzspec" in identifier


def _is_known(identifier: str) -> bool:
    """
    Check if the identifier is a known identifier in GNPS

    The check is a plain substring test for "accession" anywhere in the string.

    param identifier: str
    return: bool
    """
    return "accession" in identifier