Source code for padmet.utils.sbmlPlugin

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=anomalous-backslash-in-string
import re


def parseNotes(element):
    """
    Collect the ``KEY: value`` entries stored in the notes of an SBML element.

    The notes string is read line by line. For each line, the text between
    the first ``>`` and the next ``<`` is split on its FIRST ``:`` into a
    key and a value. Spaces in the key are replaced by ``_``. The value is
    stored verbatim (any leading space after the colon is kept).
    Lines without such a ``>text<`` span, without a colon, or whose value is
    empty / only spaces are ignored.

    ex:
    <notes>
        <html:body>
            <html:p>BIOCYC: |Alkylphosphonates|</html:p>
            <html:p>CHEBI: 60983</html:p>
        </html:body>
    </notes>
    output: {'BIOCYC': [' |Alkylphosphonates|'], 'CHEBI': [' 60983']}

    Each value is a list: when several lines share the same key, their
    values are appended in the order they appear.

    Parameters
    ----------
    element: libsbml.element
        an element from libsbml (only ``getNotesString()`` is used)

    Returns
    -------
    dict:
        the dictionary of notes, ``{key: [value, ...]}``
    """
    notes_dict = {}
    for line in element.getNotesString().splitlines():
        try:
            # line = <html:p>BIOCYC: |Alkylphosphonates|</html:p>
            start = line.index(">") + 1
            end = line.index("<", start)
        except ValueError:
            # no '>...<' span on this line (ex: <notes>, </html:body>)
            continue
        # content = BIOCYC: |Alkylphosphonates|
        # Split on the first ':' only, so that values which themselves
        # contain a colon (URLs, ratios, ...) are kept instead of dropped.
        key, colon, val = line[start:end].partition(":")
        if not colon:
            continue
        # skip values that are empty or made only of spaces
        if not val.strip(" "):
            continue
        # accumulate: several lines may carry the same type of info
        notes_dict.setdefault(key.replace(" ", "_"), []).append(val)
    return notes_dict
def parseGeneAssoc(GeneAssocStr):
    """
    Given a grammar of 'and', 'or' and '(' ')'. Extracts genes ids to a list.

    '(geneX and geneY) or geneW' => ['geneX', 'geneY', 'geneW']

    Only the lowercase operators surrounded by single spaces (' and ',
    ' or ') are recognised. Parentheses and every remaining whitespace
    character are removed before splitting.

    Parameters
    ----------
    GeneAssocStr: str
        the string containing genes ids

    Returns
    -------
    list:
        the list of unique ids, in order of first appearance
    """
    # replace ' and ' or ' or ' by a tag '_FORSPLIT_'
    tagged = re.sub(r" and | or ", "_FORSPLIT_", GeneAssocStr)
    # remove '(' or ')' or any whitespace
    cleaned = re.sub(r"\(|\)|\s", "", tagged)
    genes = []
    seen = set()
    for gene in cleaned.split("_FORSPLIT_"):
        # Skip empty tokens (empty input, dangling operator such as
        # 'geneA or ') and ids already collected. Keeping first-seen order
        # makes the result deterministic, unlike list(set(...)).
        if gene and gene not in seen:
            seen.add(gene)
            genes.append(gene)
    return genes
def extractFormula(elementR):
    """
    Build the textual formula of an SBML reaction.

    Each side of the reaction is rendered as '<stoichiometry> <species>'
    terms joined by ' + '. The two sides are separated by ' <=> ' when the
    reaction is reversible and by ' => ' otherwise.

    ex: '1.0 FRUCTOSELYSINE_p => 1.0 FRUCTOSELYSINE_c'

    Parameters
    ----------
    elementR: libsbml.element
        a reaction from libsbml.element

    Returns
    -------
    str:
        the formula
    """
    # direction of the reaction, read first
    reversible = elementR.getReversible()

    def _render_side(species_refs):
        # "1.0 FRUCTOSELYSINE_p + 1.0 Z"
        return " + ".join(
            str(ref.getStoichiometry()) + " " + ref.getSpecies()
            for ref in species_refs
        )

    left_side = _render_side(elementR.getListOfReactants())
    right_side = _render_side(elementR.getListOfProducts())
    arrow = " <=> " if reversible else " => "
    return left_side + arrow + right_side
def convert_to_coded_id(uncoded, _type=None, compart=None):
    """
    Encode an id into a format valid for sbml.

    The id is first decorated as _type + "_" + uncoded + "_" + compart
    (each part only when given), then every character that is not allowed
    in a sbml id is replaced by its integer ordinal wrapped in double
    underscores (ex: '-' becomes '__45__').

    Parameters
    ----------
    uncoded: str
        the original id to code
    _type: str
        the type of the id (ex: 'R' or 'M')
    compart: str
        the compartment of the id (ex: 'c' or 'e')

    Returns
    -------
    str:
        the coded id
    """
    # characters that are not allowed in a sbml id
    banned_chars = "-|/()'=#*.:!+[], "
    # Replacement text contains only '_' and digits, so one translation
    # pass gives the same result as replacing the characters one by one.
    substitutions = {
        ord(banned): "__" + str(ord(banned)) + "__" for banned in banned_chars
    }

    decorated = uncoded
    if _type is not None:
        decorated = _type + "_" + decorated
    if compart is not None:
        decorated = decorated + "_" + compart
    return decorated.translate(substitutions)
def ascii_replace(match):
    """
    Recover a banned char from the integer ordinal captured in a reg.match.

    Intended as the replacement callable of ``re.sub``: group 1 of the match
    holds the decimal ordinal of the character to restore.

    Parameters
    ----------
    match: re.Match
        a match whose group 1 is a string of digits

    Returns
    -------
    str:
        the character with that ordinal, or the whole matched text left
        unchanged when the number is not a valid code point
    """
    try:
        return chr(int(match.group(1)))
    except (ValueError, OverflowError):
        # The number is too large to be a character ordinal (ex: a purely
        # numeric id fragment such as '_1234567_'): it was never an encoded
        # char, so keep the original text instead of aborting the decoding.
        return match.group(0)
def convert_from_coded_id(
    coded, pattern="__", compart_in_id=False, reaction_tag="R", species_tag="M"
):
    """
    Decode an id from sbml format back to the original id.

    The encoded integer ordinals are turned back into characters, then the
    type prefix and the compartment suffix are extracted with regular
    expressions, from the strictest form to the loosest one.

    ex: M_METABOLITE__45__12_c => ('METABOLITE-12', 'M', 'c')

    Parameters
    ----------
    coded: str
        the encoded id
    pattern: str
        pattern used to delimit integer ordinal
    compart_in_id: bool
        if true: the last _* is not meant to be the compart only, it is
        also part of the id (it is appended back to the decoded id)
    reaction_tag: str
        First letter used to tag a reaction
    species_tag: str
        First letter used to tag a species

    Returns
    -------
    tuple:
        (str: the uncoded id,
        str or None: type of ID (ex: 'M' or 'R'),
        str or None: compart of the id)
    """
    # replace DASH from very old sbmls
    text = coded.replace("_DASH_", "__")
    # an original id starting with int will start with '_' in sbml
    if text.startswith("_"):
        text = text[1:]
    # put back the chars that were replaced by their integer ordinal
    text = re.sub(r"{0}(\d+){0}".format(pattern), ascii_replace, text)

    # Regular expression for coded ID from:
    # https://github.com/SBRG/bigg_models/wiki/BiGG-Models-ID-Specification-and-Guidelines
    type_prefix = r"(?P<_type>^[{0}{1}]_)".format(species_tag, reaction_tag)
    # tried in order; the first one that matches wins
    candidate_regexes = (
        # type + id + short lowercase compartment
        type_prefix + r"(?P<_id>.*)(?P<compart>_[a-z][a-z0-9]?$)",
        # type + id, no compartment
        type_prefix + r"(?P<_id>.*)",
        # no type, anything after the last '_' is the compartment
        r"(?P<_id>.*)(?P<compart>_.*)",
    )

    # fallback when nothing matches: the id is returned as is
    uncoded, _type, compart = text, None, None
    for regex in candidate_regexes:
        found = re.search(regex, text)
        if found is None:
            continue
        parts = found.groupdict()
        uncoded = parts["_id"]
        _type = parts["_type"].replace("_", "") if "_type" in parts else None
        compart = parts["compart"].replace("_", "") if "compart" in parts else None
        break

    if compart and compart_in_id:
        uncoded += "_" + compart
    return (uncoded, _type, compart)
def get_all_decoded_version(element_id, _type):
    """
    Decode an element id (reaction or species) with every known convention.

    Uses convert_from_coded_id with different combinations of options, based
    on old and new sbml id encoding, and returns the distinct decoded ids.

    Attempts, in order:
    1. classic encoding convention (always);
    2. same, keeping the compartment suffix inside the id
       (_type 'species' or 'reaction');
    3. legacy encoding using '_' as ordinal delimiter and 'S' as species
       tag (_type 'species' only).

    Parameters
    ----------
    element_id: str
        the encoded id
    _type: str
        _type is 'reaction' or 'species'

    Returns
    -------
    list:
        list of decoded ids, without duplicates, in order of attempt
    """
    # 1st attempt: classic encoding convention
    attempts = [convert_from_coded_id(element_id)[0]]
    if _type in ("species", "reaction"):
        # 2nd attempt: the compartment suffix is part of the id
        attempts.append(convert_from_coded_id(element_id, compart_in_id=True)[0])
    if _type == "species":
        # 3rd attempt: legacy delimiter and species tag
        attempts.append(
            convert_from_coded_id(element_id, pattern="_", species_tag="S")[0]
        )

    # Keep each decoded id once. The classic decoding used to be appended
    # twice for reactions.
    all_element_id_decoded = []
    for decoded in attempts:
        if decoded not in all_element_id_decoded:
            all_element_id_decoded.append(decoded)
    return all_element_id_decoded