Source code for padmet.utils.connection.get_metacyc_ontology

# -*- coding: utf-8 -*-
"""
Description:
    From the padmetRef of MetaCyc creates the MetaCyc ontology.
    At this moment, all the element of the tree begins with a ont_ and all the '+' or '-' are removed.
    This is a limitation from lxml tag.

::

    usage:
        padmet get_metacyc_to_ontology -p=FILE -o=FILE

    options:
        -h --help     Show help.
        -p=FILE    path of the padmet file of MetaCyc
        -o=FILE   pathname of the XML output file
"""
import docopt
from lxml import etree
from padmet.classes.padmetRef import PadmetRef


[docs] def command_help(): """ Show help for analysis command. """ print(docopt.docopt(__doc__))
[docs] def get_metacyc_ontology_cli(command_args): #parsing args args = docopt.docopt(__doc__, argv=command_args) padmetRef_file = args["-p"] output_file = args["-o"] metacyc_to_ontology(padmetRef_file, output_file, ontology_root='FRAMES')
[docs] def metacyc_to_ontology(padmetRef_file, output_file, ontology_root='FRAMES'): """ Extract the ontology of MetaCyc from the padmetRef. Parameters ---------- padmetRef_file: str path to padmetRef file output_file: str pathname of the output sbml ontology_root: str name of the roots to use to create the tree (FRAMES, Generalized-Reactions, Compounds, ...) """ padmetref = PadmetRef(padmetRef_file) class_nodes = [node for node in padmetref.dicOfNode.values() if node.type == "class"] known_parents = {} # Create the root of the xml tree. frames = etree.Element("element_" + str(len(known_parents)), name=ontology_root) known_parents[ontology_root] = frames # Extract the parent of each objects from classes. child_parents = {} for class_node in class_nodes: if class_node.id != 'FRAMES': parent_classes = [rlt.id_out for rlt in padmetref.dicOfRelationIn[class_node.id] if rlt.type == 'is_a_class'] child_parents[class_node.id] = parent_classes def get_child(parent_id, known_parents, child_parents): # Search all the child of the parent_id for child_parent in child_parents: for parent_class in child_parents[child_parent]: if parent_class == parent_id: if parent_class in known_parents: et_subelement = etree.SubElement(known_parents[parent_class], "element_" + str(len(known_parents)), name=child_parent) known_parents[child_parent] = et_subelement get_child(child_parent, known_parents, child_parents) get_child(ontology_root, known_parents, child_parents) tree = etree.ElementTree(frames) tree.write(output_file, pretty_print=True)
[docs] def add_element_to_tree(element_ids, padmet_instance, ontology_elements, element_type): count = 0 for element_id in element_ids: if element_id in padmet_instance.dicOfRelationIn: if element_type == 'reaction': element_classes = [rlt.id_out for rlt in padmet_instance.dicOfRelationIn[element_id] if rlt.type == "is_in_pathway"] else: element_classes = [rlt.id_out for rlt in padmet_instance.dicOfRelationIn[element_id] if rlt.type == "is_a_class"] for element_class in element_classes: if element_class in ontology_elements: for subclass in ontology_elements[element_class]: etree_sublement = etree.SubElement(subclass, element_type + '_element_' + str(count), name=element_id) if element_id not in ontology_elements: ontology_elements[element_id] = [etree_sublement] else: ontology_elements[element_id].append(etree_sublement) count += 1 return ontology_elements
[docs] def extract_element_ontology(metacyc_ontology_file, padmetRef_file, output_file): onttree = etree.parse(metacyc_ontology_file) ontology_elements = {} for element in onttree.iter(): if element.attrib['name'] not in ontology_elements: ontology_elements[element.attrib['name']] = [element] else: ontology_elements[element.attrib['name']].append(element) padmetref = PadmetRef(padmetRef_file) compound_ids = [node.id for node in padmetref.dicOfNode.values() if node.type == "compound"] ontology_elements = add_element_to_tree(compound_ids, padmetref, ontology_elements, 'compound') pathway_ids = [node.id for node in padmetref.dicOfNode.values() if node.type == "pathway"] ontology_elements = add_element_to_tree(pathway_ids, padmetref, ontology_elements, 'pathway') reaction_ids = [node.id for node in padmetref.dicOfNode.values() if node.type == "reaction"] ontology_elements = add_element_to_tree(reaction_ids, padmetref, ontology_elements, 'reaction') tree = etree.ElementTree(onttree.getroot()) tree.write(output_file, pretty_print=True)
[docs] def ontology_to_newick(metacyc_ontology_file, newick_output_file): onttree = etree.parse(metacyc_ontology_file) def child_to_newick(node): childrens = [] if node.getparent() is None: childrens.append(node.attrib['name']) for children in node.getchildren(): if len(children.getchildren()) > 0: subchilds = [] childs = child_to_newick(children) subchilds.append("'"+children.attrib['name']+"'") subchilds.append(childs) childrens.append('(' + ','.join(subchilds) + ')') else: childrens.append("'"+children.attrib['name']+"'") return '(' + ','.join(childrens) + ')' with open(newick_output_file, 'w') as newick: newick.write(child_to_newick(onttree.getroot())+';')