# -*- coding: utf-8 -*-
r"""
Description:
This tool is use the MetaNetX database to check or convert a sbml. Flat files
from MetaNetx are required to run this tool. They can be found in the aureme workflow
or from the MetaNetx website.
To use the tool set:
mnx_folder= the path to a folder containing MetaNetx flat files.
the files must be named as 'reac_xref.tsv' and 'chem_xref.tsv'
or set manually the different path of the flat files with:
mnx_reac= path to the flat file for reactions
mnx_chem= path to the flat file for chemical compounds (species)
To check the database used in a sbml:
to check all element of sbml (reaction and species) set:
to--map=all
to check only reaction of sbml set:
to--map=reaction
to check only species of sbml set:
to--map=species
To map a sbml and obtain a file of mapping ids to a given database set:
to-map:
as previously explained
db_out:
the name of the database target: ['metacyc', 'bigg', 'kegg'] only
output:
the path to the output file
For a given sbml using a specific database.
Return a dictionnary of mapping.
the output is a file with line = reaction_id/or species in sbml, reaction_id/species in db_out database
ex:
For a sbml based on kegg database, db_out=metacyc: the output file will contains for ex:
R02283 ACETYLORNTRANSAM-RXN
::
usage:
padmet convert_sbml_db --mnx_reac=FILE --mnx_chem=FILE --sbml=FILE --to-map=STR [-v]
padmet convert_sbml_db --mnx_folder=DIR --sbml=FILE --to-map=STR [-v]
padmet convert_sbml_db --mnx_folder=DIR --sbml=FILE --output=FILE --db_out=ID --to-map=STR [-v]
padmet convert_sbml_db --mnx_reac=FILE --mnx_chem=FILE --sbml=FILE --output=FILE --db_out=ID --to-map=STR [-v]
options:
-h --help Show help.
--to-map=STR select the part of the sbml to check or convert, must be in ['all', 'reaction', 'species']
--mnx_reac=FILE path to the MetaNetX file for reactions
--mnx_chem=FILE path to the MetaNetX file for compounds
--sbml=FILE path to the sbml file to convert
--output=FILE path to the file containing the mapping, sep = "\t"
--db_out=FILE id of the output database in ["BIGG","METACYC","KEGG"]
-v verbose.
"""
import docopt
import libsbml
import os
from padmet.utils.sbmlPlugin import get_all_decoded_version
[docs]
def command_help():
"""
Show help for analysis command.
"""
print(docopt.docopt(__doc__))
[docs]
def convert_sbml_db_cli():
args = docopt.docopt(__doc__, argv=command_args)
verbose = args["-v"]
to_map = args["--to-map"]
mnx_folder = args["--mnx_folder"]
mnx_reac_file = args["--mnx_reac"]
mnx_chem_file = args["--mnx_chem"]
sbml_file = args["--sbml"]
if args["--db_out"]:
db_out = args["--db_out"].upper()
output = args["--output"]
map_sbml(sbml_file, to_map, db_out, output, verbose, mnx_reac_file, mnx_chem_file, mnx_folder)
else:
db_select, db_found = check_sbml_db(sbml_file, to_map, verbose, mnx_reac_file, mnx_chem_file, mnx_folder)
print("Best matching database: %s" %db_select)
print(db_found)
[docs]
def check_sbml_db(sbml_file, to_map, verbose = False, mnx_reac_file = None, mnx_chem_file = None, mnx_folder = None):
"""
Check sbml database of a given sbml.
Parameters
----------
sbml_file: str
path to the sbml file to convert
to_map: str
select the part of the sbml to check must be in ['all', 'reaction', 'species']
verbose: bool
if true: more info during process
mnx_reac_file: str
path to the flat file for reactions (can be None if given mnx_folder)
mnx_chem_file: str
path to the flat file for chemical compounds (species) (can be None if given mnx_folder)
mnx_folder: str
the path to a folder containing MetaNetx flat files
Returns
-------
tuple:
(name of the best matching database, dict of matching)
"""
if to_map not in ["all", "reaction", "species"]:
raise ValueError("%s must be in [all, reaction, species]" %to_map)
if mnx_folder:
mnx_reac_file = os.path.join(mnx_folder, "reac_xref.tsv")
mnx_chem_file = os.path.join(mnx_folder, "chem_xref.tsv")
if mnx_reac_file:
if not os.path.exists(mnx_reac_file):
raise FileNotFoundError("No MetaNetX file for reactions accessible at " + mnx_reac_file)
if mnx_chem_file:
if not os.path.exists(mnx_chem_file):
raise FileNotFoundError("No MetaNetX file for compounds accessible at " + mnx_chem_file)
if not os.path.exists(sbml_file):
raise FileNotFoundError("No SBML file accessible at " + sbml_file)
reader = libsbml.SBMLReader()
document = reader.readSBML(sbml_file)
for i in range(document.getNumErrors()):
print(document.getError(i).getMessage())
model = document.getModel()
listOfReactions = model.getListOfReactions()
listOfSpecies = model.getListOfSpecies()
unknown_db = "Unknown"
db_found = {unknown_db: 0}
if to_map == "all":
db_found["total_reaction_species"] = 0
if verbose:
print("Check from which database is this sbml:")
if to_map in ["all", "reaction"]:
db_found['total_reaction'] = 0
with open(mnx_reac_file, "r") as f:
dict_reaction_id_db = dict([(line.split("\t")[0].split(":")[1], line.split("\t")[0].split(":")[0]) for line in f.read().splitlines()
if not line.startswith("#") and ":" in line.split("\t")[0]])
for reaction in listOfReactions:
reaction_id = reaction.id
db_found['total_reaction'] += 1
if to_map == "all":
db_found["total_reaction_species"] += 1
all_reaction_id_decoded = get_all_decoded_version(reaction_id, "reaction")
for reaction_id_decoded in all_reaction_id_decoded:
db_match = dict_reaction_id_db.get(reaction_id_decoded, unknown_db)
if db_match != unknown_db:
break
if verbose:
if db_match != unknown_db:
print("reaction: original id: %s; decoded id:%s; db_match:%s" %(reaction_id, reaction_id_decoded, db_match))
else:
print("reaction: original id:%s; all decoded id:%s; no db found" %(reaction_id, all_reaction_id_decoded))
try:
db_found[db_match] += 1
except KeyError:
db_found[db_match] = 1
if to_map in ["all", "species"]:
db_found["total_species"] = 0
with open(mnx_chem_file, "r") as f:
dict_species_id_db = dict([(line.split("\t")[0].split(":")[1], line.split("\t")[0].split(":")[0]) for line in f.read().splitlines()
if not line.startswith("#") and ":" in line.split("\t")[0]])
for species in listOfSpecies:
species_id = species.id
db_found['total_species'] += 1
if to_map == "all":
db_found["total_reaction_species"] += 1
all_species_id_decoded = get_all_decoded_version(species_id, "species")
for species_id_decoded in all_species_id_decoded:
db_match = dict_species_id_db.get(species_id_decoded, unknown_db)
if db_match != unknown_db:
break
if verbose:
if db_match != unknown_db:
print("species: original id: %s; decoded id:%s; db_match:%s" %(species_id, species_id_decoded, db_match))
else:
print("species: original id:%s; all decoded id:%s; no db found" %(species_id, all_species_id_decoded))
try:
db_found[db_match] += 1
except KeyError:
db_found[db_match] = 1
if to_map == "all":
db_select = [k for k, v in list(db_found.items())
if v == max([j for i,j in list(db_found.items()) if i != 'total_reaction_species'])][0]
elif to_map == "reaction":
db_select = [k for k, v in list(db_found.items())
if v == max([j for i,j in list(db_found.items()) if i != 'total_reaction'])][0]
elif to_map == "species":
db_select = [k for k, v in list(db_found.items())
if v == max([j for i,j in list(db_found.items()) if i != 'total_species'])][0]
return (db_select, db_found)
[docs]
def map_sbml(sbml_file, to_map, db_out, output, verbose = False, mnx_reac_file = None, mnx_chem_file = None, mnx_folder = None):
"""
map a sbml and obtain a file of mapping ids to a given database.
Parameters
----------
sbml_file: str
path to the sbml file to convert
to_map: str
select the part of the sbml to check must be in ['all', 'reaction', 'species']
db_out: str
the name of the database target: ['metacyc', 'bigg', 'kegg'] only
output: str
path to the file containing the mapping, sep = "\t"
verbose: bool
if true: more info during process
mnx_reac_file: str
path to the flat file for reactions (can be None if given mnx_folder)
mnx_chem_file: str
path to the flat file for chemical compounds (species) (can be None if given mnx_folder)
mnx_folder: str
the path to a folder containing MetaNetx flat files
Returns
-------
tuple:
(name of the best matching database, dict of matching)
"""
map_from_cpd = False
if to_map not in ["all", "reaction", "species"]:
raise ValueError("%s must be in [all, reaction, species]" %to_map)
if mnx_folder:
mnx_reac_file = os.path.join(mnx_folder, "reac_xref.tsv")
mnx_chem_file = os.path.join(mnx_folder, "chem_xref.tsv")
if mnx_reac_file:
if not os.path.exists(mnx_reac_file):
raise FileNotFoundError("No MetaNetX file for reactions accessible at " + mnx_reac_file)
if mnx_chem_file:
if not os.path.exists(mnx_chem_file):
raise FileNotFoundError("No MetaNetX file for compounds accessible at " + mnx_chem_file)
if not os.path.exists(sbml_file):
raise FileNotFoundError("No SBML file accessible at " + sbml_file)
reader = libsbml.SBMLReader()
document = reader.readSBML(sbml_file)
for i in range(document.getNumErrors()):
print(document.getError(i).getMessage())
model = document.getModel()
listOfReactions = model.getListOfReactions()
listOfSpecies = model.getListOfSpecies()
if db_out not in ["BIGG","METACYC","KEGG"]:
raise ValueError('Please choose a database id in ["BIGG","METACYC","KEGG"]')
#k: orignial id, v = ref id
dict_sbml_id_mapped_id = {}
if to_map in ["all", "reaction"]:
#For reactions: k = MNXid, v = {k=db_id,v=[list of ids]}
mnx_reac_dict = mnx_reader(mnx_reac_file, db_out)
reaction_with_more_one_mapping = 0
count_nb_stric_reac_mapped = 0
for reaction in listOfReactions:
reaction_id = reaction.id
match_ids = None
all_reaction_id_decoded = get_all_decoded_version(reaction_id, "reaction")
for reaction_id_decoded in all_reaction_id_decoded:
if not match_ids:
#first check in intern dict mapping
match_ids = intern_mapping(reaction_id_decoded, db_out, "reaction")
if match_ids:
dict_sbml_id_mapped_id[reaction_id] = match_ids
count_nb_stric_reac_mapped += 1
if verbose:
print("reaction: original id:%s; decoded id:%s; match_with:%s from intern mapping" %(reaction_id, reaction_id_decoded, match_ids))
#check if in mnx_reac_dict
else:
match_ids = get_from_mnx(mnx_reac_dict, reaction_id_decoded, db_out)
if match_ids:
if len(match_ids) > 1:
reaction_with_more_one_mapping += 1
if verbose:
print("reaction: original id:%s; decoded id:%s; More than one mapping:%s" %(reaction_id, reaction_id_decoded, match_ids))
else:
dict_sbml_id_mapped_id[reaction_id] = match_ids[0]
count_nb_stric_reac_mapped += 1
if verbose:
print("reaction: original id:%s; decoded id:%s; match_with:%s from MNX mapping" %(reaction_id, reaction_id_decoded, match_ids[0]))
if verbose and not match_ids:
print("reaction: original id:%s; all decoded id:%s; not mapped" %(reaction_id, all_reaction_id_decoded))
if to_map in ["all", "species"]:
#For species: k = MNXid, v = {k=db_id,v=[list of ids]}
mnx_chem_dict = mnx_reader(mnx_chem_file, db_out)
species_with_more_one_mapping = 0
count_nb_stric_species_mapped = 0
for species in listOfSpecies:
species_id = species.id
match_ids = None
all_species_id_decoded = get_all_decoded_version(species_id, "species")
for species_id_decoded in all_species_id_decoded:
#first check in intern dict mapping
match_ids = intern_mapping(species_id_decoded, db_out, "species")
if match_ids:
dict_sbml_id_mapped_id[species_id] = match_ids
count_nb_stric_species_mapped += 1
if verbose:
print("species: original id:%s; decoded id:%s; match_with:%s from intern mapping" %(species_id, species_id_decoded, match_ids))
break
#check if in mnx_chem_dict
else:
match_ids = get_from_mnx(mnx_chem_dict, species_id_decoded, db_out)
if match_ids:
if len(match_ids) > 1:
species_with_more_one_mapping += 1
if verbose:
print("species: original id:%s; decoded id:%s; More than one mapping:%s" %(species_id, species_id_decoded, match_ids))
else:
dict_sbml_id_mapped_id[species_id] = match_ids[0]
count_nb_stric_species_mapped += 1
if verbose:
print("species: original id:%s; decoded id:%s; match_with:%s from MNX mapping" %(species_id, species_id_decoded, match_ids))
if verbose and not match_ids:
print("species: original id:%s; all decoded id:%s; not mapped" %(species_id, all_species_id_decoded))
if map_from_cpd:
reaction_mapped_with_cpds = []
#for all non mapped rxn, check if able to map all speices
for sbml_rxn in [i for i in listOfReactions if i.id not in list(dict_sbml_id_mapped_id.keys())]:
all_cpds = set([r.getSpecies() for r in sbml_rxn.getListOfReactants()] + [r.getSpecies() for r in sbml_rxn.getListOfProducts()])
match_cpd_in_rxn = set([cpd_id for cpd_id in all_cpds if cpd_id in list(dict_sbml_id_mapped_id.keys())])
if len(match_cpd_in_rxn) == len(all_cpds):
reaction_mapped_with_cpds.append(sbml_rxn.id)
if verbose:
print("#######")
if to_map in ["all", "reaction"]:
print("Mapped reactions: %s/%s" %(count_nb_stric_reac_mapped,len(listOfReactions)))
print("Reactions with more than one mapping: %s" %reaction_with_more_one_mapping)
if to_map in ["all", "species"]:
print("Mapped species: %s/%s" %(count_nb_stric_species_mapped,len(listOfSpecies)))
print("Species with more than one mapping: %s" %species_with_more_one_mapping)
if map_from_cpd:
print("Mapped reactions from species: %s" %(len(reaction_mapped_with_cpds)))
for i in reaction_mapped_with_cpds:
print("\t%s" %i)
print("Total reactions mapped:%s/%s" %(count_nb_stric_reac_mapped+len(reaction_mapped_with_cpds),len(listOfReactions)))
print("#######")
with open(output, 'w') as f:
for k,v in list(dict_sbml_id_mapped_id.items()):
f.write(k+"\t"+v+"\n")
[docs]
def get_from_mnx(mnx_dict, element_id, db_out):
"""
#TODO
"""
match_ids = None
for map_dict in list(mnx_dict.values()):
#print(map_dict)
all_element_id = []
[all_element_id.extend(i) for i in list(map_dict.values())]
if element_id in all_element_id:
match_ids = map_dict[db_out]
break
return match_ids
[docs]
def mnx_reader(input_file, db_out):
"""
#TODO
"""
with open(input_file, "r") as f:
dataInArray = [line.split("\t")[:2] for line in f.read().splitlines() if not line.startswith("#")]
#print list_data[:10]
mnx_dict = dict()
all_mnxs = set([k for v,k in dataInArray])
mnx_dict = dict([(k, dict()) for k in all_mnxs])
#print a[:10]
#print mnx_dict.items()[:10]
for v,k in dataInArray:
try:
db, _id = v.split(":")
except ValueError:
db = v.split(":")[0]
_id = ":".join(v.split(":")[1:])
db = db.upper()
try:
mnx_dict[k][db].append(_id)
except KeyError:
mnx_dict[k][db] = [_id]
#clean ids not in db_out
for k,v in list(mnx_dict.items()):
if db_out not in list(v.keys()) or len(list(v.keys())) == 1:
mnx_dict.pop(k)
"""
for v in mnx_dict.values():
try:
rxn_db_out = v[db_out]
all_mapp_rxn = []
[all_mapp_rxn.extend(i) for i in v.values()]
"""
return mnx_dict
[docs]
def intern_mapping(id_to_map, db_out, _type):
"""
#TODO
"""
intern_reac_dict = {
"UNIQ_ID_1":{"METACYC":["RXN-6382"],"KEGG":["R00904"],"UNKNOWN":["APor"]},
"UNIQ_ID_2":{"METACYC":["CARNOSINE-SYNTHASE-RXN"],"UNKNOWN":["HAL"]},
"UNIQ_ID_3":{"METACYC":["ALANINE-AMINOTRANSFERASE-RXN"],"KEGG":["R00258"]},
"UNIQ_ID_4":{"METACYC":["ASPAMINOTRANS-RXN"],"KEGG":["R00355"],"BIGG":["ASPATh"]},
"UNIQ_ID_5":{"METACYC":["PHEAMINOTRANS-RXN"],"KEGG":["R00694"],"BIGG":["POAT","POATh","POATm"]},
"UNIQ_ID_6":{"METACYC":["ORNCARBAMTRANSFER-RXN"],"KEGG":["R01398"],"BIGG":["OCT","OCTh","OCTm"]},
"UNIQ_ID_7":{"METACYC":["3PGAREARR-RXN"],"KEGG":["R01518"],"BIGG":["PGM","PGMf","PGMm"]},
"UNIQ_ID_8":{"METACYC":["6PGLUCONDEHYDROG-RXN"],"KEGG":["R01528"],"BIGG":["PGDHh"]},
"UNIQ_ID_9":{"METACYC":["4-HYDROXY-2-KETOPIMELATE-LYSIS-RXN"],"KEGG":["R01645"]},
"UNIQ_ID_10":{"METACYC":["AMINEPHEN-RXN"],"KEGG":["R02613"]},
"UNIQ_ID_11":{"METACYC":["SHIKIMATE-5-DEHYDROGENASE-RXN"],"KEGG":["R02413"]},
"UNIQ_ID_12":{"METACYC":["CARBODEHYDRAT-RXN"],"BIGG":["HCO3E","HCO3Em","HCO3Ehi"]}
}
intern_chem_dict = {
"UNIQ_ID_1":{"METACYC":["WATER"],"KEGG":["C00001"],"BIGG":["h2o"],"UNKNOWN":["H2O"]},
"UNIQ_ID_2":{"METACYC":["2-KETOGLUTARATE"],"BIGG":["akg"],"KEGG":["C00026"]},
"UNIQ_ID_3":{"METACYC":["Pi"],"BIGG":["pi"],"KEGG":["C00009"]},
"UNIQ_ID_4":{"METACYC":["OXALACETIC_ACID"],"BIGG":["oaa"],"KEGG":["C00036"]},
"UNIQ_ID_5":{"METACYC":["HS"],"BIGG":["h2s"]},
"UNIQ_ID_6":{"METACYC":["NITRATE"],"BIGG":["no3"]},
"UNIQ_ID_7":{"METACYC":["HEME_O"],"BIGG":["heme0"]},
"UNIQ_ID_8":{"METACYC":["FORMATE"],"BIGG":["for"]},
"UNIQ_ID_9":{"METACYC":["GLN-tRNAs"],"BIGG":["trnagln"]},
"UNIQ_ID_10":{"METACYC":["CO-A"],"BIGG":["coa"]},
"UNIQ_ID_11":{"METACYC":["ADENOSINE"],"BIGG":["adn"]},
"UNIQ_ID_12":{"METACYC":["ADENINE"],"BIGG":["ade"]},
"UNIQ_ID_13":{"METACYC":["CYTIDINE"],"BIGG":["cytd"]},
"UNIQ_ID_14":{"METACYC":["CYTIDINE"],"BIGG":["dtdp"]},
"UNIQ_ID_15":{"METACYC":["DIHYDRO-NEO-PTERIN"],"BIGG":["dhnpt"]},
"UNIQ_ID_16":{"METACYC":["SUCROSE-6P"],"BIGG":["suc6p"]},
"UNIQ_ID_17":{"METACYC":["ALKYL-SN-GLYCERO-PHOSPHOETHANOLAMINE"],"BIGG":["g3pe"]},
"UNIQ_ID_18":{"METACYC":["Retinols"],"BIGG":["retinol"]},
"UNIQ_ID_19":{"METACYC":["PHENYL-PYRUVATE"],"BIGG":["phpyr"]},
"UNIQ_ID_20":{"METACYC":["CPD-6082"],"BIGG":["bamppald"]},
"UNIQ_ID_21":{"METACYC":["S-3-HYDROXYBUTANOYL-COA"],"BIGG":["3hbcoa"]},
"UNIQ_ID_22":{"METACYC":["Thiopurines"],"BIGG":["6mpur"]},
"UNIQ_ID_23":{"METACYC":["N-ACETYL-D-GLUCOSAMINE-6-P"],"BIGG":["acgam6p"]},
"UNIQ_ID_24":{"METACYC":["CHLOROPHYLL-B"],"BIGG":["chlb"]},
"UNIQ_ID_25":{"METACYC":["FORMALDEHYDE"],"BIGG":["fald"]},
"UNIQ_ID_26":{"METACYC":["GLC-1-P"],"BIGG":["g1p"]},
"UNIQ_ID_27":{"METACYC":["NA+"],"BIGG":["na1"]},
"UNIQ_ID_28":{"METACYC":["ADP"],"KEGG":["C00008"]},
"UNIQ_ID_29":{"METACYC":["L-DELTA1-PYRROLINE_5-CARBOXYLATE"],"KEGG":["C03912"]},
"UNIQ_ID_30":{"METACYC":["AMMONIA"],"KEGG":["C00014"]},
"UNIQ_ID_31":{"METACYC":["PPI"],"KEGG":["C00013"]},
"UNIQ_ID_32":{"METACYC":["NADP"],"KEGG":["C00006"]},
"UNIQ_ID_33":{"METACYC":["NADPH"],"KEGG":["C00005"]},
"UNIQ_ID_34":{"METACYC":["GLY"],"UNKNOWN":["Glycine"]},
"UNIQ_ID_35":{"METACYC":["METOH"],"UNKNOWN":["Methanol"]},
"UNIQ_ID_36":{"METACYC":["PPI"],"UNKNOWN":["Pyrophosphate"]},
}
if _type == "reaction":
for mapp_dict in list(intern_reac_dict.values()):
all_reaction_id = []
[all_reaction_id.extend(i) for i in list(mapp_dict.values())]
if id_to_map in all_reaction_id:
return mapp_dict.get(db_out, [None])[0]
elif _type == "species":
for mapp_dict in list(intern_chem_dict.values()):
all_species_id = []
[all_species_id.extend(i) for i in list(mapp_dict.values())]
if id_to_map in all_species_id:
return mapp_dict.get(db_out, [None])[0]
return None