# Source code for padmet.utils.exploration.visu_similarity_gsmn

# -*- coding: utf-8 -*-
"""
Description:
    Visualize similarity between metabolic networks using MDS.

::

    usage:
        padmet visu_similarity_gsmn --reaction=FILE --output=FILE [--group=FILE]

    options:
        -h --help     Show help.
        --reaction=FILE    pathname to the reaction file output of compare_padmet or compare_sbml.
        --output=FILE    pathname to the picture output file containing the MDS projection
        --group=FILE    pathname to the group file containing a column named "species" with the organism ID and a column "group" classifying species in group (you can also use a "color" column to associate group to specific color)
"""
import docopt
import numpy as np
import matplotlib.pyplot as plt
import pandas as pa

try:
    from sklearn.manifold import MDS
except ImportError:
    raise ImportError('Requires sklearn, try:\npip install scikit-learn')


def command_help():
    """
    Show help for analysis command.

    The module docstring is handed to docopt, which parses the current
    process arguments against it; whatever docopt returns is printed.
    """
    parsed_arguments = docopt.docopt(__doc__)
    print(parsed_arguments)
def visu_similarity_gsmn_cli(command_args):
    """
    Command-line entry point for the MDS visualisation.

    Parameters
    ----------
    command_args: list
        argument vector parsed by docopt against the module usage text.
    """
    options = docopt.docopt(__doc__, argv=command_args)
    # Forward the three options positionally:
    # reaction file, output picture, optional group file.
    visu_similarity_gsmn(
        options["--reaction"],
        options["--output"],
        options["--group"],
    )
def visu_similarity_gsmn(reaction_file, output_file, group_file=None):
    """
    Project metabolic networks in 2D with MDS and save the scatter plot.

    The reaction table is turned into a species x reactions matrix, embedded
    in two dimensions with sklearn's MDS and drawn with matplotlib. When a
    group file is given, only the species that have a group are kept and
    each group is drawn with its own colour and legend entry.

    Parameters
    ----------
    reaction_file: str
        path to reaction file from compare_padmet/compare_sbml.
    output_file: str
        path to picture output file.
    group_file: str
        path to group file containing group assignation for each metabolic
        network (tab-separated, with a "species" column, a "group" column
        and optionally a "color" column).

    Raises
    ------
    ValueError
        if no species is left to project (for example when none of the
        species of the reaction file appears in the group file).
    """
    # Convert the reaction table into a species x reactions matrix.
    df = pa.read_csv(reaction_file, sep='\t')
    df.set_index('reaction', inplace=True)
    df = df[[column for column in df.columns
             if "(" not in column and "_formula" not in column]]
    df = df.transpose()

    df_join = None
    group_names = []
    color_map = None
    if group_file:
        # Retrieve the group from the group file.
        df_family = pa.read_csv(group_file, sep='\t')
        df_family.set_index('species', inplace=True)
        df_join = df.join(df_family)
        # Select only species with a group: species absent from the group
        # file get NaN after the join and must not be plotted.
        df_join = df_join.dropna(subset=['group'])
        # Group labels are used as-is. Rewriting them with a frame-wide
        # replace() would also alter reaction or color cells that happen to
        # hold the same value as a group label.
        group_names = df_join['group'].unique().tolist()
        X = df_join[df.columns.tolist()]

        if "color" in df_family.columns:
            # Extract color from group file.
            color_map = dict(zip(df_family['group'], df_family['color']))
        else:
            # Create color map for each group in the figure. 'hsv' is cyclic
            # (it starts and ends on red), so one extra sample is requested
            # to keep the first and last groups distinguishable.
            color_map = plt.get_cmap('hsv', len(group_names) + 1)
    else:
        X = df

    if X.empty:
        raise ValueError('No species to project: check that the species of '
                         'the reaction file are listed in the group file.')

    # Projection with MDS.
    embedding = MDS(n_components=2)
    coords = np.asarray(embedding.fit_transform(X))

    # Scope the figure settings to this call and always close the figure, so
    # repeated calls neither leak rcParams nor draw on top of a previous plot.
    with plt.rc_context({'figure.figsize': [20, 20], 'font.size': 14}):
        fig, ax = plt.subplots()
        try:
            if group_file:
                # Use group as provided by the user.
                group_column = df_join['group']
                for index, group in enumerate(group_names):
                    if isinstance(color_map, dict):
                        group_color = color_map[group]
                    else:
                        # A (1, 4) array is read as one RGBA color instead of
                        # four scalar values to be color-mapped.
                        group_color = np.asarray(color_map(index)).reshape(1, -1)
                    mask = (group_column == group).to_numpy()
                    points = coords[mask]
                    ax.scatter(points[:, 0], points[:, 1], c=group_color,
                               label=group, s=15**2)
                    for species, (x, y) in zip(df_join.index[mask], points):
                        ax.annotate(species, (x, y))
                ax.legend()
            else:
                ax.scatter(coords[:, 0], coords[:, 1], c="black", s=15**2)
                for genome, (x, y) in zip(df.index, coords):
                    ax.annotate(genome, (x, y))
            fig.savefig(output_file)
        finally:
            plt.close(fig)