widdowquinn / pyani

@@ -41,10 +41,19 @@
Loading
41 41
import matplotlib  # pylint: disable=C0411
42 42
import pandas as pd
43 43
import seaborn as sns
44 +
import logging
45 +
from pathlib import Path
46 +
from scipy.cluster import hierarchy
47 +
from ete3 import ClusterTree
48 +
from ete3 import Tree, TreeStyle, faces, AttrFace, PhyloTree
44 49
45 50
matplotlib.use("Agg")
46 51
import matplotlib.pyplot as plt  # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports
47 52
53 +
LABEL_DICT = {}
54 +
55 +
logger = logging.getLogger(__name__)
56 +
48 57
49 58
# Add classes colorbar to Seaborn plot
50 59
def get_colorbar(dfr, classes):
@@ -97,6 +106,28 @@
Loading
97 106
    return fig
98 107
99 108
109 +
def build_label_dict(fig, axis, params):
    """Populate and return the label lookup used by tree plots.

    :param fig:  a Seaborn clustermap instance
    :param axis:  one of {'row', 'col'}
    :param params:  plot parameters; this is where the labels come from

    Entries are added to the module-level ``LABEL_DICT``. Each key is the
    string form of a reordered dendrogram index plus one; each value is the
    matching label from ``params.labels``, or the tick label's own text when
    ``params.labels`` has no entry for it. Any other value of ``axis`` adds
    nothing. The shared ``LABEL_DICT`` is returned in every case.
    """
    # NOTE(review): the column dendrogram is paired with the heatmap's y tick
    # labels and the row dendrogram with its x tick labels, exactly as
    # before - confirm this pairing is intended for non-symmetric matrices.
    if axis == "col":
        pairs = zip(
            fig.dendrogram_col.reordered_ind, fig.ax_heatmap.get_yticklabels()
        )
    elif axis == "row":
        pairs = zip(
            fig.dendrogram_row.reordered_ind, fig.ax_heatmap.get_xticklabels()
        )
    else:
        pairs = ()

    for idx, ticklabel in pairs:
        # Look the label up by the tick label's text rather than by the tick
        # label object, which would always fall through to the default
        # (presumably params.labels is keyed by label text - verify).
        text = ticklabel.get_text()
        LABEL_DICT[str(idx + 1)] = params.labels.get(text, text)

    logger.debug("%s", LABEL_DICT)
    return LABEL_DICT
129 +
130 +
100 131
# Return a clustermap
101 132
def get_clustermap(dfr, params, title=None, annot=True):
102 133
    """Return a Seaborn clustermap for the passed dataframe.
@@ -130,7 +161,7 @@
Loading
130 161
131 162
132 163
# Generate Seaborn heatmap output
133 -
def heatmap(dfr, outfilename=None, title=None, params=None):
164 +
def heatmap(dfr, outfilename=None, title=None, params=None, args=None):
134 165
    """Return seaborn heatmap with cluster dendrograms.
135 166
136 167
    :param dfr:  pandas DataFrame with relevant data
@@ -164,8 +195,13 @@
Loading
164 195
    if outfilename:
165 196
        fig.savefig(outfilename)
166 197
198 +
    # Tree
199 +
    newicks = None
200 +
    if args.tree:
201 +
        newicks = tree(dfr, fig, outfilename, title, params, args)
202 +
167 203
    # Return clustermap
168 -
    return fig
204 +
    return fig, newicks
169 205
170 206
171 207
def distribution(dfr, outfilename, matname, title=None):
@@ -263,3 +299,119 @@
Loading
263 299
264 300
    # Return clustermap
265 301
    return fig
302 +
303 +
304 +
def get_newick(node, parentdist, leaf_names, newick=""):
    """Build a newick-formatted string by recursively walking a tree.

    :param node:  the (sub)tree root currently being visited
    :param parentdist:  distance value of the parent node
    :param leaf_names:  labels for terminal nodes, indexed by node id
    :param newick:  newick text built so far; it is placed after the text
        generated for this node

    Each branch length is the parent's distance minus the node's own
    distance, written to two decimal places.
    """
    branch_length = parentdist - node.dist

    # Terminal node: emit "<label>:<length>" ahead of the accumulated text
    if node.is_leaf():
        return f"{leaf_names[node.id]}:{branch_length:.2f}{newick}"

    # Internal node: close this clade; when nothing has been accumulated yet
    # the closing bracket also terminates the whole string
    if len(newick) == 0:
        suffix = ");"
    else:
        suffix = f"):{branch_length:.2f}{newick}"

    # The left subtree is rendered first, then the right subtree is placed
    # in front of it, separated by a comma
    with_left = get_newick(node.get_left(), node.dist, leaf_names, suffix)
    with_both = get_newick(
        node.get_right(), node.dist, leaf_names, "," + with_left
    )
    return "(" + with_both
329 +
330 +
331 +
def tree(dfr, fig, matfname, mat_title, params, args):
    """Generate a newick file and dendrogram plot for each clustermap axis.

    :param dfr:  a dataframe
    :param fig:  a figure produced by sns.clustermap
    :param matfname:  name of the matrix plot file
    :param mat_title:  title of the matrix plot; used to name the newick files
    :param params:  matrix plot parameters; including labels
    :param args:  Namespace; ``args.outdir`` is where newick files are written

    Returns a dictionary keyed by axis name ('col', 'row') holding the newick
    string generated for that axis, so callers can store them.
    """
    sides = {
        "col": {"axis": fig.dendrogram_col, "names": dfr.columns},
        "row": {"axis": fig.dendrogram_row, "names": dfr.index},
    }

    # Create a linkage dendrogram and newick string for both rows and columns
    newicks = {}
    for axis, side in sides.items():
        # Generate newick format
        root = hierarchy.to_tree(side["axis"].linkage, False)
        logger.debug("Names: %s", side["names"])
        newick = get_newick(root, root.dist, side["names"], "")
        newicks[axis] = newick
        newick_file = Path(args.outdir) / (
            mat_title.replace("matrix", f"{axis}_newick") + ".nw"
        )

        # Generate dendrogram; the label dictionary must be filled first
        # because get_species_name reads it when the tree is built
        build_label_dict(fig, axis, params)
        figtree = PhyloTree(newick)
        figtree.set_species_naming_function(get_species_name)
        figtree_file = str(matfname).replace("matrix", f"{axis}_tree")
        logger.debug("%s", figtree)
        figtree.render(figtree_file, layout=tree_layout)
        figtree.write(outfile=newick_file)

    # Return the newick strings so callers can keep them (e.g. for storage)
    return newicks
387 +
388 +
389 +
def tree_layout(node):
    """Attach name and species faces to leaf nodes of a rendered tree.

    :param node:  the tree node being laid out

    Non-leaf nodes are left untouched. For a leaf, the ``name`` attribute is
    drawn in black at the branch-right position and the ``species``
    attribute is drawn in the aligned position, both in column 0.
    """
    if not node.is_leaf():
        return

    name_face = AttrFace("name", fgcolor="black")
    faces.add_face_to_node(name_face, node, column=0, position="branch-right")

    species_face = AttrFace("species")
    faces.add_face_to_node(species_face, node, column=0, position="aligned")
413 +
414 +
415 +
def get_species_name(node_name_string):
    """Return `Genus species` (where known) for a node.

    :param node_name_string:  node name, used as the key into ``LABEL_DICT``

    A name with no entry in ``LABEL_DICT`` is returned unchanged, so an
    unlabelled node does not raise ``KeyError``.
    """
    return LABEL_DICT.get(node_name_string, node_name_string)

@@ -104,4 +104,12 @@
Loading
104 104
        help="graphics method to use for plotting",
105 105
        choices=["seaborn", "mpl", "plotly"],
106 106
    )
107 +
    parser.add_argument(
108 +
        "--tree",
109 +
        dest="tree",
110 +
        action="store_true",
111 +
        default=False,
112 +
        help="tree formats to generate",
113 +
        # choices=["newick", "dendrogram"]
114 +
    )
107 115
    parser.set_defaults(func=subcommands.subcmd_plot)

@@ -202,6 +202,7 @@
Loading
202 202
            outfname,
203 203
            title=f"matrix_{matdata.name}_run{run_id}",
204 204
            params=params,
205 +
            args=args,
205 206
        )
206 207
207 208
    # Be tidy with matplotlib caches
Files Coverage
pyani 20.17%
Project Totals (52 files) 20.17%
2084
Build #2084 -
2087
Build #2087 -
2085
Build #2085 -
2086
Build #2086 -
1
#
2
# This codecov.yml is the default configuration for
3
# all repositories on Codecov. You may adjust the settings
4
# below in your own codecov.yml in your repository.
5
#
6
coverage:
7
  precision: 2
8
  round: down
9
  range: 70...100
10

11
  status:
12
    # Learn more at https://docs.codecov.io/docs/commit-status
13
    project: true
14
    patch: true
15
    changes: false
16

17
comment:
18
  layout: "header, diff"
19
  behavior: default  # update if exists else create new
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading