widdowquinn / pyani

Compare 62949c4 ... +15 ... 1acbdca

Coverage Reach
scripts/subcommands/subcmd_anim.py scripts/subcommands/subcmd_fastani.py scripts/subcommands/subcmd_download.py scripts/subcommands/subcmd_anib.py scripts/subcommands/subcmd_plot.py scripts/subcommands/subcmd_report.py scripts/subcommands/subcmd_tree.py scripts/subcommands/subcmd_classify.py scripts/subcommands/subcmd_index.py scripts/subcommands/subcmd_listdeps.py scripts/subcommands/subcmd_createdb.py scripts/subcommands/__init__.py scripts/subcommands/subcmd_aniblastall.py scripts/genbank_get_genomes_by_taxon.py scripts/average_nucleotide_identity.py scripts/parsers/__init__.py scripts/parsers/tree_parser.py scripts/parsers/download_parser.py scripts/parsers/fastani_parser.py scripts/parsers/report_parser.py scripts/parsers/anim_parser.py scripts/parsers/classify_parser.py scripts/parsers/anib_parser.py scripts/parsers/plot_parser.py scripts/parsers/common_parser.py scripts/parsers/index_parser.py scripts/parsers/scheduling_parser.py scripts/parsers/run_common_parser.py scripts/parsers/createdb_parser.py scripts/parsers/aniblastall_parser.py scripts/parsers/listdeps_parser.py scripts/pyani_script.py scripts/__init__.py scripts/delta_filter_wrapper.py pyani_graphics/mpl/__init__.py pyani_graphics/sns/__init__.py pyani_graphics/tree/__init__.py pyani_graphics/__init__.py pyani_orm.py download.py anib.py nucmer.py run_sge.py anim.py fastani.py pyani_tools.py pyani_jobs.py pyani_files.py tetra.py pyani_report.py run_multiprocessing.py pyani_classify.py logger.py aniblastall.py pyani_config.py dependencies.py blast.py __init__.py

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -819,11 +819,11 @@
Loading
819 819
        )
820 820
        if args.gmethod == "mpl":
821 821
            pyani_graphics.mpl.heatmap(
822 -
                dfm, outfilename=outfilename, title=filestem, params=params
822 +
                dfm, outfilename=outfilename, title=filestem, params=params, args=args
823 823
            )
824 824
        elif args.gmethod == "seaborn":
825 825
            pyani_graphics.sns.heatmap(
826 -
                dfm, outfilename=outfilename, title=filestem, params=params
826 +
                dfm, outfilename=outfilename, title=filestem, params=params, args=args
827 827
            )
828 828
829 829

@@ -0,0 +1,159 @@
Loading
1 +
import logging
2 +
import os
3 +
import sys
4 +
import multiprocessing
5 +
6 +
from argparse import Namespace
7 +
from pathlib import Path
8 +
from typing import Dict, List
9 +
import pandas as pd
10 +
11 +
from pyani import pyani_config, pyani_orm, pyani_graphics
12 +
from pyani.pyani_tools import termcolor, MatrixData
13 +
14 +
# Dispatch table: rendering method name -> function that draws the tree
TREEMETHODS = dict(ete3=pyani_graphics.tree.tree)

# Newick strings gathered for the run currently being processed, keyed by
# tree name; emptied after each run by subcmd_tree()
NEWICKS = dict()
18 +
19 +
20 +
def subcmd_tree(args: Namespace) -> int:
    """Render tree (dendrogram) output for one or more analysis runs.

    :param args:  Namespace of command-line arguments

    Every run listed in ``args.run_ids`` has its trees drawn into
    ``args.outdir``. Any Newick strings present in NEWICKS after a run has
    been processed are written to file, and NEWICKS is then emptied before
    the next run. Returns 0.
    """
    log = logging.getLogger(__name__)

    # Tell the user what is about to happen
    log.info(termcolor("Generating tree output for analyses", "red"))
    log.info("Writing output to: %s", args.outdir)
    os.makedirs(args.outdir, exist_ok=True)
    log.info("Rendering method: %s", args.method)

    # Open a session on the results database
    log.debug("Activating session for database: %s", args.dbpath)
    db_session = pyani_orm.get_session(args.dbpath)

    # Output formats, exactly as supplied on the command line
    formats = args.formats
    log.debug("Requested output formats: %s", formats)
    log.debug("Type of formats variable: %s", type(formats))

    # Process the requested runs one at a time
    log.debug("Generating trees for runs: %s", args.run_ids)
    for current_run in args.run_ids:
        write_run_trees(current_run, db_session, formats, args)

        # Flush any Newick strings gathered for this run, then reset the store
        if len(NEWICKS) > 0:
            write_newicks(args, current_run)
        NEWICKS.clear()

    return 0
56 +
57 +
58 +
def write_run_trees(
    run_id: int,
    session,
    outfmts: List[str],
    args: Namespace,
) -> None:
    """Write tree plots for each requested matrix type of a run.

    :param run_id:  int, run_id for this run
    :param session:  database session used to look up the run's results
    :param outfmts:  list of output formats for files
    :param args:  Namespace for command-line arguments

    Only the matrices named in ``args.trees`` are parsed and plotted. Newick
    strings returned by the plotting jobs are merged into the module-level
    NEWICKS dictionary so that the caller can write them to file.
    """
    logger = logging.getLogger(__name__)
    logger.debug("Retrieving results matrices for run %s", run_id)

    results = (
        session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == run_id).first()
    )
    result_label_dict = pyani_orm.get_matrix_labels_for_run(session, run_id)
    result_class_dict = pyani_orm.get_matrix_classes_for_run(session, run_id)
    logger.debug(
        "Have %d labels and %d classes",
        len(result_label_dict),
        len(result_class_dict),
    )

    # Matrix name -> attribute of the run record holding its JSON-encoded data
    matrix_attrs = (
        ("identity", "df_identity"),
        ("coverage", "df_coverage"),
        ("aln_lengths", "df_alnlength"),
        ("sim_errors", "df_simerrors"),
        ("hadamard", "df_hadamard"),
    )

    # Build and collect the plotting commands
    plotting_commands = []
    for name, attr in matrix_attrs:
        if name not in args.trees:
            # Not requested: skip it without paying for the JSON parse
            continue
        matdata = MatrixData(name, pd.read_json(getattr(results, attr)), {})
        logger.info("Writing tree plot for %s matrix", matdata.name)
        plotting_commands.append(
            (
                write_tree,
                [run_id, matdata, result_label_dict, result_class_dict, outfmts, args],
            )
        )
    logger.debug("Plotting commands: %s", plotting_commands)

    # Run the plotting commands. All jobs are submitted before any result is
    # awaited, so the workers can run concurrently; .get() re-raises any
    # exception raised in a worker, so failures are not silently dropped.
    pool = multiprocessing.Pool(processes=args.workers)
    try:
        jobs = [
            pool.apply_async(func, options, {}, callback=logger.debug)
            for func, options in plotting_commands
        ]
        for job in jobs:
            newicks = job.get()
            # Workers run in separate processes, so Newick strings must be
            # carried back through the job result and merged here
            if newicks:
                NEWICKS.update(newicks)
    finally:
        # Always release the worker processes, even if a job failed
        pool.close()
        pool.join()


def write_tree(
    run_id: int,
    matdata: MatrixData,
    result_labels: Dict,
    result_classes: Dict,
    outfmts: List[str],
    args: Namespace,
) -> Dict[str, str]:
    """Write a single tree for a pyani run, once per output format.

    :param run_id:  int, run_id for this run
    :param matdata:  MatrixData object for this tree
    :param result_labels:  dict of result labels
    :param result_classes: dict of result classes
    :param outfmts:  list of output formats for files
    :param args:  Namespace for command-line arguments

    Returns the Newick strings reported by the rendering function, merged
    across all output formats (empty if the renderer reported none).
    """
    cmap = pyani_config.get_colormap(matdata.data, matdata.name)
    newicks: Dict[str, str] = {}

    for fmt in outfmts:
        # NOTE(review): the "distribution_" prefix looks inherited from the
        # distribution plots; confirm whether the renderer relies on it.
        outfname = Path(args.outdir) / f"distribution_{matdata.name}_run{run_id}.{fmt}"

        # A fresh Params per format, in case the renderer modifies it
        params = pyani_graphics.Params(cmap, result_labels, result_classes)

        fmt_newicks = TREEMETHODS[args.method](
            matdata.data,
            outfname,
            title=f"matrix_{matdata.name}_run{run_id}",
            params=params,
            format=fmt,
            args=args,
        )
        if fmt_newicks:
            newicks.update(fmt_newicks)

    return newicks
152 +
153 +
154 +
def write_newicks(args: Namespace, run_id):
    """Write the collected Newick strings for a run to a tab-separated file.

    :param args:  Namespace for command-line arguments; ``args.outdir`` is
        the directory the file is written to
    :param run_id:  identifier of the run, used in the output filename

    One line is written per entry in NEWICKS, as ``name<TAB>newick``.
    """
    destination = Path(args.outdir) / f"newicks_run{run_id}.nw"
    lines = [f"{name}\t{nw}\n" for name, nw in NEWICKS.items()]
    with open(destination, "w") as handle:
        handle.writelines(lines)

@@ -48,4 +48,5 @@
Loading
48 48
from .subcmd_listdeps import subcmd_listdeps
49 49
from .subcmd_plot import subcmd_plot
50 50
from .subcmd_report import subcmd_report
51 +
from .subcmd_tree import subcmd_tree
51 52
from .subcmd_fastani import subcmd_fastani

@@ -0,0 +1,166 @@
Loading
1 +
import logging
2 +
from pyani import pyani_graphics
3 +
from scipy.cluster import hierarchy
4 +
from ete3 import ClusterTree, Tree, TreeStyle, faces, AttrFace, PhyloTree
5 +
from pathlib import Path
6 +
import sys
7 +
import seaborn as sns
8 +
9 +
# Maps tree leaf names to display labels; filled by build_label_dict() and
# read by get_species_name()
LABEL_DICT = {}


def build_label_dict(fig, axis, params):
    """Record label info for tree plots in LABEL_DICT and return it.

    :param fig:  a Seaborn clustermap instance
    :param axis:  one of {'row', 'rows', 'col', 'columns'}; both spellings
        are accepted because tree() names its axes 'rows' and 'columns'
    :param params:  plot parameters; this is where the labels come from

    Each entry is keyed by the string form of (dendrogram position + 1). The
    value is looked up in ``params.labels`` and falls back to the text of the
    corresponding heatmap tick label. An unrecognised axis adds no entries.
    """
    logger = logging.getLogger(__name__)

    # NOTE(review): the column ordering is paired with the y tick labels and
    # the row ordering with the x tick labels, as in the original code;
    # confirm that this pairing is intended for non-symmetric matrices.
    if axis in ("col", "columns"):
        order = fig.dendrogram_col.reordered_ind
        ticklabels = fig.ax_heatmap.get_yticklabels()
    elif axis in ("row", "rows"):
        order = fig.dendrogram_row.reordered_ind
        ticklabels = fig.ax_heatmap.get_xticklabels()
    else:
        logger.warning("Unrecognised axis %r; no labels recorded", axis)
        order, ticklabels = (), ()

    for idx, ticklabel in zip(order, ticklabels):
        LABEL_DICT[str(idx + 1)] = params.labels.get(ticklabel, ticklabel.get_text())

    logger.debug("Label dict: %s", LABEL_DICT)
    return LABEL_DICT
33 +
34 +
35 +
def get_newick(node, parentdist, leaf_names, newick=""):
    """Build a Newick-formatted string for a tree by recursive traversal.

    :param node:  root of the (sub)tree to be serialised
    :param parentdist:  distance value of the parent node
    :param leaf_names:  labels for the terminal nodes, indexed by node id
    :param newick:  text that follows this subtree in the output; left empty
        when calling on the root of the whole tree

    Branch lengths are the parent's distance minus the node's own distance,
    written to two decimal places. Within each internal node the right child
    is emitted before the left child.
    """
    branch_length = parentdist - node.dist

    # Terminal node: label, branch length, then whatever follows
    if node.is_leaf():
        return f"{leaf_names[node.id]}:{branch_length:.2f}{newick}"

    # Internal node: an empty trailing string marks the outermost call, which
    # closes the whole tree with ");" instead of a branch length
    closing = ");" if len(newick) == 0 else f"):{branch_length:.2f}{newick}"
    with_left = get_newick(node.get_left(), node.dist, leaf_names, closing)
    with_both = get_newick(node.get_right(), node.dist, leaf_names, f",{with_left}")
    return f"({with_both}"
60 +
61 +
62 +
def tree(dfr, outfname, title, params, format, args):
    """Generate Newick strings and dendrogram plots for the given dataframe.

    :param dfr:  a dataframe
    :param outfname:  output filename supplied by the caller; not used here,
        as each output path is built from ``args.outdir``, the axis, the
        matrix name and the run
    :param title:  name of the matrix plot; the matrix name and run are
        parsed from it, so it must look like ``<prefix>_<matname>_<run>``
    :param params:  matrix plot parameters, including labels and classes;
        its colorbar, figsize and linewidths attributes are set here
    :param format:  image file format being used
    :param args:  Namespace; ``args.axes`` and ``args.outdir`` are read

    Returns a dictionary of Newick strings, one per axis in ``args.axes``,
    keyed as ``[<axis>_newick_<matname>_<run>]``.
    """
    logger = logging.getLogger(__name__)

    # Get matrix name and run_id from the plot title
    matname, run_id = title.split("_", 1)[-1].rsplit("_", 1)

    # Scale the figure with the matrix, within fixed bounds; once the upper
    # bound is hit, shrink the font instead
    maxfigsize = 120
    calcfigsize = dfr.shape[0] * 1.1
    figsize = min(max(8, calcfigsize), maxfigsize)
    if figsize == maxfigsize:
        scale = maxfigsize / calcfigsize
        sns.set_context("notebook", font_scale=scale)

    # Add a colorbar?
    if params.classes is None:
        col_cb = None
    else:
        col_cb = pyani_graphics.sns.get_colorbar(dfr, params.classes)

    params.colorbar = col_cb
    params.figsize = figsize
    params.linewidths = 0.25

    # The clustermap supplies the row/column dendrograms (linkages)
    fig = pyani_graphics.sns.get_clustermap(dfr, params)

    # Dictionary to allow abstraction over axes
    sides = {
        "columns": {
            "axis": fig.dendrogram_col,
            "names": dfr.columns,
        },
        "rows": {
            "axis": fig.dendrogram_row,
            "names": dfr.index,
        },
    }

    # Create a linkage dendrogram and newick string for each requested axis
    newicks = {}

    for axis in args.axes:
        # Generate newick format
        root_node = hierarchy.to_tree(sides[axis]["axis"].linkage, False)
        logger.debug("Names: %s", sides[axis]["names"])

        newick = get_newick(root_node, root_node.dist, sides[axis]["names"], "")
        newicks[f"[{axis}_newick_{matname}_{run_id}]"] = newick

        # Generate dendrogram; the label dictionary feeds get_species_name()
        build_label_dict(fig, axis, params)
        figtree = PhyloTree(newick)
        figtree.set_species_naming_function(get_species_name)
        figtree_file = Path(args.outdir) / f"{axis}_tree_{matname}_{run_id}.{format}"
        logger.debug("%s", figtree)

        # Write the tree to file
        figtree.render(str(figtree_file), layout=tree_layout)

    # Return the newick strings so we can save them in the database (eventually)
    return newicks
136 +
137 +
138 +
def tree_layout(node):
    """Layout callback for tree rendering: add text faces to leaf nodes.

    :param node:  tree node being laid out

    A leaf gets its ``name`` attribute in black at the right of its branch
    and its ``species`` attribute in the aligned column. Non-leaf nodes are
    left untouched.
    """
    if not node.is_leaf():
        return

    # Node name, drawn next to the branch tip
    name_face = AttrFace("name", fgcolor="black")
    faces.add_face_to_node(name_face, node, column=0, position="branch-right")

    # Species label, drawn in the aligned column
    species_face = AttrFace("species")
    faces.add_face_to_node(species_face, node, column=0, position="aligned")
162 +
163 +
164 +
def get_species_name(node_name_string):
    """Look up the display label recorded for a node name.

    :param node_name_string:  name of the tree node

    The label is read from LABEL_DICT (per the original docstring, intended
    to be `Genus species` where known). A KeyError is raised for a name with
    no entry.
    """
    label = LABEL_DICT[node_name_string]
    return label

@@ -56,12 +56,15 @@
Loading
56 56
# Distribution dictionary of matrix graphics methods
57 57
GMETHODS = {"mpl": pyani_graphics.mpl.heatmap, "seaborn": pyani_graphics.sns.heatmap}
58 58
SMETHODS = {"mpl": pyani_graphics.mpl.scatter, "seaborn": pyani_graphics.sns.scatter}
59 +
# TMETHODS = {"seaborn": pyani_graphics.seaborn.}
59 60
# Distribution dictionary of distribution graphics methods
60 61
DISTMETHODS = {
61 62
    "mpl": pyani_graphics.mpl.distribution,
62 63
    "seaborn": pyani_graphics.sns.distribution,
63 64
}
64 65
66 +
NEWICKS = {}
67 +
65 68
66 69
def subcmd_plot(args: Namespace) -> int:
67 70
    """Produce graphical output for an analysis.
@@ -94,6 +97,10 @@
Loading
94 97
    for run_id in run_ids:
95 98
        write_run_plots(run_id, session, outfmts, args)
96 99
100 +
        if NEWICKS:
101 +
            write_newicks(args, run_id)
102 +
        NEWICKS.clear()
103 +
97 104
    return 0
98 105
99 106
@@ -163,8 +170,8 @@
Loading
163 170
    # Run the plotting commands
164 171
    logger.debug("Running plotting commands")
165 172
    for func, options in plotting_commands:
166 -
        logger.debug("Running %s with options %s", func, options)
167 -
        pool.apply_async(func, args=options)
173 +
        result = pool.apply_async(func, options, {}, callback=logger.debug)
174 +
        result.get()
168 175
169 176
    # Close worker pool
170 177
    pool.close()
@@ -187,7 +194,7 @@
Loading
187 194
    for fmt in outfmts:
188 195
        outfname = Path(args.outdir) / f"distribution_{matdata.name}_run{run_id}.{fmt}"
189 196
        logger.debug("\tWriting graphics to %s", outfname)
190 -
        DISTMETHODS[args.method[0]](
197 +
        DISTMETHODS[args.method](
191 198
            matdata.data,
192 199
            outfname,
193 200
            matdata.name,
@@ -220,19 +227,28 @@
Loading
220 227
    logger.info("Writing %s matrix heatmaps", matdata.name)
221 228
    cmap = pyani_config.get_colormap(matdata.data, matdata.name)
222 229
    for fmt in outfmts:
223 -
        outfname = Path(args.outdir) / f"matrix_{matdata.name}_run{run_id}.{fmt}"
230 +
        outfname = (
231 +
            Path(args.outdir) / f"matrix_{matdata.name}_run{run_id}_{args.method}.{fmt}"
232 +
        )
224 233
        logger.debug("\tWriting graphics to %s", outfname)
225 234
        params = pyani_graphics.Params(cmap, result_labels, result_classes)
226 235
        # Draw heatmap
227 -
        GMETHODS[args.method[0]](
236 +
        _, newicks = GMETHODS[args.method](
228 237
            matdata.data,
229 238
            outfname,
230 239
            title=f"matrix_{matdata.name}_run{run_id}",
231 240
            params=params,
241 +
            format=fmt,
242 +
            args=args,
232 243
        )
233 244
245 +
    # If Newick strings were generated, add them to NEWICKS.
246 +
    if newicks:
247 +
        NEWICKS.update(newicks)
248 +
234 249
    # Be tidy with matplotlib caches
235 250
    plt.close("all")
251 +
    return
236 252
237 253
238 254
def write_scatter(
@@ -266,7 +282,7 @@
Loading
266 282
        logger.debug("\tWriting graphics to %s", outfname)
267 283
        params = pyani_graphics.Params(cmap, result_labels, result_classes)
268 284
        # Draw scatterplot
269 -
        SMETHODS[args.method[0]](
285 +
        SMETHODS[args.method](
270 286
            matdata1.data,
271 287
            matdata2.data,
272 288
            outfname,
@@ -278,3 +294,11 @@
Loading
278 294
279 295
        # Be tidy with matplotlib caches
280 296
        plt.close("all")
297 +
298 +
299 +
def write_newicks(args: Namespace, run_id):
    """Write the collected Newick strings for a run to a tab-separated file.

    :param args:  Namespace for command-line arguments; ``args.outdir`` is
        the directory the file is written to
    :param run_id:  identifier of the run, used in the output filename

    One line is written per entry in NEWICKS, as ``name<TAB>newick``.
    """
    destination = Path(args.outdir) / f"newicks_run{run_id}.nw"
    lines = [f"{name}\t{nw}\n" for name, nw in NEWICKS.items()]
    with open(destination, "w") as handle:
        handle.writelines(lines)

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Learn more Showing 8 files with coverage changes found.

Changes in pyani/scripts/subcommands/subcmd_download.py
-4
+4
Loading file...
Changes in pyani/download.py
-2
+2
Loading file...
New file pyani/scripts/parsers/tree_parser.py
New
Loading file...
New file pyani/pyani_graphics/tree/__init__.py
New
Loading file...
New file pyani/scripts/subcommands/subcmd_tree.py
New
Loading file...
Changes in pyani/scripts/subcommands/subcmd_plot.py
-1
+1
Loading file...
Changes in pyani/pyani_graphics/mpl/__init__.py
-19
+19
Loading file...
Changes in pyani/pyani_graphics/sns/__init__.py
-19
+19
Loading file...

17 Commits

Hiding 3 contextual commits
+4 Files
+422
+285
+137
Hiding 11 contextual commits
-1 Files
-202
-177
-25
Files Coverage
pyani -1.50% 74.48%
Project Totals (58 files) 74.48%
Loading