No flags found
Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
1bed428
... +15 ...
1acbdca
Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.
e.g., #unittest #integration
#production #enterprise
#frontend #backend
819 | 819 | ) |
|
820 | 820 | if args.gmethod == "mpl": |
|
821 | 821 | pyani_graphics.mpl.heatmap( |
|
822 | - | dfm, outfilename=outfilename, title=filestem, params=params |
|
822 | + | dfm, outfilename=outfilename, title=filestem, params=params, args=args |
|
823 | 823 | ) |
|
824 | 824 | elif args.gmethod == "seaborn": |
|
825 | 825 | pyani_graphics.sns.heatmap( |
|
826 | - | dfm, outfilename=outfilename, title=filestem, params=params |
|
826 | + | dfm, outfilename=outfilename, title=filestem, params=params, args=args |
|
827 | 827 | ) |
|
828 | 828 | ||
829 | 829 |
1 | + | import logging |
|
2 | + | import os |
|
3 | + | import sys |
|
4 | + | import multiprocessing |
|
5 | + | ||
6 | + | from argparse import Namespace |
|
7 | + | from pathlib import Path |
|
8 | + | from typing import Dict, List |
|
9 | + | import pandas as pd |
|
10 | + | ||
11 | + | from pyani import pyani_config, pyani_orm, pyani_graphics |
|
12 | + | from pyani.pyani_tools import termcolor, MatrixData |
|
13 | + | ||
14 | + | # TREEMETHODS = {} |
|
15 | + | TREEMETHODS = {"ete3": pyani_graphics.tree.tree} |
|
16 | + | ||
17 | + | NEWICKS = {} |
|
18 | + | ||
19 | + | ||
20 | + | def subcmd_tree(args: Namespace) -> int: |
|
21 | + | """Produce tree output for an analysis. |
|
22 | + | ||
23 | + | :param args: Namespace of command-line arguments |
|
24 | + | ||
25 | + | This is graphical output for representing the ANI analysis results, and |
|
26 | + | takes the form of a tree, or dendrogram. |
|
27 | + | """ |
|
28 | + | logger = logging.getLogger(__name__) |
|
29 | + | ||
30 | + | # Announce what's going on to the user |
|
31 | + | logger.info(termcolor("Generating tree output for analyses", "red")) |
|
32 | + | logger.info("Writing output to: %s", args.outdir) |
|
33 | + | os.makedirs(args.outdir, exist_ok=True) |
|
34 | + | logger.info("Rendering method: %s", args.method) |
|
35 | + | ||
36 | + | # Connect to database session |
|
37 | + | logger.debug("Activating session for database: %s", args.dbpath) |
|
38 | + | session = pyani_orm.get_session(args.dbpath) |
|
39 | + | ||
40 | + | # Parse output formats |
|
41 | + | outfmts = args.formats |
|
42 | + | logger.debug("Requested output formats: %s", outfmts) |
|
43 | + | logger.debug("Type of formats variable: %s", type(outfmts)) |
|
44 | + | ||
45 | + | # Work on each run: |
|
46 | + | run_ids = args.run_ids |
|
47 | + | logger.debug("Generating trees for runs: %s", run_ids) |
|
48 | + | for run_id in run_ids: |
|
49 | + | write_run_trees(run_id, session, outfmts, args) |
|
50 | + | ||
51 | + | if NEWICKS: |
|
52 | + | write_newicks(args, run_id) |
|
53 | + | NEWICKS.clear() |
|
54 | + | ||
55 | + | return 0 |
|
56 | + | ||
57 | + | ||
58 | + | def write_run_trees( |
|
59 | + | run_id: int, |
|
60 | + | session, |
|
61 | + | outfmts: List[str], |
|
62 | + | args: Namespace, |
|
63 | + | ) -> None: |
|
64 | + | """Write tree plots for each matrix type. |
|
65 | + | ||
66 | + | :param run_id: int, run_id for this run |
|
67 | + | :param matdata: MatrixData object for this distribution plot |
|
68 | + | :param args: Namespace for command-line arguments |
|
69 | + | :param outfmts: list of output formats for files |
|
70 | + | """ |
|
71 | + | logger = logging.getLogger(__name__) |
|
72 | + | logger.debug("Retrieving results matrices for run %s", run_id) |
|
73 | + | ||
74 | + | results = ( |
|
75 | + | session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == run_id).first() |
|
76 | + | ) |
|
77 | + | result_label_dict = pyani_orm.get_matrix_labels_for_run(session, run_id) |
|
78 | + | result_class_dict = pyani_orm.get_matrix_classes_for_run(session, run_id) |
|
79 | + | logger.debug( |
|
80 | + | f"Have {len(result_label_dict)} labels and {len(result_class_dict)} classes" |
|
81 | + | ) |
|
82 | + | ||
83 | + | # Create worker pool and empty command list |
|
84 | + | pool = multiprocessing.Pool(processes=args.workers) |
|
85 | + | plotting_commands = [] |
|
86 | + | ||
87 | + | # Build and collect the plotting commands |
|
88 | + | for matdata in [ |
|
89 | + | MatrixData(*_) |
|
90 | + | for _ in [ |
|
91 | + | ("identity", pd.read_json(results.df_identity), {}), |
|
92 | + | ("coverage", pd.read_json(results.df_coverage), {}), |
|
93 | + | ("aln_lengths", pd.read_json(results.df_alnlength), {}), |
|
94 | + | ("sim_errors", pd.read_json(results.df_simerrors), {}), |
|
95 | + | ("hadamard", pd.read_json(results.df_hadamard), {}), |
|
96 | + | ] |
|
97 | + | if _[0] in args.trees |
|
98 | + | ]: |
|
99 | + | logger.info("Writing tree plot for %s matrix", matdata.name) |
|
100 | + | plotting_commands.append( |
|
101 | + | ( |
|
102 | + | write_tree, |
|
103 | + | [run_id, matdata, result_label_dict, result_class_dict, outfmts, args], |
|
104 | + | ) |
|
105 | + | ) |
|
106 | + | ||
107 | + | sys.stdout.write(str(plotting_commands)) |
|
108 | + | ||
109 | + | # Run the plotting commands |
|
110 | + | for func, options in plotting_commands: |
|
111 | + | result = pool.apply_async(func, options, {}, callback=logger.debug) |
|
112 | + | result.get() |
|
113 | + | ||
114 | + | # Close worker pool |
|
115 | + | pool.close() |
|
116 | + | pool.join() |
|
117 | + | ||
118 | + | ||
119 | + | def write_tree( |
|
120 | + | run_id: int, |
|
121 | + | matdata: MatrixData, |
|
122 | + | result_labels: Dict, |
|
123 | + | result_classes: Dict, |
|
124 | + | outfmts: List[str], |
|
125 | + | args: Namespace, |
|
126 | + | ) -> None: |
|
127 | + | """Write a single tree for a pyani run. |
|
128 | + | ||
129 | + | :param run_id: int, run_id for this run |
|
130 | + | :param matdata: MatrixData object for this heatmap |
|
131 | + | :param result_labels: dict of result labels |
|
132 | + | :param result_classes: dict of result classes |
|
133 | + | :param args: Namespace for command-line arguments |
|
134 | + | :param outfmts: list of output formats for files |
|
135 | + | """ |
|
136 | + | # logger = logging.getLogger(__name__) |
|
137 | + | cmap = pyani_config.get_colormap(matdata.data, matdata.name) |
|
138 | + | ||
139 | + | for fmt in outfmts: |
|
140 | + | outfname = Path(args.outdir) / f"distribution_{matdata.name}_run{run_id}.{fmt}" |
|
141 | + | ||
142 | + | params = pyani_graphics.Params(cmap, result_labels, result_classes) |
|
143 | + | ||
144 | + | TREEMETHODS[args.method]( |
|
145 | + | matdata.data, |
|
146 | + | outfname, |
|
147 | + | title=f"matrix_{matdata.name}_run{run_id}", |
|
148 | + | params=params, |
|
149 | + | format=fmt, |
|
150 | + | args=args, |
|
151 | + | ) |
|
152 | + | ||
153 | + | ||
154 | + | def write_newicks(args: Namespace, run_id): |
|
155 | + | # If Newick strings were generated, write them out. |
|
156 | + | newick_file = Path(args.outdir) / f"newicks_run{run_id}.nw" |
|
157 | + | with open(newick_file, "w") as nfh: |
|
158 | + | for name, nw in NEWICKS.items(): |
|
159 | + | nfh.write(f"{name}\t{nw}\n") |
1 | + | import logging |
|
2 | + | from pyani import pyani_graphics |
|
3 | + | from scipy.cluster import hierarchy |
|
4 | + | from ete3 import ClusterTree, Tree, TreeStyle, faces, AttrFace, PhyloTree |
|
5 | + | from pathlib import Path |
|
6 | + | import sys |
|
7 | + | import seaborn as sns |
|
8 | + | ||
9 | + | LABEL_DICT = {} |
|
10 | + | ||
11 | + | ||
12 | + | def build_label_dict(fig, axis, params): |
|
13 | + | """Label info for tree plots. |
|
14 | + | ||
15 | + | :param fig: a Seaborn clustermap instance |
|
16 | + | :param axis: one of {'row', 'col'} |
|
17 | + | :param params: plot parameters; this is where the labels come from |
|
18 | + | ||
19 | + | """ |
|
20 | + | logger = logging.getLogger(__name__) |
|
21 | + | if axis == "col": |
|
22 | + | for idx, _ in zip( |
|
23 | + | fig.dendrogram_col.reordered_ind, fig.ax_heatmap.get_yticklabels() |
|
24 | + | ): |
|
25 | + | LABEL_DICT[str(idx + 1)] = params.labels.get(_, _.get_text()) |
|
26 | + | elif axis == "row": |
|
27 | + | for idx, _ in zip( |
|
28 | + | fig.dendrogram_row.reordered_ind, fig.ax_heatmap.get_xticklabels() |
|
29 | + | ): |
|
30 | + | LABEL_DICT[str(idx + 1)] = params.labels.get(_, _.get_text()) |
|
31 | + | logger.debug(f"Label dict: {LABEL_DICT}") |
|
32 | + | return LABEL_DICT |
|
33 | + | ||
34 | + | ||
35 | + | def get_newick(node, parentdist, leaf_names, newick=""): |
|
36 | + | """Generates a newick formatted file from a tree, |
|
37 | + | using recursion to traverse it. |
|
38 | + | ||
39 | + | :param node: a (portion of a) tree to be traversed |
|
40 | + | :param parentdist: distance from the parent node |
|
41 | + | :param leaf_names: labels that will be attached to the terminal nodes |
|
42 | + | :param newick: the current newick-formatted tree structure |
|
43 | + | ||
44 | + | """ |
|
45 | + | # logger = logging.getLogger(__name__) |
|
46 | + | # logger.debug(f"{type(parentdist)}, {parentdist}") |
|
47 | + | # logger.debug(f"{type(node.dist)}, {node.dist}") |
|
48 | + | diff = parentdist - node.dist |
|
49 | + | if node.is_leaf(): |
|
50 | + | return f"{leaf_names[node.id]}:{diff:.2f}{newick}" |
|
51 | + | else: |
|
52 | + | if len(newick) > 0: |
|
53 | + | newick = f"):{diff:.2f}{newick}" |
|
54 | + | else: |
|
55 | + | newick = ");" |
|
56 | + | newick = get_newick(node.get_left(), node.dist, leaf_names, newick) |
|
57 | + | newick = get_newick(node.get_right(), node.dist, leaf_names, f",{newick}") |
|
58 | + | newick = f"({newick}" |
|
59 | + | return newick |
|
60 | + | ||
61 | + | ||
62 | + | def tree(dfr, outfname, title, params, format, args): |
|
63 | + | """Generate a newick file and dendrogram plot for the given dataframe. |
|
64 | + | ||
65 | + | :param dfr: a dataframe |
|
66 | + | # :param fig: a figure produced by sns.clustermap |
|
67 | + | :param title: name of the matrix plot |
|
68 | + | :param format: image file format being used |
|
69 | + | :param params: matrix plot parameters; including labels |
|
70 | + | :param args: Namespace |
|
71 | + | ||
72 | + | """ |
|
73 | + | logger = logging.getLogger(__name__) |
|
74 | + | ||
75 | + | # Get matrix name and run_id from the plot title |
|
76 | + | matname, run_id = title.split("_", 1)[-1].rsplit("_", 1) |
|
77 | + | ||
78 | + | maxfigsize = 120 |
|
79 | + | calcfigsize = dfr.shape[0] * 1.1 |
|
80 | + | figsize = min(max(8, calcfigsize), maxfigsize) |
|
81 | + | if figsize == maxfigsize: |
|
82 | + | scale = maxfigsize / calcfigsize |
|
83 | + | sns.set_context("notebook", font_scale=scale) |
|
84 | + | ||
85 | + | # Add a colorbar? |
|
86 | + | if params.classes is None: |
|
87 | + | col_cb = None |
|
88 | + | else: |
|
89 | + | col_cb = pyani_graphics.sns.get_colorbar(dfr, params.classes) |
|
90 | + | ||
91 | + | params.colorbar = col_cb |
|
92 | + | params.figsize = figsize |
|
93 | + | params.linewidths = 0.25 |
|
94 | + | ||
95 | + | fig = pyani_graphics.sns.get_clustermap(dfr, params) |
|
96 | + | ||
97 | + | # Dictionary to allow abstraction over axes |
|
98 | + | sides = { |
|
99 | + | "columns": { |
|
100 | + | "axis": fig.dendrogram_col, |
|
101 | + | "names": dfr.columns, # fig.dendrogram_col.reordered_ind, |
|
102 | + | }, |
|
103 | + | "rows": { |
|
104 | + | "axis": fig.dendrogram_row, |
|
105 | + | "names": dfr.index, # fig.dendrogram_row.reordered_ind, |
|
106 | + | }, |
|
107 | + | } |
|
108 | + | ||
109 | + | # Create a linkage dendrogram and newick string for both rows and columns |
|
110 | + | newicks = {} |
|
111 | + | ||
112 | + | for axis in args.axes: |
|
113 | + | # Generate newick format |
|
114 | + | tree = hierarchy.to_tree(sides[axis]["axis"].linkage, False) |
|
115 | + | logger.debug(f"Names: {sides[axis]['names']}") |
|
116 | + | ||
117 | + | newick = get_newick(tree, tree.dist, sides[axis]["names"], "") |
|
118 | + | newicks.update({f"[{axis}_newick_{matname}_{run_id}]": newick}) |
|
119 | + | ||
120 | + | # Generate dendrogram |
|
121 | + | # if 'dendrogram' in args.tree: |
|
122 | + | # if args.tree: |
|
123 | + | build_label_dict(fig, axis, params) |
|
124 | + | sys.stderr.write(f"Label dict: {LABEL_DICT}\n") |
|
125 | + | # figtree = ClusterTree(newick, text_array=matrix) |
|
126 | + | figtree = PhyloTree(newick) |
|
127 | + | figtree.set_species_naming_function(get_species_name) |
|
128 | + | figtree_file = Path(args.outdir) / f"{axis}_tree_{matname}_{run_id}.{format}" |
|
129 | + | logger.debug(f"{figtree}") |
|
130 | + | ||
131 | + | # Write the tree to file |
|
132 | + | figtree.render(str(figtree_file), layout=tree_layout) |
|
133 | + | ||
134 | + | # Return the newick strings so we can save them in the database (eventually) |
|
135 | + | return newicks |
|
136 | + | ||
137 | + | ||
138 | + | def tree_layout(node): |
|
139 | + | ||
140 | + | # Add taxonomy to nodes, and align to right |
|
141 | + | if node.is_leaf(): |
|
142 | + | # if node.name == "F962_00589": |
|
143 | + | # faces.add_face_to_node( |
|
144 | + | # AttrFace("name", fgcolor="white"), |
|
145 | + | # node, |
|
146 | + | # column=0, |
|
147 | + | # position="branch-right", |
|
148 | + | # ) |
|
149 | + | # faces.add_face_to_node( |
|
150 | + | # AttrFace("species", fgcolor="white"), node, column=0, position="aligned" |
|
151 | + | # ) |
|
152 | + | # node.img_style["bgcolor"] == "darkred" |
|
153 | + | # else: |
|
154 | + | ||
155 | + | faces.add_face_to_node( |
|
156 | + | AttrFace("name", fgcolor="black"), |
|
157 | + | node, |
|
158 | + | column=0, |
|
159 | + | position="branch-right", |
|
160 | + | ) |
|
161 | + | faces.add_face_to_node(AttrFace("species"), node, column=0, position="aligned") |
|
162 | + | ||
163 | + | ||
164 | + | def get_species_name(node_name_string): |
|
165 | + | """Return `Genus species` (where known) for a node.""" |
|
166 | + | return LABEL_DICT[node_name_string] |
56 | 56 | # Distribution dictionary of matrix graphics methods |
|
57 | 57 | GMETHODS = {"mpl": pyani_graphics.mpl.heatmap, "seaborn": pyani_graphics.sns.heatmap} |
|
58 | 58 | SMETHODS = {"mpl": pyani_graphics.mpl.scatter, "seaborn": pyani_graphics.sns.scatter} |
|
59 | + | # TMETHODS = {"seaborn": pyani_graphics.seaborn.} |
|
59 | 60 | # Distribution dictionary of distribution graphics methods |
|
60 | 61 | DISTMETHODS = { |
|
61 | 62 | "mpl": pyani_graphics.mpl.distribution, |
|
62 | 63 | "seaborn": pyani_graphics.sns.distribution, |
|
63 | 64 | } |
|
64 | 65 | ||
66 | + | NEWICKS = {} |
|
67 | + | ||
65 | 68 | ||
66 | 69 | def subcmd_plot(args: Namespace) -> int: |
|
67 | 70 | """Produce graphical output for an analysis. |
94 | 97 | for run_id in run_ids: |
|
95 | 98 | write_run_plots(run_id, session, outfmts, args) |
|
96 | 99 | ||
100 | + | if NEWICKS: |
|
101 | + | write_newicks(args, run_id) |
|
102 | + | NEWICKS.clear() |
|
103 | + | ||
97 | 104 | return 0 |
|
98 | 105 | ||
99 | 106 |
163 | 170 | # Run the plotting commands |
|
164 | 171 | logger.debug("Running plotting commands") |
|
165 | 172 | for func, options in plotting_commands: |
|
166 | - | logger.debug("Running %s with options %s", func, options) |
|
167 | - | pool.apply_async(func, args=options) |
|
173 | + | result = pool.apply_async(func, options, {}, callback=logger.debug) |
|
174 | + | result.get() |
|
168 | 175 | ||
169 | 176 | # Close worker pool |
|
170 | 177 | pool.close() |
187 | 194 | for fmt in outfmts: |
|
188 | 195 | outfname = Path(args.outdir) / f"distribution_{matdata.name}_run{run_id}.{fmt}" |
|
189 | 196 | logger.debug("\tWriting graphics to %s", outfname) |
|
190 | - | DISTMETHODS[args.method[0]]( |
|
197 | + | DISTMETHODS[args.method]( |
|
191 | 198 | matdata.data, |
|
192 | 199 | outfname, |
|
193 | 200 | matdata.name, |
220 | 227 | logger.info("Writing %s matrix heatmaps", matdata.name) |
|
221 | 228 | cmap = pyani_config.get_colormap(matdata.data, matdata.name) |
|
222 | 229 | for fmt in outfmts: |
|
223 | - | outfname = Path(args.outdir) / f"matrix_{matdata.name}_run{run_id}.{fmt}" |
|
230 | + | outfname = ( |
|
231 | + | Path(args.outdir) / f"matrix_{matdata.name}_run{run_id}_{args.method}.{fmt}" |
|
232 | + | ) |
|
224 | 233 | logger.debug("\tWriting graphics to %s", outfname) |
|
225 | 234 | params = pyani_graphics.Params(cmap, result_labels, result_classes) |
|
226 | 235 | # Draw heatmap |
|
227 | - | GMETHODS[args.method[0]]( |
|
236 | + | _, newicks = GMETHODS[args.method]( |
|
228 | 237 | matdata.data, |
|
229 | 238 | outfname, |
|
230 | 239 | title=f"matrix_{matdata.name}_run{run_id}", |
|
231 | 240 | params=params, |
|
241 | + | format=fmt, |
|
242 | + | args=args, |
|
232 | 243 | ) |
|
233 | 244 | ||
245 | + | # If Newick strings were generated, add them to NEWICKS. |
|
246 | + | if newicks: |
|
247 | + | NEWICKS.update(newicks) |
|
248 | + | ||
234 | 249 | # Be tidy with matplotlib caches |
|
235 | 250 | plt.close("all") |
|
251 | + | return |
|
236 | 252 | ||
237 | 253 | ||
238 | 254 | def write_scatter( |
266 | 282 | logger.debug("\tWriting graphics to %s", outfname) |
|
267 | 283 | params = pyani_graphics.Params(cmap, result_labels, result_classes) |
|
268 | 284 | # Draw scatterplot |
|
269 | - | SMETHODS[args.method[0]]( |
|
285 | + | SMETHODS[args.method]( |
|
270 | 286 | matdata1.data, |
|
271 | 287 | matdata2.data, |
|
272 | 288 | outfname, |
278 | 294 | ||
279 | 295 | # Be tidy with matplotlib caches |
|
280 | 296 | plt.close("all") |
|
297 | + | ||
298 | + | ||
299 | + | def write_newicks(args: Namespace, run_id): |
|
300 | + | # If Newick strings were generated, write them out. |
|
301 | + | newick_file = Path(args.outdir) / f"newicks_run{run_id}.nw" |
|
302 | + | with open(newick_file, "w") as nfh: |
|
303 | + | for name, nw in NEWICKS.items(): |
|
304 | + | nfh.write(f"{name}\t{nw}\n") |
Learn more Showing 6 files with coverage changes found.
pyani/scripts/parsers/tree_parser.py
pyani/pyani_graphics/tree/__init__.py
pyani/scripts/subcommands/subcmd_tree.py
pyani/scripts/subcommands/subcmd_plot.py
pyani/pyani_graphics/mpl/__init__.py
pyani/pyani_graphics/sns/__init__.py
Files | Coverage |
---|---|
pyani | -1.34% 74.48% |
Project Totals (58 files) | 74.48% |
#370
1acbdca
7255824
bc3eae0
60182b1
#370
ebf261f
7f42742
64a314f
80082c4
54c33ea
0949a46
f471992
5a65685
f16476f
a31e9bb
9c2246a
15f46d3
1bed428