# chrisjsewell / ipypublish
"""In this pytest conftest file we define an ipypublish test app fixture.

This fixture supplies a configurable IpyTestApp, that can
run IpyPubMain on a specified source folder,
in a temporary folder (which will be deleted on teardown),
and has methods to test the output.

Usage:

.. code-block:: python

    @pytest.mark.ipynb('folder_name')
    def test_example(ipynb_app):
        ipynb_app.run({"conversion": "latex_ipypublish_main"})
        ipynb_app.assert_converted_exists()
        ipynb_app.assert_converted_contains(["regex1", "regex2"])
        ipynb_app.assert_converted_equals_expected("file_name")

@pytest.mark.ipynb accepts the key-word arguments:

- root: the root folder containing the test folders
  (default = ipypublish.tests.TEST_FILES_DIR)
- source: the folder name within <root>/<folder_name>
  that contains the source files (default = 'source')
- main_file: the file within <root>/<folder_name>/<source> to be converted
  (default = 'main.ipynb'). If None is given the source folder is used.
- converted: the folder name to output converted files to,
  either in the temporary folder (if out_to_temp=True) or
  <root>/<folder_name>/<converted> (if out_to_temp=False)
  (default = 'converted')
- expected: the folder name within <root>/<folder_name>
  that contains the expected output files (default = 'expected')
- out_to_temp: if True, converted files output to a temporary folder,
  that will be removed on teardown,
  otherwise output to <root>/<folder_name>/<converted> (will not be removed)
  (default=True)

"""
39 3
from collections import namedtuple
40 3
import copy
41 3
from difflib import context_diff
42 3
import io
43 3
import logging
44 3
import os
45 3
import shutil
46 3
import re
47 3
import tempfile
48

49 3
from nbconvert.utils.pandoc import get_pandoc_version
50 3
import pytest
51

52 3
from ipypublish.utils import pathlib
53 3
from ipypublish.tests import TEST_FILES_DIR
54 3
from ipypublish.convert.main import IpyPubMain
55

56 3
# module-level logger, named after this module
logger = logging.getLogger(__name__)
57

58

59 3
@pytest.fixture(autouse=True)
def dont_open_webbrowser(monkeypatch):
    """Replace ``webbrowser.open`` with a function that does nothing.

    The fixture is ``autouse``, so the patch is applied without
    a test having to request it.
    """
    monkeypatch.setattr("webbrowser.open", lambda *args, **kwargs: None)
65

66

67 3
@pytest.fixture
def external_export_plugin():
    """Return the path to ``example_new_plugin.json`` in the test files folder."""
    plugin_path = os.path.join(TEST_FILES_DIR, "example_new_plugin.json")
    return pathlib.Path(plugin_path)
70

71

72 3
@pytest.fixture
def temp_folder():
    """Yield the path of a newly created temporary folder.

    The folder (and everything in it) is removed again on teardown.
    """
    folder_path = tempfile.mkdtemp()
    yield folder_path
    # teardown: executed once the requesting test has finished
    shutil.rmtree(folder_path)
77

78

79 3
@pytest.fixture
def ipynb_params(request):
    """
    parameters that are specified by '@pytest.mark.ipynb'
    for ipynb conversion tests

    Returns a named tuple ``(args, kwargs)``: the positional arguments
    (ordered by position) and the key-word arguments of the markers.
    The markers are applied in reverse iteration order, so for each
    position / key-word the marker that is iterated first wins.
    """
    # ##### process pytest.mark.ipynb
    node = request.node
    if hasattr(node, "iter_markers"):  # pytest-3.6.0 or newer
        marks = node.iter_markers("ipynb")
    else:
        marks = node.get_marker("ipynb")

    positional = {}
    keywords = {}
    if marks is not None:
        # positional args are stored by index, to avoid stacking them
        for mark in reversed(list(marks)):
            positional.update(enumerate(mark.args))
            keywords.update(mark.kwargs)

    ordered_args = [positional[index] for index in sorted(positional)]

    params_type = namedtuple("ipynb_params", "args,kwargs")
    return params_type(ordered_args, keywords)  # type: ignore
105

106

107 3
@pytest.fixture(scope="function")
def ipynb_app(temp_folder, ipynb_params):
    """Yield an IpyTestApp configured from the '@pytest.mark.ipynb' parameters.

    The source folder is copied into the temporary folder, and the copy
    is what the app is pointed at.
    Raises a ValueError if the marker supplied no positional argument.
    """
    args, kwargs = ipynb_params
    if not args:
        raise ValueError(
            "a subfolder must be supplied as the first argument to "
            "@pytest.mark.ipynb"
        )

    subfolder = args[0]  # 'ipynb_with_glossary'

    # key-word options, with their defaults
    test_files_dir = kwargs.get("root", TEST_FILES_DIR)
    source_folder = kwargs.get("source", "source")
    input_file = kwargs.get("main_file", "main.ipynb")
    convert_folder = kwargs.get("converted", "converted")
    expected_folder = kwargs.get("expected", "expected")
    use_temp = kwargs.get("out_to_temp", True)

    # work on a copy of the source files, inside the temporary folder
    temp_source_path = os.path.join(temp_folder, source_folder)
    shutil.copytree(
        os.path.join(test_files_dir, subfolder, source_folder), temp_source_path
    )

    converted_path = (
        os.path.join(temp_folder, convert_folder)
        if use_temp
        else os.path.join(test_files_dir, subfolder, convert_folder)
    )
    expected_folder_path = os.path.join(test_files_dir, subfolder, expected_folder)

    yield IpyTestApp(temp_source_path, input_file, converted_path, expected_folder_path)
137

138

139 3
class IpyTestApp(object):
    """Run IpyPubMain on a source folder/file and test the converted output.

    src_path: the folder containing the source files
    input_file: the name of the file (within src_path) to convert,
        or None to convert the source folder itself
    converted_path: the folder that converted files are output to
    expected_folder_path: the folder containing the expected output files
    """

    def __init__(self, src_path, input_file, converted_path, expected_folder_path):
        self._src_folder_path = src_path
        self._converted_folder_path = converted_path
        self._expected_folder_path = expected_folder_path
        self._input_file = input_file
        # the value returned by the IpyPubMain call; None until run() is called
        self._output_data = None

    @property
    def source_path(self):
        """The source folder, as a path object."""
        return pathlib.Path(self._src_folder_path)

    @property
    def input_file(self):
        """The path of the file to convert, or None if no file was specified."""
        if self._input_file is None:
            return None
        return self.source_path.joinpath(self._input_file)

    @property
    def converted_path(self):
        """The folder that converted files are output to, as a path object."""
        return pathlib.Path(self._converted_folder_path)

    @property
    def expected_path(self):
        """The folder containing the expected output, as a path object."""
        return pathlib.Path(self._expected_folder_path)

    @property
    def pandoc_version(self):
        """The value returned by nbconvert's ``get_pandoc_version``."""
        return get_pandoc_version()

    def run(self, ipub_config=None):
        """Run IpyPubMain on the input file (or the source folder, if None).

        ipub_config: optional dict of IpyPubMain configuration;
            "outpath" is always set to the converted folder.
            The dict that is passed in is not modified.
        """
        # work on a copy, so that setting "outpath" does not leak to the caller
        ipub_config = {} if ipub_config is None else dict(ipub_config)
        ipub_config["outpath"] = str(self.converted_path)
        app = IpyPubMain(config={"IpyPubMain": ipub_config})
        input_file = self.input_file
        self._output_data = app(
            input_file if input_file is not None else self.source_path
        )

    def _require_output_data(self, description):
        """Return the output data of run(), or raise ValueError if not yet run."""
        if self._output_data is None:
            raise ValueError(
                "the app must be run first to retrieve {}".format(description)
            )
        return self._output_data

    def _converted_file_path(self, file_name=None, extension=None):
        """Return the path of a converted file.

        file_name defaults to the input file's name without its extension
        (or the source folder's name, if there is no input file) and
        extension defaults to the exporter's file extension.
        """
        if file_name is None:
            input_file = self.input_file
            if input_file is None:
                file_name = self.source_path.name
            else:
                file_name = os.path.splitext(input_file.name)[0]
        if extension is None:
            extension = self.export_extension
        return self.converted_path.joinpath(file_name + extension)

    @property
    def output_data(self):
        """A shallow copy of the data returned by the last run()."""
        return copy.copy(self._require_output_data("output data"))

    @property
    def export_extension(self):
        """The ``file_extension`` of the exporter used by the last run()."""
        return self._require_output_data("export file extension")[
            "exporter"
        ].file_extension

    @property
    def export_mimetype(self):
        """The ``output_mimetype`` of the exporter used by the last run()."""
        return self._require_output_data("export mimetype")["exporter"].output_mimetype

    def assert_converted_exists(self, file_name=None, extension=None):
        """Raise an AssertionError if the converted file does not exist."""
        converted_path = self._converted_file_path(file_name, extension)
        if not converted_path.exists():
            raise AssertionError("could not find: {}".format(converted_path))

    def assert_converted_contains(self, regexes, encoding="utf8"):
        """Raise an AssertionError if a regex is not found in the converted file.

        regexes: a single regex, or a list/tuple of them
        """
        converted_path = self._converted_file_path()

        with io.open(str(converted_path), encoding=encoding) as fobj:
            content = fobj.read()

        if not isinstance(regexes, (list, tuple)):
            regexes = [regexes]

        for regex in regexes:
            if not re.search(regex, content):
                raise AssertionError("content does not contain regex: {}".format(regex))

    def assert_converted_equals_expected(self, expected_file_name, encoding="utf8"):
        """Compare the converted file against <expected folder>/<expected_file_name>.

        The comparison function is selected by the exporter's mimetype;
        if none exists for the mimetype, only a warning is logged.
        """
        extension = self.export_extension
        converted_path = self._converted_file_path(extension=extension)
        expected_path = self.expected_path.joinpath(expected_file_name + extension)

        mime_type = self.export_mimetype
        if mime_type == "text/latex":
            compare_tex_files(converted_path, expected_path, encoding=encoding)
        elif mime_type == "text/html":
            compare_html_files(converted_path, expected_path, encoding=encoding)
        elif mime_type == "text/restructuredtext":
            compare_rst_files(converted_path, expected_path, encoding=encoding)
        else:
            # TODO add comparison for nb (application/json)
            # and python (application/x-python)
            message = "no comparison function exists for " "mimetype: {}".format(
                mime_type
            )
            # raise ValueError(message)
            logger.warning(message)
259

260

261 3
def compare_rst_files(testpath, outpath, encoding="utf8"):
    """Raise an AssertionError if the content of two rst files differs.

    Before comparison, in both files: ".jpg" is replaced by ".jpeg",
    and leading/trailing whitespace of the file and trailing whitespace
    of each line is removed.
    The error message is a context diff of the two normalised contents.
    """
    output = []
    for path in [testpath, outpath]:

        with io.open(str(path), encoding=encoding) as fobj:
            content = fobj.read()

        # python 3.5 used .jpg instead of .jpeg
        content = content.replace(".jpg", ".jpeg")

        # a recent dependency change is inserting new lines at the top of the file;
        # trailing whitespace is also removed (as for the html and tex comparisons),
        # otherwise files differing only by an end-of-file newline
        # would fail with an empty diff
        content = content.strip()
        content = "\n".join([line.rstrip() for line in content.splitlines()])

        output.append(content)

    test_content, out_content = output

    # only report differences
    if out_content != test_content:
        raise AssertionError(
            "\n"
            + "\n".join(
                context_diff(
                    test_content.split("\n"),
                    out_content.split("\n"),
                    fromfile=str(testpath),
                    tofile=str(outpath),
                    # the lines have no trailing newlines
                    lineterm="",
                )
            )
        )
294

295

296 3
def compare_html_files(testpath, outpath, encoding="utf8"):
    """Raise an AssertionError if the body content of two html files differs.

    Only the content between <body> and </body> is compared,
    since styles differ by nbconvert/pandoc version
    (e.g. different versions of font-awesome).
    The content of each <script> element is blanked and
    trailing whitespace is removed from each line before the comparison.
    Raises an IOError if a file contains no body.
    """
    # extract only the body
    # could use html.parser or beautifulsoup to do this better
    body_rgx = re.compile("\\<body\\>(.*)\\</body\\>", re.DOTALL)
    # remove script environments which can change (e.g. reveal);
    # the match is non-greedy, so that each script element is blanked separately
    # and the markup between two script elements is still compared
    script_rgx = re.compile("\\<script\\>(.*?)\\</script\\>", re.DOTALL)

    output = []
    for path in [testpath, outpath]:

        with io.open(str(path), encoding=encoding) as fobj:
            content = fobj.read()

        body_search = body_rgx.search(content)
        if not body_search:
            raise IOError("could not find body content of {}".format(path))
        content = body_search.group(1)

        content = script_rgx.sub("<script></script>", content)

        # remove trailing whitespace
        content = "\n".join([line.rstrip() for line in content.splitlines()])

        output.append(content)

    test_content, out_content = output

    # only report differences
    if out_content != test_content:
        raise AssertionError(
            "\n"
            + "\n".join(
                context_diff(
                    # split on the join character, so that contents which
                    # differ always produce a non-empty diff
                    test_content.split("\n"),
                    out_content.split("\n"),
                    fromfile=str(testpath),
                    tofile=str(outpath),
                    # the lines have no trailing newlines
                    lineterm="",
                )
            )
        )
338

339

340 3
def compare_tex_files(testpath, outpath, encoding="utf8"):
    """Raise an AssertionError if the content of two tex files differs.

    Both files are normalised before the comparison, to remove differences
    that depend on the pandoc / python version (see the comments below),
    and all whitespace at the start and end of lines is removed.
    The error message is a context diff of the two normalised contents.
    """
    # the patterns do not depend on the file, so are compiled once

    # only certain versions of pandoc wrap sections with \hypertarget
    # NOTE a better way to do this might be to use TexSoup
    ht_rgx = re.compile(
        "\\\\hypertarget\\{[^\\}]*\\}\\{[^\\\\]*"
        # (?:sub)* rather than the character class [sub]*,
        # to only match section, subsection, subsubsection, ...
        "(\\\\(?:sub)*section\\{[^\\}]*\\}"
        "\\\\label\\{[^\\}]*\\})"
        "\\}",
        re.DOTALL,
    )
    # python < 3.6 sorts these differently
    pyg_rgx = re.compile(
        (
            "\\\\expandafter\\\\def\\\\csname "
            "PY\\@tok\\@[0-9a-zA-Z]*\\\\endcsname[^\n]*"
        ),
        re.MULTILINE,
    )
    # all space at the start of lines
    space_rgx = re.compile(r"^[\s]*", re.MULTILINE)

    output = []
    for path in [testpath, outpath]:

        with io.open(str(path), encoding=encoding) as fobj:
            content = fobj.read()

        # unwrap the sections wrapped with \hypertarget
        content = ht_rgx.sub("\\g<1>", content)

        # newer versions of pandoc convert ![](file) to \begin{figure}[htbp]
        # TODO override pandoc figure placement of ![](file) in markdown2latex
        content = content.replace("\\begin{figure}[htbp]", "\\begin{figure}")

        # at start of itemize
        content = content.replace("\\itemsep1pt\\parskip0pt\\parsep0pt\n", "")
        # at start of enumerate
        content = content.replace("\\tightlist\n", "")

        # python 3.5 used .jpg instead of .jpeg
        content = content.replace(".jpg", ".jpeg")

        # replace each pygments token definition with the same placeholder
        content = pyg_rgx.sub(r"\<pygments definition\>", content)

        # also remove all space from start of lines
        content = space_rgx.sub("", content)

        # remove trailing whitespace
        content = "\n".join([line.rstrip() for line in content.splitlines()])

        output.append(content)

    test_content, out_content = output

    # only report differences
    if out_content != test_content:
        raise AssertionError(
            "\n"
            + "\n".join(
                context_diff(
                    test_content.split("\n"),
                    out_content.split("\n"),
                    fromfile=str(testpath),
                    tofile=str(outpath),
                    # the lines have no trailing newlines
                    lineterm="",
                )
            )
        )

Read our documentation on viewing source code .

Loading