1 1
import logging
2 1
import os
3 1
from binascii import a2b_base64
4 1
import sys
5 1
import json
6 1
import re
7 1
from mimetypes import guess_extension
8

9 1
from six import string_types
10 1
import traitlets as traits
11 1
from nbconvert.preprocessors import Preprocessor
12

13 1
if sys.version_info[0] == 2:
14 0
    from urlparse import urlparse
15
else:
16 1
    from urllib.parse import urlparse
17

18 1
logger = logging.getLogger("resolve_links")
19

20

21 1
def guess_extension_without_jpe(mimetype):
    """Guess a file extension for a MIME type, never returning '.jpe'.

    This function fixes a problem with '.jpe' extensions
    of jpeg images which are then not recognised by latex.
    For any other case, the function works in the same way
    as mimetypes.guess_extension

    Parameters
    ----------
    mimetype : str
        the MIME type, e.g. 'image/png'

    Returns
    -------
    str or None
        the extension, including the leading dot, or None if
        mimetypes.guess_extension returns None

    """
    ext = guess_extension(mimetype)
    # '.jpe' is a valid jpeg extension, but latex does not recognise it
    return ".jpeg" if ext == ".jpe" else ext
32

33

34 1
def is_hyperlink(path):
    """test whether a path is a hyperlink, e.g. https://site.org

    The path is parsed with ``urlparse`` and counts as a hyperlink when it
    has a URL scheme. A single-letter scheme is not counted, because that
    is how ``urlparse`` reports the drive letter of a Windows path such
    as ``C:\\docs\\logo.png``, which is a local file.

    Parameters
    ----------
    path : str
        the path or URL to test

    Returns
    -------
    bool
        True if the path has a scheme of more than one character

    """
    scheme = urlparse(path).scheme
    # len == 0: no scheme at all; len == 1: a Windows drive letter
    return len(scheme) > 1
39

40

41 1
def resolve_path(fpath, filepath):
    """resolve a relative path, w.r.t. another filepath

    Parameters
    ----------
    fpath : str
        the path to resolve; hyperlinks are returned unchanged
    filepath : str or object convertible by str()
        the path of the file that ``fpath`` is relative to;
        only its directory part is used

    Returns
    -------
    str
        ``fpath`` itself if it is a hyperlink, otherwise a normalised
        path; relative paths are joined to the directory of ``filepath``
        and made absolute

    """
    if is_hyperlink(fpath):
        return fpath

    if not os.path.isabs(fpath):
        # interpret the path relative to the folder containing filepath
        fpath = os.path.join(os.path.dirname(str(filepath)), fpath)
        fpath = os.path.abspath(fpath)

    return os.path.normpath(fpath)
52

53

54 1
def extract_file_links(source, parent_path, redirect_path, replace_nonexistent=False):
    """ extract local linked files

    Markdown links of the form ``[text](path#label)`` are searched for in
    ``source``. Links that start with ``attachment:``, links with an empty
    path (internal links) and hyperlinks are skipped. Every other path is
    resolved relative to ``parent_path`` and, if it is redirected, the link
    is rewritten to point to the file's basename inside ``redirect_path``.

    Parameters
    ----------
    source : str
        the markdown text to search
    parent_path : str
        path of the file containing ``source``;
        relative links are resolved against its folder
    redirect_path : str
        folder that redirected links should point to
    replace_nonexistent : bool
        if True, also redirect links whose resolved path does not exist

    Returns
    -------
    new_source : str
        the source, with redirected links
    redirected_paths : list of str
        the resolved paths of all links that were redirected
    nonexistent_paths : list of str
        the resolved paths that do not exist on disk

    Examples
    --------
    >>> import os, pytest
    >>> if os.name == 'nt':
    ...     pytest.skip()

    >>> source = '''## Cell with Linked Image
    ... ![test_image](subdir/logo_example.png)
    ... a [test_link](other_doc#a-link)'''
    >>> src, rpaths, npaths = extract_file_links(
    ...                             source, '/root/nb.ipynb', 'redirect', True)
    >>> print(src)
    ## Cell with Linked Image
    ![test_image](redirect/logo_example.png)
    a [test_link](redirect/other_doc#a-link)

    >>> print(rpaths[0])
    /root/subdir/logo_example.png
    >>> print(rpaths[1])
    /root/other_doc

    """
    # TODO is this robust enough
    # groups: (link text, path up to the first '#', remainder up to ')')
    regex = re.compile(r"\[([^\]]*)\]\(([^)#]*)([^)]*)\)")
    new_source = source
    redirected_paths = []
    nonexistent_paths = []
    for text, path, label in regex.findall(source):
        if path.startswith("attachment:"):
            continue
        if not path:  # internal links
            continue
        respath = resolve_path(path, parent_path)
        if is_hyperlink(respath):
            continue

        exists = os.path.exists(respath)
        if not exists:
            nonexistent_paths.append(respath)
        if not (exists or replace_nonexistent):
            continue

        redirected_paths.append(respath)
        new_path = os.path.normpath(
            os.path.join(redirect_path, os.path.basename(path))
        )
        # the three groups rebuild the matched link exactly,
        # so it can be swapped for the redirected one
        new_source = new_source.replace(
            "[{0}]({1}{2})".format(text, path, label),
            "[{0}]({1}{2})".format(text, new_path, label),
        )

    return new_source, redirected_paths, nonexistent_paths
106

107

108 1
class LatexDocLinks(Preprocessor):
    """ a preprocessor to resolve file paths in the notebook:

    1. Extract attachments from markdown cells, to resources['outputs'],
       and redirect their file links to self.filesfolder

    2. If nb.metadata.ipub.bibliography, create resources['bibliopath'];
       if nb.metadata.ipub.bibglossary.filepath,
       create resources['bibglosspath']
       (both only if the referenced file exists)

    3. Creates resources['external_file_paths'] = [] and adds to it:

        - local relative file paths referenced in markdown cells by
          '[](path/to/file)'
        - path to nb.metadata.ipub.bibliography (if present)
        - path to nb.metadata.ipub.bibglossary.filepath (if present)
        - path to nb.metadata.ipub.titlepage.logo (if present)

    4. If self.redirect_external=True,
       redirects relative external file paths to self.filesfolder

    """

    metapath = traits.Unicode("", help="the file path to the notebook").tag(config=True)
    filesfolder = traits.Unicode(
        "",
        help="the folder path to dump internal content to (e.g. images, etc)",
    ).tag(config=True)
    redirect_external = traits.Bool(
        True, help="if True, redirect relatively linked paths to filesfolder"
    ).tag(config=True)
    extract_attachments = traits.Bool(
        True,
        help=(
            "extract attachments stored in the notebook"
            "(created by dragging and dropping files into markdown cells)"
        ),
    ).tag(config=True)
    # file name used for an extracted attachment; formatted with the
    # fields unique_key, cell_index, key and extension
    output_attachment_template = traits.Unicode(
        "{unique_key}_{cell_index}_{key}{extension}"
    ).tag(config=True)

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Extract linked files and attachments of a markdown cell

        Non-markdown cells are returned unchanged.

        Parameters
        ----------
        cell : nbformat.notebooknode.NotebookNode
            Notebook cell being processed
        resources : dict
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed

        Returns
        -------
        cell : nbformat.notebooknode.NotebookNode
            the (modified) cell
        resources : dict
            the (modified) resources

        Raises
        ------
        ValueError
            if an extracted attachment would get the same file path as
            an entry that is already in resources['outputs']
        """
        if cell.cell_type != "markdown":
            return cell, resources

        # extract local linked files
        source, rpaths, npaths = extract_file_links(
            cell.source, self.metapath, self.filesfolder
        )
        if self.redirect_external:
            cell.source = source
        # setdefault: the lists are normally created by preprocess(), but
        # this method must not fail if it is called on its own
        resources.setdefault("external_file_paths", []).extend(rpaths)
        resources.setdefault("unfound_file_paths", []).extend(npaths)

        # extract attachments
        if "attachments" in cell and self.extract_attachments:
            self._extract_attachments(cell, resources, cell_index)

        return cell, resources

    def _extract_attachments(self, cell, resources, cell_index):
        """Move the attachments of a cell into resources['outputs'].

        The attachments are removed from the cell, their content is stored
        in resources['outputs'] under a file path inside self.filesfolder,
        and 'attachment:<key>' pointers in the cell source are replaced by
        that file path.
        """
        unique_key = resources.get("unique_key", "attach")

        # 'outputs' may be missing (or not a dict) if no other
        # preprocessor has created it yet
        if not isinstance(resources.get("outputs"), dict):
            resources["outputs"] = {}
        outputs = resources["outputs"]

        for key, attachment in cell.pop("attachments").items():
            # TODO this only works if there is a single MIME bundle
            (mime_type, data), = attachment.items()

            ext = guess_extension_without_jpe(mime_type)
            if ext is None:
                # fall back to the MIME subtype, e.g. 'image/foo' -> '.foo'
                ext = "." + mime_type.rsplit("/")[-1]

            # replace the pointer to the attachment
            filename = self.output_attachment_template.format(
                unique_key=unique_key,
                cell_index=cell_index,
                key=os.path.splitext(key)[0],
                extension=ext,
            )
            filepath = os.path.normpath(os.path.join(self.filesfolder, filename))
            if "source" in cell:
                cell["source"] = cell["source"].replace(
                    "attachment:{}".format(key), filepath
                )

            # code taken from nbconvert.ExtractOutputPreprocessor
            if not isinstance(data, string_types) or mime_type == "application/json":
                # Data is either JSON-like and was parsed into a Python
                # object according to the spec, or data is for sure
                # JSON.
                # In the latter case we want to go extra sure that
                # we enclose a scalar string value into extra quotes by
                # serializing it properly.
                data = json.dumps(data)

            # Binary files are base64-encoded, SVG is already XML
            if mime_type in {"image/png", "image/jpeg", "application/pdf"}:
                # data is b64-encoded as text (str, unicode),
                # we want the original bytes
                data = a2b_base64(data)
            elif sys.platform == "win32":
                data = data.replace("\n", "\r\n").encode("UTF-8")
            else:
                data = data.encode("UTF-8")

            if filepath in outputs:
                raise ValueError(
                    "Your outputs have filename metadata associated "
                    "with them. Nbconvert saves these outputs to "
                    "external files using this filename metadata. "
                    "Filenames need to be unique across the notebook, "
                    "or images will be overwritten. The filename {} is"
                    " associated with more than one output. The second"
                    " output associated with this filename is in cell "
                    "{}.".format(filepath, cell_index)
                )
            # In the resources, make the figure available
            outputs[filepath] = data

    def _resolve_metadata_path(self, container, key, resources, resource_key=None):
        """Resolve the file path stored in container[key].

        The path is resolved relative to self.metapath and recorded in
        resources['external_file_paths'] if it exists, otherwise in
        resources['unfound_file_paths']. If it exists and ``resource_key``
        is given, it is also stored in resources[resource_key].
        If self.redirect_external, container[key] is then pointed to the
        file's basename inside self.filesfolder.
        """
        path = resolve_path(container[key], self.metapath)
        if os.path.exists(path):
            resources["external_file_paths"].append(path)
            if resource_key is not None:
                resources[resource_key] = path
        else:
            resources["unfound_file_paths"].append(path)

        if self.redirect_external:
            container[key] = os.path.join(self.filesfolder, os.path.basename(path))

    def preprocess(self, nb, resources):
        """
        Preprocessing to apply on each notebook.

        Resolves the file paths in nb.metadata.ipub, processes every cell
        with preprocess_cell, removes duplicates from
        resources['external_file_paths'] and logs a warning listing the
        referenced files that do not exist.

        Parameters
        ----------
        nb : nbformat.notebooknode.NotebookNode
            Notebook being converted
        resources : dict
            Additional resources used in the conversion process.

        Returns
        -------
        nb : nbformat.notebooknode.NotebookNode
            the (modified) notebook
        resources : dict
            the (modified) resources
        """
        logger.info(
            "resolving external file paths in ipub metadata to: %s", self.metapath
        )

        resources.setdefault("external_file_paths", [])
        resources.setdefault("unfound_file_paths", [])

        if "ipub" in nb.metadata:
            ipub = nb.metadata.ipub

            if "bibliography" in ipub:
                self._resolve_metadata_path(
                    ipub, "bibliography", resources, "bibliopath"
                )

            if "filepath" in ipub.get("bibglossary", {}):
                self._resolve_metadata_path(
                    ipub["bibglossary"], "filepath", resources, "bibglosspath"
                )

            if "logo" in ipub.get("titlepage", {}):
                self._resolve_metadata_path(ipub["titlepage"], "logo", resources)

        for index, cell in enumerate(nb.cells):
            nb.cells[index], resources = self.preprocess_cell(cell, resources, index)

        # filter unique
        resources["external_file_paths"] = list(set(resources["external_file_paths"]))

        # the unfound paths are only reported, not passed on in the resources
        upaths = set(resources.pop("unfound_file_paths"))
        if upaths:
            logger.warning(
                "referenced file(s) do not exist: {}".format(list(upaths))
            )

        return nb, resources

Read our documentation on viewing source code .

Loading