Mange / roadie
1
# frozen_string_literal: true
2

3 5
module Roadie
  # @api private
  #
  # Class that rewrites URLs in the DOM and in CSS, delegating the actual URL
  # construction to the generator it was initialized with.
  class UrlRewriter
    # Regexp matching all the url() declarations in CSS
    #
    # It matches without any quotes and with both single and double quotes
    # inside the parenthesis. There's much room for improvement, of course.
    #
    # Note that +private+ does not apply to constants, so this constant is
    # reachable from outside the class.
    CSS_URL_REGEXP = %r{
      url\(
        (?<quote>
          (?:["']|%22)?    # Optional opening quote
        )
        (?<url>            # The URL.
                           # We match URLs with parenthesis inside it here,
                           # so url(foo(bar)baz) will match correctly.
          [^(]*               # Text leading up to before opening parens
          (?:\([^)]*\))*      # Texts containing parens pairs
          [^(]+               # Texts without parens - required
        )
        \k'quote'          # Closing quote
      \)
    }x

    # @param [UrlGenerator] generator object responding to +generate_url+
    def initialize(generator)
      @generator = generator
    end

    # Mutates the passed DOM tree, rewriting certain element's attributes.
    #
    # This will make all a[href] and img[src] into absolute URLs, as well as
    # all "url()" directives inside style-attributes. Elements carrying a
    # data-roadie-ignore attribute are left alone.
    #
    # [nil] is returned so no one can misunderstand that this method mutates
    # the passed instance.
    #
    # @param [Nokogiri::HTML::Document] dom
    # @return [nil] DOM tree is mutated
    def transform_dom(dom)
      # Use only a single loop to do this
      selector =
        "a[href]:not([data-roadie-ignore]), " \
        "img[src]:not([data-roadie-ignore]), " \
        "*[style]:not([data-roadie-ignore])"

      dom.css(selector).each do |element|
        transform_element_style(element) if element.has_attribute?("style")
        transform_element(element)
      end
      nil
    end

    # Mutates passed CSS, rewriting url() directives.
    #
    # This will make all URLs inside url() absolute.
    #
    # Copy of CSS that is mutated is returned, passed string is not mutated.
    #
    # @param [String] css the css to mutate
    # @return [String] copy of css that is mutated
    def transform_css(css)
      css.gsub(CSS_URL_REGEXP) do
        matches = Regexp.last_match
        # Re-emit the directive with the same quote style it was written with.
        "url(#{matches[:quote]}#{generate_url(matches[:url])}#{matches[:quote]})"
      end
    end

    private

    # Forwards all arguments to the generator's +generate_url+.
    def generate_url(*args)
      @generator.generate_url(*args)
    end

    # Rewrites the URL-carrying attribute of +element+: href for "a" and src
    # for "img". Elements with any other name are left untouched.
    def transform_element(element)
      case element.name
      when "a" then rewrite_url_attribute(element, "href")
      when "img" then rewrite_url_attribute(element, "src")
      end
    end

    # Replaces the value of +attribute+ on +element+ with the generated URL.
    #
    # An element can be selected by transform_dom solely because it has a
    # style attribute; in that case it may lack the URL attribute entirely.
    # Skip it then, so no nil is handed to the generator and no new attribute
    # is added to the element.
    def rewrite_url_attribute(element, attribute)
      return unless element.has_attribute?(attribute)

      element[attribute] = generate_url(element[attribute])
    end

    # Rewrites the url() directives inside the element's style attribute.
    def transform_element_style(element)
      # We need to use a setter for Nokogiri to detect the string mutation.
      # If nokogiri used "dumber" data structures, this would all be redundant.
      element["style"] = transform_css(element["style"])
    end
  end
end

Read our documentation on viewing source code .

Loading