1
#' Discover MIME type of a file based on contents
2
#'
3
#' There are a limited number of header "magic" bytes checked directly by
4
#' this function but cover quite a bit of ground. After that, [guess_content_type()]
5
#' is called which uses file extension-to-MIME mappings.
6
#'
7
#' File an issue or PR if more magic-byte-level
8
#' comparisons are required/desired. If no match is found, `???` is returned
9
#' (see [guess_content_type()] for how to override this behaviour).
10
#'
11
#' @md
12
#' @param path path to a file
13
#' @param ... passed on to [guess_content_type()]
14
#' @return character vector
15
#' @export
16
#' @examples
17
#' get_content_type(system.file("extdat", "test.pdf", package="wand"))
18
get_content_type <- function(path, ...) {
19

20 1
  path <- path.expand(path[1])
21 0
  if (!file.exists(path)) stop("File not found.", call.=FALSE)
22

23 1
  hdr <- readBin(path, "raw", n=1024)
24

25 0
  if (all(c(0x4F,0x62,0x6A,0x01) == hdr[1:4])) return("application/vnd.apache.avro+binary")
26 0
  if (all(c(0x50,0x41,0x52,0x31) == hdr[1:4])) return("application/x-parquet")
27

28 0
  if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
29

30 1
  if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {
31 1
    guessed_name <- guess_content_type(path)
32 1
    if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
33 1
    return("application/msword")
34
  }
35

36 1
  if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf")
37 0
  if (all(c(0x25,0x50,0x44,0x46) == hdr[1:4])) return("application/x-pdf")
38

39 1
  if (all(c(0x38,0x42,0x50,0x53,0x00,0x01) == hdr[1:6])) return("image/photoshop")
40

41 1
  if (all(c(0x25,0x21,0x50,0x53) == hdr[1:4])) return("application/postscript")
42

43 0
  if (all(c(0xff,0xfb,0x30) == hdr[1:3])) return("audio/mp3")
44 0
  if (all(c(0xff,0xfb,0xd0) == hdr[1:3])) return("audio/mp3")
45 1
  if (all(c(0xff,0xfb,0x90) == hdr[1:3])) return("audio/mp3")
46 0
  if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3")
47 0
  if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object")
48

49 0
  if (all(c(0x4c,0x5a,0x49,0x50) == hdr[1:4])) return("application/x-lzip")
50

51 1
  if (hdr[1] == 0x3c) { # "<"
52 0
    if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html"
53 0
    if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML"
54 0
    if (all(c(0x48,0x45,0x41,0x44) == hdr[2:5])) return("text/html") # "HEAD"
55 0
    if (all(c(0x68,0x65,0x61,0x64) == hdr[2:5])) return("text/html") # "head"
56 1
    if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml")
57
  }
58

59 0
  if (all(c(0x0a,0x0d,0x0d,0x0a) == hdr[1:4])) "application/x-pcapng"
60

61 1
  if (all(c(0xa1,0xb2,0xc3,0xd4) == hdr[1:4]) ||
62 0
      all(c(0xd4,0xc3,0xb2,0xa1) == hdr[1:4])) return("application/x-cap")
63

64 1
  if (all(c(0xfe,0xff) == hdr[1:2])) {
65 0
    if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml")
66
  }
67

68 1
  if (all(c(0x42,0x4d) == hdr[1:2])) return("image/bmp")
69 1
  if (all(c(0x49,0x49,0x2a,0x00) == hdr[1:4])) return("image/tiff")
70 0
  if (all(c(0x4D,0x4D,0x00,0x2a) == hdr[1:4])) return("image/tiff")
71 1
  if (all(c(0x47,0x49,0x46,0x38) == hdr[1:4])) return("image/gif")
72 0
  if (all(c(0x23,0x64,0x65,0x66) == hdr[1:4])) return("image/x-bitmap")
73 0
  if (all(c(0x21,0x20,0x58,0x50,0x4d,0x32) == hdr[1:6])) return("image/x-pixmap")
74 1
  if (all(c(137,80,78,71,13,10,26,10) == hdr[1:8])) return("image/png")
75

76 1
  if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:11]))
77 0
    return("application/javascript")
78 1
  if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:13]))
79 0
    return("application/javascript")
80 1
  if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:15]))
81 0
    return("application/javascript")
82 1
  if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:17]))
83 0
    return("application/javascript")
84 1
  if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65) == hdr[1:19]))
85 0
    return("application/javascript")
86 1
  if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:21]))
87 0
    return("application/javascript")
88

89 1
  if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) {
90 0
    if (0xDB == hdr[4]) return("image/jpeg")
91 1
    if (0xE0 == hdr[4]) return("image/jpeg")
92 0
    if (0xE1 == hdr[4]) {
93 0
      if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif
94
    }
95 0
    if (0xEE == hdr[4]) return("image/jpg")
96
  }
97

98 1
  if (all(c(0x41,0x43) == hdr[1:2]) && all(c(0x00,0x00,0x00,0x00,0x00) == hdr[7:11]))
99 0
    return("application/acad")
100

101 1
  if (all(c(0x2E,0x73,0x6E,0x64) == hdr[1:4])) return("audio/basic")
102 0
  if (all(c(0x64,0x6E,0x73,0x2E) == hdr[1:4])) return("audio/basic")
103 1
  if (all(c(0x52,0x49,0x46,0x46) == hdr[1:4])) return("audio/x-wav") # "RIFF"
104

105 1
  if (all(c(0x50, 0x4b) == hdr[1:2])) { # "PK"
106

107 1
    office_type <- check_office(hdr, path)
108 1
    if (length(office_type) > 0) return(office_type)
109

110 1
    guessed_name <- guess_content_type(path)
111 1
    if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
112

113 1
    return("application/zip")
114

115
  }
116

117 0
  if (all(c(0x00,0x61,0x73,0x6d) == hdr[1:4])) return("application/wasm")
118

119 0
  if (all(c(0x37,0x7A,0xBC,0xAF,0x27,0x1C) == hdr[1:6])) return("application/x-7z-compressed")
120

121 0
  if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")
122

123 1
  if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) {
124 1
    if (all(c(0x00,0x30,0x30) == hdr[263:265]) || all(c(0x20,0x20,0x00) == hdr[263:265])) {
125 1
      return("application/tar")
126
    } else {
127 0
      return("application/pax")
128
    }
129
  }
130

131 0
  if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
132 0
  if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")
133

134

135 1
  return(guess_content_type(path, ...))
136

137
}

Read our documentation on viewing source code .

Loading