1
<?php
2

3
namespace SilverStripe\TextExtraction\Extension;
4

5
use SilverStripe\Assets\File;
6
use SilverStripe\ORM\DataExtension;
7
use SilverStripe\TextExtraction\Cache\FileTextCache;
8
use SilverStripe\TextExtraction\Extractor\FileTextExtractor;
9

10
/**
11
 * Decorate File or a File derivative to enable text extraction from the file content. Uses a set of subclasses of
12
 * FileTextExtractor to do the extraction based on the content type of the file.
13
 *
14
 * Adds an additional property which is the cached contents, which is populated on demand.
15
 *
16
 * @author mstephens
17
 */
18
class FileTextExtractable extends DataExtension
{
    /**
     * Extra database field added to the decorated record.
     *
     * @var array
     * @config
     */
    private static $db = [
        'FileContentCache' => 'Text'
    ];

    /**
     * Casting applied to the template-facing FileContent value.
     *
     * @var array
     * @config
     */
    private static $casting = [
        'FileContent' => 'Text'
    ];

    /**
     * Service reference for the text cache, handed over through setTextCache().
     *
     * @var array
     * @config
     */
    private static $dependencies = [
        'TextCache' => '%$' . FileTextCache::class,
    ];

    /**
     * Cache backend used to load, save and invalidate extracted text.
     *
     * @var FileTextCache
     */
    protected $fileTextCache = null;

    /**
     * Assign the cache backend used by this extension.
     *
     * @param  FileTextCache $cache
     * @return $this
     */
    public function setTextCache(FileTextCache $cache)
    {
        $this->fileTextCache = $cache;

        return $this;
    }

    /**
     * Fetch the cache backend currently assigned to this extension.
     *
     * @return FileTextCache
     */
    public function getTextCache()
    {
        return $this->fileTextCache;
    }

    /**
     * Template accessor: the extracted text of the owner, using the cache.
     *
     * @return string
     */
    public function getFileContent()
    {
        return $this->extractFileAsText(false);
    }

    /**
     * Return the owner's content as text, using whichever FileTextExtractor handles the file.
     *
     * With the cache enabled, a truthy cached value is returned straight away; otherwise the
     * content is extracted and then stored in the cache. With the cache disabled, the cache is
     * neither consulted nor updated.
     *
     * @param boolean $disableCache If false, a cached value is used when one is available.
     *                              If true, extraction always runs and the cache is left
     *                              untouched.
     * @return string|null Null when no extractor handles the file or the extracted content
     *                     is empty (any falsy value counts as empty).
     */
    public function extractFileAsText($disableCache = false)
    {
        /** @var File $record */
        $record = $this->owner;
        $useCache = !$disableCache;

        if ($useCache) {
            $cached = $this->getTextCache()->load($record);
            if ($cached) {
                return $cached;
            }
        }

        // Look up an extractor for this file; without one there is nothing to extract.
        $handler = FileTextExtractor::for_file($record);
        $content = $handler ? $handler->getContent($record) : null;
        if (!$content) {
            return null;
        }

        if ($useCache) {
            $this->getTextCache()->save($record, $content);
        }

        return $content;
    }

    /**
     * Invalidate the owner's cached text ahead of the record being written.
     *
     * @return void
     */
    public function onBeforeWrite()
    {
        $cache = $this->getTextCache();
        $cache->invalidate($this->owner);
    }
}

Read our documentation on viewing source code.

Loading