MDL-59961 core_files: make content hash validation reusable
[moodle.git] / lib / filestorage / file_system.php
CommitLineData
16a34ae1
AN
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Core file system class definition.
19 *
20 * @package core_files
21 * @copyright 2017 Andrew Nicols <andrew@nicols.co.uk>
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
// Refuse direct web access: stop unless Moodle's bootstrap has defined MOODLE_INTERNAL.
defined('MOODLE_INTERNAL') || die();
26
/**
 * File system class used for low level access to real files in filedir.
 *
 * Concrete file systems extend this class and implement the abstract methods
 * which map a content hash onto a local or remote path, and which add, copy
 * and remove content.
 *
 * @package   core_files
 * @category  files
 * @copyright 2017 Andrew Nicols <andrew@nicols.co.uk>
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
abstract class file_system {

    /**
     * Non-public clone method to prevent cloning of the instance.
     */
    final protected function __clone() {
        return;
    }

    /**
     * Non-public wakeup method to prevent unserialising of the instance.
     */
    final protected function __wakeup() {
        return;
    }

    /**
     * Output the content of the specified stored file.
     *
     * Note, this is different to get_content() as it uses the built-in php
     * readfile function which is more efficient.
     *
     * @param stored_file $file The file to serve.
     * @return void
     */
    public function readfile(stored_file $file) {
        // Prefer the local copy when it is already readable; never trigger a fetch just to stream the file.
        if ($this->is_file_readable_locally_by_storedfile($file, false)) {
            $path = $this->get_local_path_from_storedfile($file, false);
        } else {
            $path = $this->get_remote_path_from_storedfile($file);
        }
        readfile_allow_large($path, $file->get_filesize());
    }

    /**
     * Get the full path on disk for the specified stored file.
     *
     * Note: This must return a consistent path for the file's contenthash
     * and the path _will_ be in a standard local format.
     * Streamable paths will not work.
     * A local copy of the file _will_ be fetched if $fetchifnotfound is true.
     *
     * The $fetchifnotfound allows you to determine the expected path of the file.
     *
     * @param stored_file $file The file to serve.
     * @param bool $fetchifnotfound Whether to attempt to fetch from the remote path if not found.
     * @return string full path to pool file with file content
     */
    protected function get_local_path_from_storedfile(stored_file $file, $fetchifnotfound = false) {
        return $this->get_local_path_from_hash($file->get_contenthash(), $fetchifnotfound);
    }

    /**
     * Get a remote filepath for the specified stored file.
     *
     * This is typically either the same as the local filepath, or it is a streamable resource.
     *
     * See https://secure.php.net/manual/en/wrappers.php for further information on valid wrappers.
     *
     * @param stored_file $file The file to serve.
     * @return string full path to pool file with file content
     */
    protected function get_remote_path_from_storedfile(stored_file $file) {
        return $this->get_remote_path_from_hash($file->get_contenthash());
    }

    /**
     * Get the full local path for the specified hash, including the path to the filedir.
     *
     * Note: This must return a consistent path for the file's contenthash
     * and the path _will_ be in a standard local format.
     * Streamable paths will not work.
     * A local copy of the file _will_ be fetched if $fetchifnotfound is true.
     *
     * The $fetchifnotfound allows you to determine the expected path of the file.
     *
     * @param string $contenthash The content hash
     * @param bool $fetchifnotfound Whether to attempt to fetch from the remote path if not found.
     * @return string The full path to the content file
     */
    abstract protected function get_local_path_from_hash($contenthash, $fetchifnotfound = false);

    /**
     * Get the remote path for the specified hash.
     *
     * This is typically either the same as the local filepath, or it is a streamable resource.
     *
     * See https://secure.php.net/manual/en/wrappers.php for further information on valid wrappers.
     *
     * @param string $contenthash The content hash
     * @return string The full path to the content file
     */
    abstract protected function get_remote_path_from_hash($contenthash);

    /**
     * Determine whether the file is readable at its local path.
     * A local copy of the file _will_ be fetched if $fetchifnotfound is true.
     *
     * The $fetchifnotfound allows you to determine the expected path of the file.
     *
     * @param stored_file $file The file to ensure is available.
     * @param bool $fetchifnotfound Whether to attempt to fetch from the remote path if not found.
     * @return bool
     */
    public function is_file_readable_locally_by_storedfile(stored_file $file, $fetchifnotfound = false) {
        if (!$file->get_filesize()) {
            // Files with empty size are either directories or empty.
            // We handle these virtually.
            return true;
        }

        // Check to see if the file is currently readable.
        $path = $this->get_local_path_from_storedfile($file, $fetchifnotfound);
        if (is_readable($path)) {
            return true;
        }

        return false;
    }

    /**
     * Determine whether the file is readable at its remote path.
     *
     * @param stored_file $file The file to ensure is available.
     * @return bool
     */
    public function is_file_readable_remotely_by_storedfile(stored_file $file) {
        if (!$file->get_filesize()) {
            // Files with empty size are either directories or empty.
            // We handle these virtually.
            return true;
        }

        $path = $this->get_remote_path_from_storedfile($file);
        if (is_readable($path)) {
            return true;
        }

        return false;
    }

    /**
     * Determine whether the file is readable at its local path given
     * the contenthash.
     *
     * @param string $contenthash The contenthash of the file to check.
     * @param bool $fetchifnotfound Whether to attempt to fetch from the remote path if not found.
     * @return bool
     */
    public function is_file_readable_locally_by_hash($contenthash, $fetchifnotfound = false) {
        if ($contenthash === file_storage::hash_from_string('')) {
            // Files with empty size are either directories or empty.
            // We handle these virtually.
            return true;
        }

        // This is called by file_storage::content_exists(), and in turn by the repository system.
        $path = $this->get_local_path_from_hash($contenthash, $fetchifnotfound);

        // Note - it is not possible to perform a content recovery safely from a hash alone.
        return is_readable($path);
    }

    /**
     * Determine whether the file is readable at its remote path given
     * the contenthash.
     *
     * @param string $contenthash The contenthash of the file to check.
     * @return bool
     */
    public function is_file_readable_remotely_by_hash($contenthash) {
        if ($contenthash === file_storage::hash_from_string('')) {
            // Files with empty size are either directories or empty.
            // We handle these virtually.
            return true;
        }

        $path = $this->get_remote_path_from_hash($contenthash);

        // Note - it is not possible to perform a content recovery safely from a hash alone.
        return is_readable($path);
    }

    /**
     * Copy content of file to given pathname.
     *
     * @param stored_file $file The file to be copied
     * @param string $target real path to the new file
     * @return bool success
     */
    abstract public function copy_content_from_storedfile(stored_file $file, $target);

    /**
     * Remove the file with the specified contenthash.
     *
     * Note, if overriding this function, you _must_ check that the file is
     * no longer in use - see {@see is_file_removable()}.
     *
     * DO NOT call directly - reserved for core!!
     *
     * @param string $contenthash
     */
    abstract public function remove_file($contenthash);

    /**
     * Check whether a file is removable.
     *
     * This must be called prior to file removal.
     *
     * @param string $contenthash
     * @return bool False for the empty-content hash or when a files record still uses the hash; true otherwise.
     */
    protected static function is_file_removable($contenthash) {
        global $DB;

        if ($contenthash === file_storage::hash_from_string('')) {
            // No need to delete files without content.
            return false;
        }

        // Note: This section is critical - in theory file could be reused at the same time, if this
        // happens we can still recover the file from trash.
        // Technically this is the responsibility of the file_storage API, but we go belt-and-braces.
        if ($DB->record_exists('files', array('contenthash' => $contenthash))) {
            // File content is still used.
            return false;
        }

        return true;
    }

    /**
     * Get the content of the specified stored file.
     *
     * Generally you will probably want to use readfile() to serve content,
     * and where possible you should see if you can use
     * get_content_file_handle and work with the file stream instead.
     *
     * @param stored_file $file The file to retrieve
     * @return string The full file content (whatever file_get_contents() yields for the remote path)
     */
    public function get_content(stored_file $file) {
        if (!$file->get_filesize()) {
            // Directories are empty. Empty files are not worth fetching.
            return '';
        }

        $source = $this->get_remote_path_from_storedfile($file);
        return file_get_contents($source);
    }

    /**
     * List contents of archive.
     *
     * @param stored_file $file The archive to inspect
     * @param file_packer $packer file packer instance
     * @return array of file infos
     */
    public function list_files($file, file_packer $packer) {
        // Packers are handed a local path, so make sure a local copy exists.
        $archivefile = $this->get_local_path_from_storedfile($file, true);
        return $packer->list_files($archivefile);
    }

    /**
     * Extract file to given file path (real OS filesystem), existing files are overwritten.
     *
     * @param stored_file $file The archive to inspect
     * @param file_packer $packer File packer instance
     * @param string $pathname Target directory
     * @param file_progress $progress progress indicator callback or null if not required
     * @return array|bool List of processed files; false if error
     */
    public function extract_to_pathname(stored_file $file, file_packer $packer, $pathname, file_progress $progress = null) {
        $archivefile = $this->get_local_path_from_storedfile($file, true);
        return $packer->extract_to_pathname($archivefile, $pathname, null, $progress);
    }

    /**
     * Extract file into the file storage, delegating the work to the packer.
     *
     * @param stored_file $file The archive to inspect
     * @param file_packer $packer file packer instance
     * @param int $contextid context ID
     * @param string $component component
     * @param string $filearea file area
     * @param int $itemid item ID
     * @param string $pathbase path base
     * @param int $userid user ID
     * @param file_progress $progress Progress indicator callback or null if not required
     * @return array|bool list of processed files; false if error
     */
    public function extract_to_storage(stored_file $file, file_packer $packer, $contextid,
            $component, $filearea, $itemid, $pathbase, $userid = null, file_progress $progress = null) {

        // Since we do not know which extractor we have, and whether it supports remote paths, use a local path here.
        $archivefile = $this->get_local_path_from_storedfile($file, true);
        return $packer->extract_to_storage($archivefile, $contextid,
                $component, $filearea, $itemid, $pathbase, $userid, $progress);
    }

    /**
     * Add file/directory into archive.
     *
     * @param stored_file $file The file to archive
     * @param file_archive $filearch file archive instance
     * @param string $archivepath pathname in archive
     * @return bool success
     */
    public function add_storedfile_to_archive(stored_file $file, file_archive $filearch, $archivepath) {
        if ($file->is_directory()) {
            return $filearch->add_directory($archivepath);
        } else {
            // Since we do not know which archiver we have, and whether it supports remote paths, use a local path here.
            return $filearch->add_file_from_pathname($archivepath, $this->get_local_path_from_storedfile($file, true));
        }
    }

    /**
     * Adds this file path to a curl request (POST only).
     *
     * The file is attached by storing a curl_file_create() value under $key in
     * the request's _tmp_file_post_params array.
     *
     * This needs the fullpath for the storedfile :/
     * Can this be achieved in some other fashion?
     *
     * @param stored_file $file The file to add to the curl request
     * @param curl $curlrequest The curl request object
     * @param string $key What key to use in the POST request
     * @return void
     */
    public function add_to_curl_request(stored_file $file, &$curlrequest, $key) {
        // Note: curl_file_create does not work with remote paths.
        $path = $this->get_local_path_from_storedfile($file, true);
        $curlrequest->_tmp_file_post_params[$key] = curl_file_create($path);
    }

    /**
     * Returns information about image.
     * Information is determined from the file content
     *
     * @param stored_file $file The file to inspect
     * @return mixed array with width, height and mimetype; false if not an image
     */
    public function get_imageinfo(stored_file $file) {
        if (!$this->is_image_from_storedfile($file)) {
            return false;
        }

        // Whilst get_imageinfo_from_path can use remote paths, it must download the entire file first.
        // It is more efficient to use a local file when possible.
        return $this->get_imageinfo_from_path($this->get_local_path_from_storedfile($file, true));
    }

    /**
     * Attempt to determine whether the specified file is likely to be an
     * image.
     * Since this relies upon the mimetype stored in the files table, there
     * may be times when this information is not 100% accurate.
     *
     * @param stored_file $file The file to check
     * @return bool
     */
    public function is_image_from_storedfile(stored_file $file) {
        if (!$file->get_filesize()) {
            // An empty file cannot be an image.
            return false;
        }

        $mimetype = $file->get_mimetype();
        if (!preg_match('|^image/|', $mimetype)) {
            // The mimetype does not include image.
            return false;
        }

        // If it looks like an image, and it smells like an image, perhaps it's an image!
        return true;
    }

    /**
     * Returns image information relating to the specified path or URL.
     *
     * @param string $path The path to pass to getimagesize.
     * @return array|false Array containing width, height, and mimetype; false when the file cannot be parsed as an image.
     */
    protected function get_imageinfo_from_path($path) {
        $imageinfo = getimagesize($path);
        if (!is_array($imageinfo)) {
            // The file was not recognised as an image, so there is no result array to index into.
            return false;
        }

        $image = array(
            'width' => $imageinfo[0],
            'height' => $imageinfo[1],
            'mimetype' => image_type_to_mime_type($imageinfo[2]),
        );
        if (empty($image['width']) || empty($image['height']) || empty($image['mimetype'])) {
            // GD can not parse it, sorry.
            return false;
        }
        return $image;
    }

    /**
     * Serve file content using X-Sendfile header.
     * Please make sure that all headers are already sent and the all
     * access control checks passed.
     *
     * @param string $contenthash The content hash of the file to be served
     * @return bool success
     */
    public function xsendfile($contenthash) {
        global $CFG;
        require_once($CFG->libdir . "/xsendfilelib.php");

        // This calls the global xsendfile() function from xsendfilelib.php, not this method.
        return xsendfile($this->get_remote_path_from_hash($contenthash));
    }

    /**
     * Validate that the content hash matches the content hash of the file on disk.
     *
     * When no hash is supplied it is computed from the file. A supplied hash is only
     * re-checked against the file when $CFG->debugdeveloper is set; on mismatch a
     * debugging message is emitted and the computed hash is used instead.
     *
     * @param string $contenthash The current content hash to validate
     * @param string $pathname The path to the file on disk
     * @return array The content hash (it might change) and file size
     * @throws file_exception When the file cannot be read, sized or hashed.
     */
    protected function validate_hash_and_file_size($contenthash, $pathname) {
        global $CFG;

        if (!is_readable($pathname)) {
            throw new file_exception('storedfilecannotread', '', $pathname);
        }

        $filesize = filesize($pathname);
        if ($filesize === false) {
            throw new file_exception('storedfilecannotread', '', $pathname);
        }

        if (is_null($contenthash)) {
            $contenthash = file_storage::hash_from_path($pathname);
        } else if ($CFG->debugdeveloper) {
            $filehash = file_storage::hash_from_path($pathname);
            if ($filehash === false) {
                throw new file_exception('storedfilecannotread', '', $pathname);
            }
            if ($filehash !== $contenthash) {
                // Hopefully this never happens, if yes we need to fix calling code.
                debugging("Invalid contenthash submitted for file $pathname", DEBUG_DEVELOPER);
                $contenthash = $filehash;
            }
        }
        if ($contenthash === false) {
            throw new file_exception('storedfilecannotread', '', $pathname);
        }

        if ($filesize > 0 && $contenthash === file_storage::hash_from_string('')) {
            // Did the file change or is file_storage::hash_from_path() borked for this file?
            // Drop the cached stat data and measure once more before giving up.
            clearstatcache();
            $contenthash = file_storage::hash_from_path($pathname);
            $filesize = filesize($pathname);

            if ($contenthash === false || $filesize === false) {
                throw new file_exception('storedfilecannotread', '', $pathname);
            }
            if ($filesize > 0 && $contenthash === file_storage::hash_from_string('')) {
                // This is very weird...
                throw new file_exception('storedfilecannotread', '', $pathname);
            }
        }

        return [$contenthash, $filesize];
    }

    /**
     * Add the supplied file to the file system.
     *
     * Note: If overriding this function, it is advisable to store the file
     * in the path returned by get_local_path_from_hash as there may be
     * subsequent uses of the file in the same request.
     *
     * @param string $pathname Path to file currently on disk
     * @param string $contenthash SHA1 hash of content if known (performance only)
     * @return array (contenthash, filesize, newfile)
     */
    abstract public function add_file_from_path($pathname, $contenthash = null);

    /**
     * Add a file with the supplied content to the file system.
     *
     * Note: If overriding this function, it is advisable to store the file
     * in the path returned by get_local_path_from_hash as there may be
     * subsequent uses of the file in the same request.
     *
     * @param string $content file content - binary string
     * @return array (contenthash, filesize, newfile)
     */
    abstract public function add_file_from_string($content);

    /**
     * Returns file handle - read only mode, no writing allowed into pool files!
     *
     * When you want to modify a file, create a new file and delete the old one.
     *
     * @param stored_file $file The file to retrieve a handle for
     * @param int $type Type of file handle (FILE_HANDLE_xx constant)
     * @return resource file handle
     */
    public function get_content_file_handle(stored_file $file, $type = stored_file::FILE_HANDLE_FOPEN) {
        $path = $this->get_remote_path_from_storedfile($file);

        return self::get_file_handle_for_path($path, $type);
    }

    /**
     * Return a file handle for the specified path.
     *
     * This abstraction should be used when overriding get_content_file_handle in a new file system.
     *
     * @param string $path The path to the file. This should be any type of path that fopen and gzopen accept.
     * @param int $type Type of file handle (FILE_HANDLE_xx constant)
     * @return resource
     * @throws coding_exception When an unexpected type of file handle is requested
     */
    protected static function get_file_handle_for_path($path, $type = stored_file::FILE_HANDLE_FOPEN) {
        switch ($type) {
            case stored_file::FILE_HANDLE_FOPEN:
                // Binary reading.
                return fopen($path, 'rb');
            case stored_file::FILE_HANDLE_GZOPEN:
                // Binary reading of file in gz format.
                return gzopen($path, 'rb');
            default:
                throw new coding_exception('Unexpected file handle type');
        }
    }

    /**
     * Retrieve the mime information for the specified content hash.
     *
     * @param string $contenthash
     * @param string $filename
     * @return string The MIME type.
     */
    public function mimetype_from_hash($contenthash, $filename) {
        $pathname = $this->get_remote_path_from_hash($contenthash);
        $mimetype = file_storage::mimetype($pathname, $filename);

        if (!$this->is_file_readable_locally_by_hash($contenthash, false) && $mimetype === 'document/unknown') {
            // The type is unknown, but the full checks weren't completed because the file isn't locally available.
            // Ensure we have a local copy and try again.
            $pathname = $this->get_local_path_from_hash($contenthash, true);

            $mimetype = file_storage::mimetype_from_file($pathname);
        }

        return $mimetype;
    }

    /**
     * Retrieve the mime information for the specified stored file.
     *
     * @param stored_file $file The stored file to retrieve mime information for
     * @return string|null The MIME type, or null when the file has an empty filesize.
     */
    public function mimetype_from_storedfile($file) {
        if (!$file->get_filesize()) {
            // Files with an empty filesize are treated as directories and have no mimetype.
            return null;
        }
        $pathname = $this->get_remote_path_from_storedfile($file);
        $mimetype = file_storage::mimetype($pathname, $file->get_filename());

        if (!$this->is_file_readable_locally_by_storedfile($file) && $mimetype === 'document/unknown') {
            // The type is unknown, but the full checks weren't completed because the file isn't locally available.
            // Ensure we have a local copy and try again.
            $pathname = $this->get_local_path_from_storedfile($file, true);

            $mimetype = file_storage::mimetype_from_file($pathname);
        }

        return $mimetype;
    }

    /**
     * Run any periodic tasks which must be performed.
     *
     * The base implementation does nothing.
     */
    public function cron() {
    }
}