Difference between revisions of "File System API"

Jump to: navigation, search
(SHA1 => hash_from_)
 
(3 intermediate revisions by 2 users not shown)
Line 8: Line 8:
 
The standard file system implementation uses the Moodle filedir, which is a locally available directory on disk, and which can be shared between clustered servers via network file systems such as NFS.
 
The standard file system implementation uses the Moodle filedir, which is a locally available directory on disk, and which can be shared between clustered servers via network file systems such as NFS.
  
Since Moodle 3.3 it is possible to use alternative file systems, including remote file systems. These are easy to setup and configure, and allow for greater scalability which does not depends so heavily upon traditional network file systems.
+
Since Moodle 3.3 it is possible to use alternative file systems, including remote file systems. These are easy to setup and configure, and allow for greater scalability which does not depends so heavily upon traditional network file systems. See config-dist.php for more details.  
  
 
Please note that the file *must* exist remotely and a local file is *optional*. The remote file is seen as canonical and this system is *not* designed to be used to cater to a split brain solution where some files are present in one place, and some files are present in another.
 
Please note that the file *must* exist remotely and a local file is *optional*. The remote file is seen as canonical and this system is *not* designed to be used to cater to a split brain solution where some files are present in one place, and some files are present in another.
Line 184: Line 184:
 
The <code>remove_file</code> function is responsible for removing a file.
 
The <code>remove_file</code> function is responsible for removing a file.
 
A file system '''may''' choose to implement it's own trash mechanism, as in the case of <code>filesystem_filedir</code>
 
A file system '''may''' choose to implement it's own trash mechanism, as in the case of <code>filesystem_filedir</code>
 +
 +
=== Example ===
 +
 +
==== FTP file system ====
 +
Tying together the information in the previous sections, we'll now set up an example FTP file system. Please note: Neither the sample <code>core_file_system_ftp</code> class, nor the use as FTP as a file system is recommended for use in production installs. It simply serves as an example to aid developers in grasping the concept of a remote file system.
 +
 +
 +
The first step is creating the <code>core_file_system_ftp</code> class and setting it up as the alternative file system. To do this, create a file called '''file_system_ftp.php''' in your '''lib/classes''' directory and add the following content:
 +
 +
 +
<code>
 +
<?php
 +
/**
 +
* FTP File System.
 +
*
 +
* @license  http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 +
*/
 +
 +
defined('MOODLE_INTERNAL') || die();
 +
 +
/**
 +
* File system class used for low level access to real files via ftp (aieee! don't do it!).
 +
*
 +
* @package  core_files
 +
* @category  files
 +
* @copyright 2017 Damyon Wiese
 +
* @license  http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 +
*/
 +
class core_file_system_ftp extends file_system {
 +
 +
    public function setup_instance() {
 +
        global $CFG;
 +
 +
        $this->dirpermissions = $CFG->directorypermissions;
 +
        $this->filepermissions = $CFG->filepermissions;
 +
 +
        $this->cachedir = make_request_directory();
 +
        $this->check_root_files_exist();
 +
    }
 +
 +
    protected function check_root_files_exist() {
 +
        // Make sure the file pool directory exists.
 +
        $filedir = $this->get_root_path();
 +
        if (!is_dir($filedir)) {
 +
            if (!mkdir($filedir, $this->dirpermissions, true)) {
 +
                // Permission trouble.
 +
                throw new file_exception('storedfilecannotcreatefiledirs');
 +
            }
 +
 +
            // Place warning file in file pool root.
 +
            $warningfile = $this->get_root_path('warning.txt');
 +
            if (!file_exists($warningfile)) {
 +
                file_put_contents($warningfile,
 +
                        'This directory contains the content of uploaded files and is controlled by Moodle code. ' .
 +
                        'Do not manually move, change or rename any of the files and subdirectories here.');
 +
            }
 +
        }
 +
    }
 +
 +
    protected function get_remote_path_from_hash($contenthash) {
 +
        return $this->get_root_path($contenthash);
 +
    }
 +
 +
    protected function get_local_path_from_hash($contenthash, $fetchifnotfound = false) {
 +
        $this->setup_instance();
 +
        $localurl = $this->cachedir . DIRECTORY_SEPARATOR . $contenthash;
 +
 +
        if ($fetchifnotfound && !is_readable($localurl)) {
 +
            $remoteurl = $this->get_remote_path_from_hash($contenthash);
 +
            copy($remoteurl, $localurl);
 +
        }
 +
 +
        return $localurl;
 +
    }
 +
 +
    public function get_root_url() {
 +
        global $CFG;
 +
 +
        static $path = null;
 +
 +
        if ($path === null) {
 +
            $path = "ftp://{$CFG->file_system_ftp_user}:{$CFG->file_system_ftp_pass}@" .
 +
                    "{$CFG->file_system_ftp_host}/{$CFG->file_system_ftp_path}";
 +
        }
 +
 +
        return $path;
 +
    }
 +
 +
    public function get_root_path($file = null) {
 +
        $path = $this->get_root_url() . DIRECTORY_SEPARATOR . 'filedir';
 +
        if ($file) {
 +
          $path .= DIRECTORY_SEPARATOR . $file;
 +
        }
 +
 +
        return $path;
 +
    }
 +
 +
    /**
 +
    * Copy content of file to given pathname.
 +
    *
 +
    * @param stored_file $file The file to be copied
 +
    * @param string $target real path to the new file
 +
    * @return bool success
 +
    */
 +
    public function copy_content_from_storedfile(stored_file $file, $target) {
 +
        return copy($this->get_remote_path_from_stored_file($file), $target);
 +
    }
 +
 +
    /**
 +
    * Removes the file.
 +
    *
 +
    * @param string $contenthash
 +
    */
 +
    public function remove_file($contenthash) {
 +
        if (!self::is_file_removable($contenthash)) {
 +
            return false;
 +
        }
 +
        $contentfile = $this->get_remote_path_from_hash($contenthash, false);
 +
 +
        unlink($contentfile);
 +
 +
        return true;
 +
    }
 +
 +
    /**
 +
    * Add file content to sha1 pool.
 +
    *
 +
    * @param string $pathname Path to file currently on disk
 +
    * @param string $contenthash SHA1 hash of content if known (performance only)
 +
    * @return array (contenthash, filesize, newfile)
 +
    */
 +
    public function add_file_from_path($pathname, $contenthash = null) {
 +
        if (!is_readable($pathname)) {
 +
            throw new file_exception('storedfilecannotread', '', $pathname);
 +
        }
 +
 +
        $filesize = filesize($pathname);
 +
        if ($filesize === false) {
 +
            throw new file_exception('storedfilecannotread', '', $pathname);
 +
        }
 +
        if (is_null($contenthash)) {
 +
            $contenthash = sha1_file($pathname);
 +
        }
 +
        if (is_null($contenthash)) {
 +
            throw new file_exception('storedfilecannotread', '', $pathname);
 +
        }
 +
 +
        // Copy the file to it's local position. This helps perf in some
 +
        // situations.
 +
        $localpath = $this->get_local_path_from_hash($contenthash, false);
 +
        copy($pathname, $localpath);
 +
 +
        $remotepath = $this->get_remote_path_from_hash($contenthash, false);
 +
 +
        $newfile = false;
 +
        if (!file_exists($remotepath)) {
 +
            $newfile = true;
 +
            $result = copy($localpath, $remotepath);
 +
 +
            if ($result === false) {
 +
                throw new file_exception('storedfilecannotcreatefile');
 +
            }
 +
        }
 +
        return [$contenthash, $filesize, $newfile];
 +
    }
 +
 +
    /**
 +
    * Add string content to sha1 pool.
 +
    *
 +
    * @param string $content file content - binary string
 +
    * @return array (contenthash, filesize, newfile)
 +
    */
 +
    public function add_file_from_string($content) {
 +
        $contenthash = sha1($content);
 +
        $filesize = strlen($content);
 +
 +
        if ($content === '') {
 +
            return [$contenthash, $filesize, false];
 +
        }
 +
 +
        $newfile = false;
 +
        $remotepath = $this->get_remote_path_from_hash($contenthash, false);
 +
        if (!file_exists($remotepath)) {
 +
            $newfile = true;
 +
            // Store the file remotely first - it's likely to be used again.
 +
            $localpath = $this->get_local_path_from_hash($contenthash, false);
 +
            file_put_contents($localpath, $content);
 +
 +
            copy($localpath, $remotepath);
 +
        }
 +
 +
        return [$contenthash, $filesize, $newfile];
 +
    }
 +
 +
}
 +
</code>
 +
 +
Note that you can place this file anywhere that supports class autoloading in Moodle, you'll just need to adjust the $CFG var in the next step appropriately.
 +
 +
Now, in your config.php, add:
 +
<code>
 +
$CFG->alternative_file_system_class = '\\core_file_system_ftp';
 +
$CFG->file_system_ftp_user = 'test';
 +
$CFG->file_system_ftp_pass = 'test';
 +
$CFG->file_system_ftp_host = 'localhost';
 +
$CFG->file_system_ftp_path = 'testing';
 +
</code>
 +
 +
Finally, you'll need to set up an FTP server on localhost, setting up a user with the above credentials and making sure that the directory '''/testing/filedir/''' is writable by the '''test''' user. There are many virtualisation options allowing you to do this fairly quickly and easily.
 +
 +
Once you're done, you should be able to upload files to your new remote file system.

Latest revision as of 06:27, 20 September 2017

Moodle 3.3

As standard Moodle uses the locally available file system for all files. Since Moodle 3.3 it is possible to extend the file system component of the File Storage API to support alternative File Systems.

Introduction

Moodle ships with a file system API which enables the internal Moodle File Storage system to set, and retrieve files, and file content. The standard file system implementation uses the Moodle filedir, which is a locally available directory on disk, and which can be shared between clustered servers via network file systems such as NFS.

Since Moodle 3.3 it is possible to use alternative file systems, including remote file systems. These are easy to set up and configure, and allow for greater scalability which does not depend so heavily upon traditional network file systems. See config-dist.php for more details.

Please note that the file *must* exist remotely and a local file is *optional*. The remote file is seen as canonical and this system is *not* designed to be used to cater to a split brain solution where some files are present in one place, and some files are present in another.

All files accessed via the standard File Storage API are processed using this API, however the existing tempdir, cachedir, and localcachedir parameters remain separate.

Defining a new filesystem

All file system implementations must extend the
file_system
class, and define the required abstract functions.

It is entirely up to the individual implementation how it handles storage, saving, and retrieval of files from its file system, however certain key concepts apply.

Concepts

Moodle File API

The Moodle File API is broken into different components, each having a related but fundamentally separate purpose.

File Storage API

The File Storage API is responsible for all interactions with the rest of Moodle.

Files can be accessed using this API.

Stored File
Any file stored in Moodle's File Storage can be represented as a
stored_file
. The
stored_file
class holds various metadata about the files in the repository.

Content Hash

The Moodle File Storage API performs de-duplication by generating a checksum of the content of the file. The checksum can be generated using the file_storage::hash_from_path and file_storage::hash_from_string functions.

This checksum, referred to as a Content Hash (typically
$contenthash
) is stored for each file in the Moodle File Storage Database tables.

Files are always referred to using this content hash, and can be both stored and fetched using it.

Helper functions allow conversion of
stored_file
objects into a content hash.

Distinction between local and remote file paths

The file system API makes the distinction between a local, and a remote file path.

Local file paths must be capable of existing on disk. Remote file paths may be either the valid local path, or a Protocol.

Note: Several functions offer a
$fetchifnotfound

option. This is particularly useful when dealing with remote file systems which do not keep a local copy of the file on disk. These functions should only be called with a truthy value if the file is required locally.

It is also advisable when dealing with remote file systems to initially store the file on disk in this path as there is a strong possibility that subsequent actions will attempt to read the file further - for example, in order to generate an image thumbnail.

Local file paths

Local file paths must be formatted as a standard local file path. They must not be a streamable URL. This is because some PHP functions are unable to work with seekable, or streamable resources and can only work with local files. These include, but are not limited to:

  • The
    ZipArchive
    used in the
    zip_packer
    ; and
  • curl_file_create
    , used to add files to a curl request; and
  • finfo
    , used to determine mime information about files.

Additionally there are some cases which may suffer performance issues when dealing with streamable files. This includes:

  • getimagesize()
    which must fetch the entire image first in order to determine size.
Remote file paths

Remote file paths may be formatted as a standard local file path. They may be a streamable URL.

See the PHP documentation on Protocols for more information on the accepted formats.

Remote files may be passed into PHP functions such as:

  • file_get_contents
    ; and
  • readfile
    .

Explanation of required functions

setup_instance

The
setup_instance()
function is called during instantiation and allows you to setup any required configuration for your file system implementation.

An example implementation might look like:

/**
 * Prepare the file system for use.
 *
 * Creates the remote storage client and a working directory for the
 * current request, and stores both on the class for later calls.
 */
protected function setup_instance() {
    // Client used to talk to the remote storage system.
    $client = new Awesome\Remote\File\Storage\System();
    self::$client = $client;

    // Request directories only live for the current request and are
    // removed automatically once it has finished.
    $requestdir = make_request_directory();
    self::$filedir = $requestdir;
}

get_local_path_from_hash

The
get_local_path_from_hash
function is responsible for returning the correct path to the file on disk.

This path must be consistent for each contenthash.

The file does not need to exist on disk unless the
$fetchifnotfound
parameter is truthy.

An example implementation might look like:

/**
 * Return the local path for the given content hash.
 *
 * The path is always the same for a given hash. The file is only fetched
 * when $fetchifnotfound is truthy and the path is not yet readable.
 *
 * @param string $contenthash The content hash of the file
 * @param bool $fetchifnotfound Whether to fetch a local copy if it is missing
 * @return string The local path
 */
protected function get_local_path_from_hash($contenthash, $fetchifnotfound = false) {
    $localpath = self::$filedir . DIRECTORY_SEPARATOR . $contenthash;

    // Nothing to fetch: either the caller does not need the file locally,
    // or a readable copy is already in place.
    if (!$fetchifnotfound || is_readable($localpath)) {
        return $localpath;
    }

    $this->fetch_local_copy($contenthash, $localpath);

    return $localpath;
}

get_remote_path_from_hash

The
get_remote_path_from_hash($contenthash)
function is responsible for returning the correct path to the file.

The returned path must be in either:

  • a local file format; or
  • a remote file path as per the Protocol documentation.

Remote paths should not be passed outside of the File System implementation.

An example implementation might look like:

/**
 * Return the remote path for the given content hash.
 *
 * This example hands back a pre-signed URL; its lifetime must be long
 * enough for larger files to be transferred.
 *
 * @param string $contenthash The content hash of the file
 * @param bool $fetchifnotfound Not used by this implementation
 * @return mixed Whatever get_presigned_url() returns for the hash
 */
protected function get_remote_path_from_hash($contenthash, $fetchifnotfound = false) {
    $lifetime = '+6 hours';

    return $this->get_presigned_url($contenthash, $lifetime);
}

Note: If using a one-time/pre-signed URL, please ensure that the lifetime of the URL is sufficient for larger files.

add_file_from_path

The
add_file_from_path
function is responsible for storing the provided local file on disk into your file system.

It is your responsibility to:

  • generate the file's contenthash (if it is not provided);
  • check whether an existing file with the same contenthash exists in the file system;
  • ensure that the contenthash matches if there is a matching file; and
  • copy the file to your file system; and
  • ensure that file permissions are correct.

add_file_from_string

Similar to
add_file_from_path
, the
add_file_from_string
function is responsible for storing the provided string content into your file system.

copy_content_from_storedfile

The
copy_content_from_storedfile
function is responsible for copying an existing file in the file system to a new local file.

If you are using a local file system, you will likely just copy the file:

/**
 * Copy the content of a stored file to the given target path.
 *
 * Local file system variant: the file is copied straight from its local path.
 *
 * @param stored_file $file The file to be copied
 * @param string $target Path of the new file
 * @return bool Result of copy()
 */
public function copy_content_from_storedfile(stored_file $file, $target) {
    $source = $this->get_local_filepath_from_storedfile($file);

    return copy($source, $target);
}

However, if you are implementing a remote file system, you can likely make certain performance improvements by downloading the file straight to the intended target:

/**
 * Copy the content of a stored file to the given target path.
 *
 * Remote file system variant: when no local copy is readable the content is
 * fetched directly into the target instead of going through a local copy.
 *
 * @param stored_file $file The file to be copied
 * @param string $target Path of the new file
 * @return mixed Result of copy(), or of fetch_local_copy() when fetching remotely
 */
public function copy_content_from_storedfile(stored_file $file, $target) {
    // No readable local copy: download straight to the requested target.
    if (!$this->is_readable_locally_from_storedfile($file, false)) {
        return $this->fetch_local_copy($file->get_contenthash(), $target);
    }

    return copy($this->get_local_filepath_from_storedfile($file), $target);
}

remove_file

The
remove_file
function is responsible for removing a file. A file system may choose to implement its own trash mechanism, as in the case of
file_system_filedir.

Example

FTP file system

Tying together the information in the previous sections, we'll now set up an example FTP file system. Please note: Neither the sample
core_file_system_ftp
class, nor the use of FTP as a file system is recommended for use in production installs. It simply serves as an example to aid developers in grasping the concept of a remote file system.


The first step is creating the
core_file_system_ftp
class and setting it up as the alternative file system. To do this, create a file called file_system_ftp.php in your lib/classes directory and add the following content:


<?php
/**
 * FTP File System.
 *
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/**
 * File system class used for low level access to real files via ftp (aieee! don't do it!).
 *
 * Example implementation only. File content lives on an FTP server at
 * "<root url>/filedir/<contenthash>", while a per-request local directory
 * is used to hold local copies of files.
 *
 * @package   core_files
 * @category  files
 * @copyright 2017 Damyon Wiese
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class core_file_system_ftp extends file_system {

    /** @var int Mode passed to mkdir() when creating the remote file pool directory. */
    protected $dirpermissions;

    /** @var mixed Value of $CFG->filepermissions (stored, but not used by this example itself). */
    protected $filepermissions;

    /** @var string|null Local directory holding local copies; null until setup_instance() has run. */
    protected $cachedir = null;

    /**
     * Initialise the instance.
     *
     * Reads the permission settings from $CFG, creates the local request
     * directory used for local copies and makes sure the remote file pool
     * directory exists.
     *
     * @throws file_exception If the remote file pool directory cannot be created
     */
    public function setup_instance() {
        global $CFG;

        $this->dirpermissions = $CFG->directorypermissions;
        $this->filepermissions = $CFG->filepermissions;

        $this->cachedir = make_request_directory();
        $this->check_root_files_exist();
    }

    /**
     * Run setup_instance() once, the first time the instance is actually used.
     *
     * Setting up on every call would allocate a new request directory each
     * time, so the local path of a contenthash would change between calls.
     */
    private function ensure_instance_is_setup() {
        if ($this->cachedir === null) {
            $this->setup_instance();
        }
    }

    /**
     * Make sure the remote file pool directory exists, creating it (and a
     * warning file inside it) when it does not.
     *
     * @throws file_exception If the directory cannot be created
     */
    protected function check_root_files_exist() {
        // Make sure the file pool directory exists.
        $filedir = $this->get_root_path();
        if (!is_dir($filedir)) {
            // Re-check after a failed mkdir(): another request may have created it meanwhile.
            if (!mkdir($filedir, $this->dirpermissions, true) && !is_dir($filedir)) {
                // Permission trouble.
                throw new file_exception('storedfilecannotcreatefiledirs');
            }

            // Place warning file in file pool root.
            $warningfile = $this->get_root_path('warning.txt');
            if (!file_exists($warningfile)) {
                file_put_contents($warningfile,
                        'This directory contains the content of uploaded files and is controlled by Moodle code. ' .
                        'Do not manually move, change or rename any of the files and subdirectories here.');
            }
        }
    }

    /**
     * Get the remote (FTP) path of the file with the given content hash.
     *
     * @param string $contenthash The content hash of the file
     * @return string The ftp:// URL of the file
     */
    protected function get_remote_path_from_hash($contenthash) {
        return $this->get_root_path($contenthash);
    }

    /**
     * Get the local path of the file with the given content hash.
     *
     * The file only has to exist at the returned path when $fetchifnotfound
     * is truthy; in that case it is downloaded if it is not readable yet.
     *
     * @param string $contenthash The content hash of the file
     * @param bool $fetchifnotfound Whether to download the file if there is no local copy
     * @return string The local path
     */
    protected function get_local_path_from_hash($contenthash, $fetchifnotfound = false) {
        $this->ensure_instance_is_setup();
        $localpath = $this->cachedir . DIRECTORY_SEPARATOR . $contenthash;

        if ($fetchifnotfound && !is_readable($localpath)) {
            $remotepath = $this->get_remote_path_from_hash($contenthash);
            // Best effort: if the download fails the path is still returned
            // and the file simply will not be readable there.
            copy($remotepath, $localpath);
        }

        return $localpath;
    }

    /**
     * Build the root ftp:// URL from the $CFG->file_system_ftp_* settings.
     *
     * The URL is built once and then reused.
     *
     * @return string The root URL, without a trailing slash being added
     */
    public function get_root_url() {
        global $CFG;

        static $path = null;

        if ($path === null) {
            // Encode the credentials so that characters such as "@", ":" or "/"
            // in them cannot be mistaken for URL delimiters.
            $user = rawurlencode($CFG->file_system_ftp_user);
            $pass = rawurlencode($CFG->file_system_ftp_pass);

            $path = "ftp://{$user}:{$pass}@" .
                    "{$CFG->file_system_ftp_host}/{$CFG->file_system_ftp_path}";
        }

        return $path;
    }

    /**
     * Get the URL of the remote file pool directory, or of a file inside it.
     *
     * @param string|null $file Optional file name relative to the file pool directory
     * @return string The ftp:// URL
     */
    public function get_root_path($file = null) {
        // These are URLs, so the separator is always "/" whatever the local platform uses.
        $path = $this->get_root_url() . '/filedir';
        if ($file !== null && $file !== '') {
            $path .= '/' . $file;
        }

        return $path;
    }

    /**
     * Copy content of file to given pathname.
     *
     * The content is read straight from its remote path.
     *
     * @param stored_file $file The file to be copied
     * @param string $target real path to the new file
     * @return bool success
     */
    public function copy_content_from_storedfile(stored_file $file, $target) {
        return copy($this->get_remote_path_from_hash($file->get_contenthash()), $target);
    }

    /**
     * Removes the file.
     *
     * @param string $contenthash
     * @return bool False if the file may not be removed or the removal failed, true otherwise
     */
    public function remove_file($contenthash) {
        if (!self::is_file_removable($contenthash)) {
            return false;
        }

        $contentfile = $this->get_remote_path_from_hash($contenthash);
        if (!file_exists($contentfile)) {
            // Nothing is stored remotely, so there is nothing left to remove.
            return true;
        }

        return unlink($contentfile);
    }

    /**
     * Add file content to sha1 pool.
     *
     * @param string $pathname Path to file currently on disk
     * @param string $contenthash SHA1 hash of content if known (performance only)
     * @return array (contenthash, filesize, newfile)
     * @throws file_exception If the file cannot be read or cannot be stored remotely
     */
    public function add_file_from_path($pathname, $contenthash = null) {
        if (!is_readable($pathname)) {
            throw new file_exception('storedfilecannotread', '', $pathname);
        }

        $filesize = filesize($pathname);
        if ($filesize === false) {
            throw new file_exception('storedfilecannotread', '', $pathname);
        }

        if (is_null($contenthash)) {
            $contenthash = sha1_file($pathname);
            // sha1_file() signals failure with false, not null.
            if ($contenthash === false) {
                throw new file_exception('storedfilecannotread', '', $pathname);
            }
        }

        // Copy the file to its local position. This helps perf in some
        // situations. It is only a convenience copy, so a failure here is not fatal.
        $localpath = $this->get_local_path_from_hash($contenthash, false);
        copy($pathname, $localpath);

        $remotepath = $this->get_remote_path_from_hash($contenthash);

        $newfile = false;
        if (!file_exists($remotepath)) {
            $newfile = true;

            // Upload from the original file so that storing remotely does not
            // depend on the local convenience copy having succeeded.
            if (!copy($pathname, $remotepath)) {
                throw new file_exception('storedfilecannotcreatefile');
            }
        }

        return [$contenthash, $filesize, $newfile];
    }

    /**
     * Add string content to sha1 pool.
     *
     * @param string $content file content - binary string
     * @return array (contenthash, filesize, newfile)
     * @throws file_exception If the content cannot be stored remotely
     */
    public function add_file_from_string($content) {
        $contenthash = sha1($content);
        $filesize = strlen($content);

        // Empty content is never written to the pool.
        if ($content === '') {
            return [$contenthash, $filesize, false];
        }

        $newfile = false;
        $remotepath = $this->get_remote_path_from_hash($contenthash);
        if (!file_exists($remotepath)) {
            $newfile = true;

            // Store the file locally first - it's likely to be used again.
            // This is only a convenience copy, so a failure here is not fatal.
            $localpath = $this->get_local_path_from_hash($contenthash, false);
            file_put_contents($localpath, $content);

            // The remote copy is the canonical one: failing to write it is an error.
            if (file_put_contents($remotepath, $content) === false) {
                throw new file_exception('storedfilecannotcreatefile');
            }
        }

        return [$contenthash, $filesize, $newfile];
    }

}

Note that you can place this file anywhere that supports class autoloading in Moodle, you'll just need to adjust the $CFG var in the next step appropriately.

Now, in your config.php, add:

$CFG->alternative_file_system_class = '\\core_file_system_ftp';
$CFG->file_system_ftp_user = 'test';
$CFG->file_system_ftp_pass = 'test';
$CFG->file_system_ftp_host = 'localhost';
$CFG->file_system_ftp_path = 'testing';

Finally, you'll need to set up an FTP server on localhost, setting up a user with the above credentials and making sure that the directory /testing/filedir/ is writable by the test user. There are many virtualisation options allowing you to do this fairly quickly and easily.

Once you're done, you should be able to upload files to your new remote file system.