diff options
Diffstat (limited to 'Documentation/filesystems')
| -rw-r--r-- | Documentation/filesystems/Locking | 4 | ||||
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 159 | ||||
| -rw-r--r-- | Documentation/filesystems/overlayfs.txt | 23 | ||||
| -rw-r--r-- | Documentation/filesystems/proc.txt | 20 | ||||
| -rw-r--r-- | Documentation/filesystems/vfs.txt | 11 |
5 files changed, 208 insertions, 9 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 06d443450f21..539ac9c78ccf 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -197,7 +197,9 @@ prototypes: int (*releasepage) (struct page *, int); void (*freepage)(struct page *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); + void (*putback_page) (struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); @@ -221,7 +223,9 @@ invalidatepage: yes releasepage: yes freepage: yes direct_IO: +isolate_page: yes migratepage: yes (both) +putback_page: yes launder_page: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index ecccb51c7279..c2e941317b87 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -126,6 +126,8 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs does not aware of cold files such as media files. inline_xattr Enable the inline xattrs feature. noinline_xattr Disable the inline xattrs feature. +inline_xattr_size=%u Support configuring inline xattr size, it depends on + flexible inline xattr feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. inline_dentry Enable the inline dir feature: data in new created @@ -155,6 +157,27 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +fault_injection=%d Enable fault injection in all supported types with + specified injection rate. 
+fault_type=%d Support configuring fault injection type, should be + enabled with fault_injection option, fault type value + is shown below, it supports single or combined type. + Type_Name Type_Value + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. @@ -191,6 +214,22 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix", non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. +checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Checkpointing is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. + While mounting with checkpoint=disable, the filesystem must + run garbage collection to ensure that all available space can + be used. If this takes too much time, the mount may return + EAGAIN. You may optionally add a value to indicate how much + of the disk you would be willing to temporarily give up to + avoid additional garbage collection. This can be given as a + number of blocks, or as a percent. For instance, mounting + with checkpoint=disable:100% would always succeed, but it may + hide up to all remaining free space. 
The actual space that + would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable + This space is reclaimed once checkpoint=enable. ================================================================================ DEBUGFS ENTRIES @@ -218,11 +257,14 @@ Files in /sys/fs/f2fs/<devname> .............................................................................. File Content - gc_max_sleep_time This tuning parameter controls the maximum sleep + gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. + 500 ms is set by default. See gc_urgent below. + + gc_min_sleep_time This tuning parameter controls the minimum sleep time for the garbage collection thread. Time is in milliseconds. - gc_min_sleep_time This tuning parameter controls the minimum sleep + gc_max_sleep_time This tuning parameter controls the maximum sleep time for the garbage collection thread. Time is in milliseconds. @@ -242,9 +284,6 @@ Files in /sys/fs/f2fs/<devname> to 1, background thread starts to do GC by given gc_urgent_sleep_time interval. - gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. - 500 ms is set by default. See above gc_urgent. - reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree segments is larger than the number of segments @@ -259,7 +298,16 @@ Files in /sys/fs/f2fs/<devname> checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. - trim_sections This parameter controls the number of sections + discard_granularity This parameter controls the granularity of discard + command size. It will issue discard commands only if + the size is larger than given granularity. Its + unit size is 4KB, and 4 (=16KB) is set by default. + The maximum value is 128 (=512KB). + + reserved_blocks This parameter indicates the number of blocks that + f2fs reserves internally for root. 
+ + batched_trim_sections This parameter controls the number of sections to be trimmed out in batch mode when FITRIM conducts. 32 sections is set by default. @@ -281,11 +329,35 @@ Files in /sys/fs/f2fs/<devname> the number is less than this value, it triggers in-place-updates. + min_seq_blocks This parameter controls the threshold to serialize + write IOs issued by multiple threads in parallel. + + min_hot_blocks This parameter controls the threshold to allocate + a hot data log for pending data blocks to write. + + min_ssr_sections This parameter adds the threshold when deciding + SSR block allocation. If this is large, SSR mode + will be enabled early. + + ram_thresh This parameter controls the memory footprint used + by free nids and cached nat entries. By default, + 10 is set, which indicates 10 MB / 1 GB RAM. + + ra_nid_pages When building free nids, F2FS reads NAT blocks + ahead for speed up. Default is 0. + + dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS + determines flushing them in background. + max_victim_search This parameter controls the number of trials to find a victim segment when conducting SSR and cleaning operations. The default value is 4096 which covers 8GB block address range. + migration_granularity For large-sized sections, F2FS can stop GC given + this granularity instead of reclaiming entire + section. + dir_level This parameter controls the directory level to support large directory. If a directory has a number of files, it can reduce the file lookup @@ -293,9 +365,53 @@ Files in /sys/fs/f2fs/<devname> Otherwise, it needs to decrease this value to reduce the space overhead. The default value is 0. - ram_thresh This parameter controls the memory footprint used - by free nids and cached nat entries. By default, - 10 is set, which indicates 10 MB / 1 GB RAM. + cp_interval F2FS tries to do checkpoint periodically, 60 secs + by default. 
+ + idle_interval F2FS detects system is idle, if there's no F2FS + operations during given interval, 5 secs by + default. + + discard_idle_interval F2FS detects the discard thread is idle, given + time interval. Default is 5 secs. + + gc_idle_interval F2FS detects the GC thread is idle, given time + interval. Default is 5 secs. + + umount_discard_timeout When unmounting the disk, F2FS waits for finishing + queued discard commands which can take huge time. + This gives time out for it, 5 secs by default. + + iostat_enable This controls to enable/disable iostat in F2FS. + + readdir_ra This enables/disables readahead of inode blocks + in readdir, and default is enabled. + + gc_pin_file_thresh This indicates how many times GC can fail for the + pinned file. If it exceeds this, F2FS doesn't + guarantee its pinning state. 2048 trials is set + by default. + + extension_list This allows changing extension_list for hot/cold + files at runtime. + + inject_rate This controls injection rate of arbitrary faults. + + inject_type This controls injection type of arbitrary faults. + + dirty_segments This shows # of dirty segments. + + lifetime_write_kbytes This shows the amount of data written to the disk. + + features This shows current features enabled on F2FS. + + current_reserved_blocks This shows # of blocks currently reserved. + + unusable If checkpoint=disable, this shows the number of + blocks that are unusable. + If checkpoint=enable it shows the number of blocks + that would be unusable if checkpoint=disable were + to be set. ================================================================================ USAGE @@ -628,3 +744,28 @@ algorithm. In order to identify whether the data in the victim segment are valid or not, F2FS manages a bitmap. Each bit represents the validity of a block, and the bitmap is composed of a bit stream covering whole blocks in main area. + +Fallocate(2) Policy +------------------- + +The default policy follows the below posix rule. 
+ +Allocating disk space + The default operation (i.e., mode is zero) of fallocate() allocates + the disk space within the range specified by offset and len. The + file size (as reported by stat(2)) will be changed if offset+len is + greater than the file size. Any subregion within the range specified + by offset and len that did not contain data before the call will be + initialized to zero. This default behavior closely resembles the + behavior of the posix_fallocate(3) library function, and is intended + as a method of optimally implementing that function. + +However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) prior to +fallocate(fd, DEFAULT_MODE), it allocates on-disk block addresses having +zero or random data, which is useful for the scenario below: + 1. create(fd) + 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE) + 3. fallocate(fd, 0, 0, size) + 4. address = fibmap(fd, offset) + 5. open(blkdev) + 6. write(blkdev, address) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 28091457b71a..771bb220449b 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -82,6 +82,29 @@ Only the lists of names from directories are merged. Other content such as metadata and extended attributes are reported for the upper directory only. These attributes of the lower directory are hidden. +credentials +----------- + +By default, all access to the upper, lower and work directories uses the +recorded mounter's MAC and DAC credentials. The incoming accesses are +checked against the caller's credentials. + +In the case where caller MAC or DAC credentials do not overlap, a +use case available in older versions of the driver, the +override_creds mount flag can be turned off and help when the use +pattern has caller with legitimate credentials where the mounter +does not. Several unintended side effects will occur though. 
The +caller without certain key capabilities or lower privilege will not +always be able to delete files or directories, create nodes, or +search some restricted directories. The ability to search and read +a directory entry is spotty as a result of the cache mechanism not +retesting the credentials because of the assumption, a privileged +caller can fill cache, then a lower privilege can read the directory +cache. The uneven security model where cache, upperdir and workdir +are opened at privilege, but accessed without creating a form of +privilege escalation, should only be used with strict understanding +of the side effects and of the security policies. + whiteouts and opaque directories -------------------------------- diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 9b8aad3eb3b8..d4f8be8f63e1 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -138,6 +138,7 @@ Table 1-1: Process specific entries in /proc maps Memory maps to executables and library files (2.4) mem Memory held by this process root Link to the root directory of this process + reclaim Reclaim pages in this process stat Process status statm Process memory status information status Process status in human readable form @@ -501,6 +502,25 @@ current value: Any other value written to /proc/PID/clear_refs will have no effect. +The file /proc/PID/reclaim is used to reclaim pages in this process. +To reclaim file-backed pages, + > echo file > /proc/PID/reclaim + +To reclaim anonymous pages, + > echo anon > /proc/PID/reclaim + +To reclaim all pages, + > echo all > /proc/PID/reclaim + +Also, you can specify an address range of the process so that only part of +its address space will be reclaimed. The format is as follows: + > echo addr size-byte > /proc/PID/reclaim + +NOTE: addr should be page-aligned. + +Below is an example which tries to reclaim 2M from 0x100000. 
+ > echo 0x100000 2M > /proc/PID/reclaim + The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 8c6f07ad373a..66ddf1e8fe28 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -593,9 +593,14 @@ struct address_space_operations { int (*releasepage) (struct page *, int); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + /* isolate a page for migration */ + bool (*isolate_page) (struct page *, isolate_mode_t); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); + /* put migration-failed page back to right list */ + void (*putback_page) (struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); @@ -748,6 +753,10 @@ struct address_space_operations { and transfer data directly between the storage and the application's address space. + isolate_page: Called by the VM when isolating a movable non-lru page. + If page is successfully isolated, VM marks the page as PG_isolated + via __SetPageIsolated. + migrate_page: This is used to compact the physical memory usage. If the VM wants to relocate a page (maybe off a memory card that is signalling imminent failure) it will pass a new page @@ -755,6 +764,8 @@ struct address_space_operations { transfer any private data across and update any references that it has to the page. + putback_page: Called by the VM when isolated page's migration fails. + launder_page: Called before freeing a page - it writes back the dirty page. 
To prevent redirtying the page, it is kept locked during the whole operation. |
