summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Kconfig.binfmt8
-rw-r--r--fs/Makefile3
-rw-r--r--fs/affs/amigaffs.c63
-rw-r--r--fs/affs/file.c26
-rw-r--r--fs/afs/file.c14
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/autofs4/expire.c5
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/block_dev.c22
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/async-thread.c22
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/backref.c22
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c82
-rw-r--r--fs/btrfs/ctree.h8
-rw-r--r--fs/btrfs/delayed-ref.c11
-rw-r--r--fs/btrfs/disk-io.c19
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/export.h5
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c68
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/extent_map.c11
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/free-space-cache.c16
-rw-r--r--fs/btrfs/inode-map.c1
-rw-r--r--fs/btrfs/inode.c84
-rw-r--r--fs/btrfs/ioctl.c43
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/qgroup.c6
-rw-r--r--fs/btrfs/relocation.c44
-rw-r--r--fs/btrfs/scrub.c17
-rw-r--r--fs/btrfs/send.c74
-rw-r--r--fs/btrfs/super.c44
-rw-r--r--fs/btrfs/tests/btrfs-tests.c9
-rw-r--r--fs/btrfs/tests/inode-tests.c1
-rw-r--r--fs/btrfs/tests/qgroup-tests.c4
-rw-r--r--fs/btrfs/transaction.c14
-rw-r--r--fs/btrfs/tree-log.c67
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/uuid-tree.c2
-rw-r--r--fs/btrfs/volumes.c25
-rw-r--r--fs/btrfs/volumes.h1
-rw-r--r--fs/cachefiles/rdwr.c5
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/caps.c36
-rw-r--r--fs/ceph/export.c5
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/ceph/mds_client.c14
-rw-r--r--fs/char_dev.c88
-rw-r--r--fs/cifs/asn1.c16
-rw-r--r--fs/cifs/cifs_dfs_ref.c7
-rw-r--r--fs/cifs/cifs_unicode.c17
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifssmb.c12
-rw-r--r--fs/cifs/connect.c28
-rw-r--r--fs/cifs/dir.c22
-rw-r--r--fs/cifs/file.c17
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2misc.c11
-rw-r--r--fs/cifs/smb2ops.c14
-rw-r--r--fs/cifs/smb2pdu.c22
-rw-r--r--fs/debugfs/inode.c10
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/dlm/debug_fs.c1
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c9
-rw-r--r--fs/dlm/lockspace.c7
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/memory.c9
-rw-r--r--fs/dlm/user.c3
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/ecryptfs/messaging.c1
-rw-r--r--fs/efivarfs/inode.c2
-rw-r--r--fs/efivarfs/super.c3
-rw-r--r--fs/eventpoll.c104
-rw-r--r--fs/exec.c8
-rw-r--r--fs/exofs/inode.c12
-rw-r--r--fs/exportfs/expfs.c1
-rw-r--r--fs/ext2/balloc.c14
-rw-r--r--fs/ext2/ialloc.c3
-rw-r--r--fs/ext2/super.c6
-rw-r--r--fs/ext2/xattr.c3
-rw-r--r--fs/ext4/balloc.c14
-rw-r--r--fs/ext4/block_validity.c116
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h90
-rw-r--r--fs/ext4/ext4_extents.h9
-rw-r--r--fs/ext4/extents.c107
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/ialloc.c91
-rw-r--r--fs/ext4/indirect.c6
-rw-r--r--fs/ext4/inline.c9
-rw-r--r--fs/ext4/inode.c133
-rw-r--r--fs/ext4/ioctl.c7
-rw-r--r--fs/ext4/mballoc.c80
-rw-r--r--fs/ext4/migrate.c23
-rw-r--r--fs/ext4/namei.c206
-rw-r--r--fs/ext4/resize.c67
-rw-r--r--fs/ext4/super.c323
-rw-r--r--fs/ext4/xattr.c66
-rw-r--r--fs/f2fs/checkpoint.c2
-rw-r--r--fs/f2fs/file.c3
-rw-r--r--fs/fat/inode.c25
-rw-r--r--fs/file.c21
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/fs-writeback.c120
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c48
-rw-r--r--fs/fuse/dir.c27
-rw-r--r--fs/fuse/file.c16
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c5
-rw-r--r--fs/gfs2/glock.c5
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/gfs2/lock_dlm.c13
-rw-r--r--fs/gfs2/ops_fstype.c20
-rw-r--r--fs/gfs2/rgrp.c9
-rw-r--r--fs/hfs/bfind.c14
-rw-r--r--fs/hfs/bnode.c25
-rw-r--r--fs/hfs/btree.h7
-rw-r--r--fs/hfs/super.c10
-rw-r--r--fs/hfsplus/attributes.c4
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/isofs/dir.c1
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/namei.c1
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c55
-rw-r--r--fs/jbd2/journal.c21
-rw-r--r--fs/jbd2/transaction.c44
-rw-r--r--fs/jffs2/compr_rtime.c3
-rw-r--r--fs/jffs2/dir.c6
-rw-r--r--fs/jffs2/readinode.c16
-rw-r--r--fs/jffs2/scan.c2
-rw-r--r--fs/jffs2/summary.c3
-rw-r--r--fs/jfs/inode.c3
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/jfs_dmap.h2
-rw-r--r--fs/jfs/jfs_filsys.h1
-rw-r--r--fs/jfs/jfs_logmgr.c1
-rw-r--r--fs/jfs/jfs_mount.c61
-rw-r--r--fs/jfs/jfs_txnmgr.c3
-rw-r--r--fs/kernfs/dir.c224
-rw-r--r--fs/kernfs/file.c92
-rw-r--r--fs/kernfs/kernfs-internal.h2
-rw-r--r--fs/kernfs/mount.c87
-rw-r--r--fs/libfs.c20
-rw-r--r--fs/lockd/host.c20
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/locks.c25
-rw-r--r--fs/logfs/dev_bdev.c4
-rw-r--r--fs/logfs/dev_mtd.c4
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/logfs.h4
-rw-r--r--fs/minix/inode.c36
-rw-r--r--fs/minix/itree_common.c8
-rw-r--r--fs/namei.c7
-rw-r--r--fs/namespace.c71
-rw-r--r--fs/nfs/Kconfig4
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/dir.c60
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c13
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/namespace.c14
-rw-r--r--fs/nfs/nfs3acl.c22
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfs/nfs42proc.c5
-rw-r--r--fs/nfs/nfs42xdr.c3
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4namespace.c2
-rw-r--r--fs/nfs/nfs4proc.c89
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/nfs/pagelist.c29
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs_common/grace.c6
-rw-r--r--fs/nfsd/nfs3xdr.c7
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4recover.c17
-rw-r--r--fs/nfsd/nfs4state.c9
-rw-r--r--fs/nfsd/nfs4xdr.c19
-rw-r--r--fs/nfsd/nfsctl.c18
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/sysfs.c27
-rw-r--r--fs/notify/fanotify/Kconfig1
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/ntfs/inode.c14
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/file.c82
-rw-r--r--fs/ocfs2/journal.c14
-rw-r--r--fs/ocfs2/journal.h8
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/ocfs2_fs.h4
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/suballoc.c13
-rw-r--r--fs/ocfs2/super.c19
-rw-r--r--fs/open.c6
-rw-r--r--fs/overlayfs/copy_up.c2
-rw-r--r--fs/overlayfs/dir.c10
-rw-r--r--fs/overlayfs/inode.c13
-rw-r--r--fs/pipe.c21
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/posix_acl.c7
-rw-r--r--fs/proc/array.c207
-rw-r--r--fs/proc/base.c32
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/kcore.c18
-rw-r--r--fs/proc/meminfo.c38
-rw-r--r--fs/proc/namespaces.c3
-rw-r--r--fs/proc/self.c9
-rw-r--r--fs/proc/stat.c49
-rw-r--r--fs/proc/thread_self.c2
-rw-r--r--fs/proc/vmcore.c15
-rw-r--r--fs/pstore/inode.c31
-rw-r--r--fs/pstore/platform.c5
-rw-r--r--fs/pstore/ram.c13
-rw-r--r--fs/pstore/ram_core.c4
-rw-r--r--fs/qnx4/dir.c69
-rw-r--r--fs/quota/dquot.c40
-rw-r--r--fs/quota/quota_tree.c23
-rw-r--r--fs/quota/quota_v2.c1
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/read_write.c54
-rw-r--r--fs/readdir.c40
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/reiserfs/journal.c14
-rw-r--r--fs/reiserfs/stree.c30
-rw-r--r--fs/reiserfs/super.c20
-rw-r--r--fs/reiserfs/xattr.c7
-rw-r--r--fs/reiserfs/xattr.h2
-rw-r--r--fs/romfs/storage.c4
-rw-r--r--fs/sdfat/Kconfig126
-rw-r--r--fs/sdfat/LICENSE339
-rw-r--r--fs/sdfat/Makefile24
-rw-r--r--fs/sdfat/README.md19
-rw-r--r--fs/sdfat/amap_smart.c1314
-rw-r--r--fs/sdfat/amap_smart.h137
-rw-r--r--fs/sdfat/api.c636
-rw-r--r--fs/sdfat/api.h409
-rw-r--r--fs/sdfat/blkdev.c416
-rw-r--r--fs/sdfat/cache.c846
-rw-r--r--fs/sdfat/config.h146
-rw-r--r--fs/sdfat/core.c3694
-rw-r--r--fs/sdfat/core.h221
-rw-r--r--fs/sdfat/core_exfat.c1560
-rw-r--r--fs/sdfat/core_fat.c1465
-rw-r--r--fs/sdfat/dfr.c1372
-rw-r--r--fs/sdfat/dfr.h261
-rw-r--r--fs/sdfat/extent.c351
-rw-r--r--fs/sdfat/fatent.c420
-rw-r--r--fs/sdfat/misc.c464
-rw-r--r--fs/sdfat/mpage.c635
-rw-r--r--fs/sdfat/nls.c478
-rw-r--r--fs/sdfat/sdfat.c5255
-rw-r--r--fs/sdfat/sdfat.h528
-rw-r--r--fs/sdfat/sdfat_fs.h423
-rw-r--r--fs/sdfat/statistics.c281
-rw-r--r--fs/sdfat/upcase.h407
-rw-r--r--fs/sdfat/version.h25
-rw-r--r--fs/sdfat/xattr.c132
-rw-r--r--fs/select.c3
-rw-r--r--fs/seq_file.c61
-rw-r--r--fs/signalfd.c12
-rw-r--r--fs/splice.c282
-rw-r--r--fs/squashfs/export.c45
-rw-r--r--fs/squashfs/file.c6
-rw-r--r--fs/squashfs/id.c42
-rw-r--r--fs/squashfs/squashfs_fs.h1
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c6
-rw-r--r--fs/squashfs/xattr.h10
-rw-r--r--fs/squashfs/xattr_id.c68
-rw-r--r--fs/super.c74
-rw-r--r--fs/sysfs/dir.c6
-rw-r--r--fs/sysfs/file.c55
-rw-r--r--fs/timerfd.c22
-rw-r--r--fs/tracefs/inode.c79
-rw-r--r--fs/ubifs/debug.c1
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/ubifs/io.c29
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/inode.c34
-rw-r--r--fs/udf/misc.c13
-rw-r--r--fs/udf/namei.c4
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xattr.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c13
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_icache.c58
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_ioctl32.c6
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_quotaops.c3
-rw-r--r--fs/xfs/xfs_rtalloc.c25
-rw-r--r--fs/xfs/xfs_sysfs.h6
-rw-r--r--fs/xfs/xfs_trans_dquot.c2
324 files changed, 26852 insertions, 2027 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1e9bb8db7b48..3a56f4fa59f9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -457,10 +457,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
}
#ifdef CONFIG_9P_FSCACHE
- if (v9ses->fscache) {
+ if (v9ses->fscache)
v9fs_cache_session_put_cookie(v9ses);
- kfree(v9ses->cachetag);
- }
+ kfree(v9ses->cachetag);
#endif
kfree(v9ses->uname);
kfree(v9ses->aname);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 9dbf37147126..b5aa3e005b9e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -624,9 +624,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
- .range_start = vma->vm_pgoff * PAGE_SIZE,
+ .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
/* absolute end, byte at end included */
- .range_end = vma->vm_pgoff * PAGE_SIZE +
+ .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1),
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 89ddd182f568..4d90075f54bf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -103,6 +103,7 @@ if BLOCK
menu "DOS/FAT/NT Filesystems"
source "fs/fat/Kconfig"
+source "fs/sdfat/Kconfig"
source "fs/ntfs/Kconfig"
endmenu
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 2d0cbbd14cfc..72c03354c14b 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,7 @@
config BINFMT_ELF
bool "Kernel support for ELF binaries"
depends on MMU && (BROKEN || !FRV)
+ select ELFCORE
default y
---help---
ELF (Executable and Linkable Format) is a format for libraries and
@@ -26,6 +27,7 @@ config BINFMT_ELF
config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
+ select ELFCORE
config ARCH_BINFMT_ELF_STATE
bool
@@ -34,6 +36,7 @@ config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
+ select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
segments of a binary to be located in memory independently of each
@@ -43,6 +46,11 @@ config BINFMT_ELF_FDPIC
It is also possible to run FDPIC ELF binaries on MMU linux also.
+config ELFCORE
+ bool
+ help
+ This option enables kernel/elfcore.o.
+
config CORE_DUMP_DEFAULT_ELF_HEADERS
bool "Write ELF core dumps with partial segments"
default y
diff --git a/fs/Makefile b/fs/Makefile
index 4644db462ba9..969caba6b282 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
-obj-$(CONFIG_ANON_INODES) += anon_inodes.o
+obj-y += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
obj-$(CONFIG_TIMERFD) += timerfd.o
obj-$(CONFIG_EVENTFD) += eventfd.o
@@ -77,6 +77,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
obj-$(CONFIG_CODA_FS) += coda/
obj-$(CONFIG_MINIX_FS) += minix/
obj-$(CONFIG_FAT_FS) += fat/
+obj-$(CONFIG_SDFAT_FS) += sdfat/
obj-$(CONFIG_BFS_FS) += bfs/
obj-$(CONFIG_ISO9660_FS) += isofs/
obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 5fa92bc790ef..c1b344e56e85 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,23 +390,23 @@ prot_to_mode(u32 prot)
umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!(prot & FIBF_NOREAD))
- mode |= S_IRUSR;
+ mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
- mode |= S_IXUSR;
+ mode |= 0100;
if (prot & FIBF_GRP_WRITE)
- mode |= S_IWGRP;
+ mode |= 0020;
if (prot & FIBF_GRP_READ)
- mode |= S_IRGRP;
+ mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
- mode |= S_IXGRP;
+ mode |= 0010;
if (prot & FIBF_OTR_WRITE)
- mode |= S_IWOTH;
+ mode |= 0002;
if (prot & FIBF_OTR_READ)
- mode |= S_IROTH;
+ mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
- mode |= S_IXOTH;
+ mode |= 0001;
return mode;
}
@@ -417,24 +417,51 @@ mode_to_prot(struct inode *inode)
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
- if (!(mode & S_IXUSR))
+ /*
+ * First, clear all RWED bits for owner, group, other.
+ * Then, recalculate them afresh.
+ *
+ * We'll always clear the delete-inhibit bit for the owner, as that is
+ * the classic single-user mode AmigaOS protection bit and we need to
+ * stay compatible with all scenarios.
+ *
+ * Since multi-user AmigaOS is an extension, we'll only set the
+ * delete-allow bit if any of the other bits in the same user class
+ * (group/other) are used.
+ */
+ prot &= ~(FIBF_NOEXECUTE | FIBF_NOREAD
+ | FIBF_NOWRITE | FIBF_NODELETE
+ | FIBF_GRP_EXECUTE | FIBF_GRP_READ
+ | FIBF_GRP_WRITE | FIBF_GRP_DELETE
+ | FIBF_OTR_EXECUTE | FIBF_OTR_READ
+ | FIBF_OTR_WRITE | FIBF_OTR_DELETE);
+
+ /* Classic single-user AmigaOS flags. These are inverted. */
+ if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
- if (!(mode & S_IRUSR))
+ if (!(mode & 0400))
prot |= FIBF_NOREAD;
- if (!(mode & S_IWUSR))
+ if (!(mode & 0200))
prot |= FIBF_NOWRITE;
- if (mode & S_IXGRP)
+
+ /* Multi-user extended flags. Not inverted. */
+ if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
- if (mode & S_IRGRP)
+ if (mode & 0040)
prot |= FIBF_GRP_READ;
- if (mode & S_IWGRP)
+ if (mode & 0020)
prot |= FIBF_GRP_WRITE;
- if (mode & S_IXOTH)
+ if (mode & 0070)
+ prot |= FIBF_GRP_DELETE;
+
+ if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
- if (mode & S_IROTH)
+ if (mode & 0004)
prot |= FIBF_OTR_READ;
- if (mode & S_IWOTH)
+ if (mode & 0002)
prot |= FIBF_OTR_WRITE;
+ if (mode & 0007)
+ prot |= FIBF_OTR_DELETE;
AFFS_I(inode)->i_protect = prot;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 659c579c4588..38e0fd4caf2b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -426,6 +426,24 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
+static int affs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
+ return ret;
+}
+
static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,affs_get_block);
@@ -435,7 +453,7 @@ const struct address_space_operations affs_aops = {
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
- .write_end = generic_write_end,
+ .write_end = affs_write_end,
.direct_IO = affs_direct_IO,
.bmap = _affs_bmap
};
@@ -793,6 +811,12 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
err_first_bh:
unlock_page(page);
page_cache_release(page);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index cf8a07e282a6..5290f6e83605 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -123,11 +123,10 @@ static void afs_file_readpage_read_complete(struct page *page,
/*
* read page from file, directory or symlink, given a key to use
*/
-int afs_page_filler(void *data, struct page *page)
+static int __afs_page_filler(struct key *key, struct page *page)
{
struct inode *inode = page->mapping->host;
struct afs_vnode *vnode = AFS_FS_I(inode);
- struct key *key = data;
size_t len;
off_t offset;
int ret;
@@ -209,6 +208,13 @@ error:
return ret;
}
+int afs_page_filler(struct file *data, struct page *page)
+{
+ struct key *key = (struct key *)data;
+
+ return __afs_page_filler(key, page);
+}
+
/*
* read page from file, directory or symlink, given a file to nominate the key
* to be used
@@ -221,14 +227,14 @@ static int afs_readpage(struct file *file, struct page *page)
if (file) {
key = file->private_data;
ASSERT(key != NULL);
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
} else {
struct inode *inode = page->mapping->host;
key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
} else {
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
key_put(key);
}
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1330b2a695ff..64452ba25988 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -499,7 +499,7 @@ extern const struct file_operations afs_file_operations;
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_page_filler(void *, struct page *);
+extern int afs_page_filler(struct file *, struct page *);
/*
* flock.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ccd0b212e82a..05ba277e6269 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
- mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
+ mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
free_page((unsigned long) devname);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 0d8b9c4f27f2..5124f06c32bc 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -467,9 +467,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
*/
flags &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
- if (!found || found != expired)
- /* Something has changed, continue */
+ if (found != expired) { // something has changed, continue
+ dput(found);
goto next;
+ }
if (expired != dentry)
dput(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 98198c57370b..acd43792ef82 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -439,8 +439,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_uid();
- wq->gid = current_gid();
+ wq->uid = current_cred()->uid;
+ wq->gid = current_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 91abe314a771..3eb05f441451 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1101,6 +1101,18 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->start_stack = bprm->p;
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
+ /*
+ * For architectures with ELF randomization, when executing
+ * a loader directly (i.e. no interpreter listed in ELF
+ * headers), move the brk area out of the mmap region
+ * (since it grows up, and may collide early with the stack
+ * growing down), and into the unused ELF_ET_DYN_BASE region.
+ */
+ if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+ loc->elf_ex.e_type == ET_DYN && !interpreter)
+ current->mm->brk = current->mm->start_brk =
+ ELF_ET_DYN_BASE;
+
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
@@ -1718,7 +1730,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
(!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset->n * regset->size;
- void *data = kmalloc(size, GFP_KERNEL);
+ void *data = kzalloc(size, GFP_KERNEL);
if (unlikely(!data))
return 0;
ret = regset->get(t->task, regset,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 43b80ca84d9c..83c007612a68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1203,10 +1203,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
*/
if (!for_part) {
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
+ if (ret != 0)
return ret;
- }
}
restart:
@@ -1276,8 +1274,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, mode, 1);
- if (ret)
+ if (ret) {
+ bdput(whole);
goto out_clear;
+ }
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
@@ -1337,7 +1337,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
put_disk(disk);
module_put(owner);
out:
- bdput(bdev);
return ret;
}
@@ -1423,6 +1422,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
bdput(whole);
}
+ if (res)
+ bdput(bdev);
+
return res;
}
EXPORT_SYMBOL(blkdev_get);
@@ -1539,6 +1541,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minute)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 80e9c18ea64f..fd6b67c40d9d 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,6 +9,8 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
+ depends on !PPC_256K_PAGES # powerpc
+ depends on !PAGE_SIZE_256KB # hexagon
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index a09264d8b853..d096254d9acc 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -261,6 +261,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
ordered_list);
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
+ /*
+ * Orders all subsequent loads after reading WORK_DONE_BIT,
+ * paired with the smp_mb__before_atomic in btrfs_work_helper
+ * this guarantees that the ordered function will see all
+ * updates from ordinary work function.
+ */
+ smp_rmb();
/*
* we are going to call the ordered done function, but
@@ -310,6 +317,13 @@ static void normal_work_helper(struct btrfs_work *work)
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
+ /*
+ * Ensures all memory accesses done in the work function are
+ * ordered before setting the WORK_DONE_BIT. Ensuring the thread
+ * which is going to executed the ordered work sees them.
+ * Pairs with the smp_rmb in run_ordered_work.
+ */
+ smp_mb__before_atomic();
set_bit(WORK_DONE_BIT, &work->flags);
run_ordered_work(wq);
}
@@ -389,3 +403,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
+
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
+{
+ if (wq->high)
+ flush_workqueue(wq->high->normal_wq);
+
+ flush_workqueue(wq->normal->normal_wq);
+}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 8e1d6576d764..7ea220726de2 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -81,4 +81,6 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
+
#endif
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 81c5d07a2af1..00c9a9e719ec 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -975,7 +975,12 @@ again:
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ if (ret == 0) {
+ /* This shouldn't happen, indicates a bug or fs corruption. */
+ ASSERT(ret != 0);
+ ret = -EUCLEAN;
+ goto out;
+ }
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (trans && likely(trans->type != __TRANS_DUMMY) &&
@@ -1104,10 +1109,18 @@ again:
goto out;
if (!ret && extent_item_pos) {
/*
- * we've recorded that parent, so we must extend
- * its inode list here
+ * We've recorded that parent, so we must extend
+ * its inode list here.
+ *
+ * However if there was corruption we may not
+ * have found an eie, return an error in this
+ * case.
*/
- BUG_ON(!eie);
+ ASSERT(eie);
+ if (!eie) {
+ ret = -EUCLEAN;
+ goto out;
+ }
while (eie->next)
eie = eie->next;
eie->next = ref->inode_list;
@@ -1221,6 +1234,7 @@ static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
+ *roots = NULL;
return ret;
}
node = ulist_next(tmp, &uiter);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bae05c5c75ba..92601775ec5e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -290,7 +290,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ !cb->errors);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a980b3309770..fbb4c81f6311 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -332,26 +332,6 @@ struct tree_mod_elem {
struct tree_mod_root old_root;
};
-static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
-{
- read_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
-{
- read_unlock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
-{
- write_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
-{
- write_unlock(&fs_info->tree_mod_log_lock);
-}
-
/*
* Pull a new tree mod seq number for our operation.
*/
@@ -371,14 +351,12 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
- tree_mod_log_write_lock(fs_info);
- spin_lock(&fs_info->tree_mod_seq_lock);
+ write_lock(&fs_info->tree_mod_log_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
return elem->seq;
}
@@ -397,7 +375,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
if (!seq_putting)
return;
- spin_lock(&fs_info->tree_mod_seq_lock);
+ write_lock(&fs_info->tree_mod_log_lock);
list_del(&elem->list);
elem->seq = 0;
@@ -408,29 +386,27 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* blocker with lower sequence number exists, we
* cannot remove anything from the log
*/
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ write_unlock(&fs_info->tree_mod_log_lock);
return;
}
min_seq = cur_elem->seq;
}
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
/*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
- tree_mod_log_write_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = container_of(node, struct tree_mod_elem, node);
- if (tm->seq > min_seq)
+ if (tm->seq >= min_seq)
continue;
rb_erase(node, tm_root);
kfree(tm);
}
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
}
/*
@@ -441,7 +417,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* operations, or the shifted logical of the affected block for all other
* operations.
*
- * Note: must be called with write lock (tree_mod_log_write_lock).
+ * Note: must be called with write lock for fs_info::tree_mod_log_lock.
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -481,7 +457,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
* Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
* returns zero with the tree_mod_log_lock acquired. The caller must hold
* this until all tree mod log insertions are recorded in the rb tree and then
- * call tree_mod_log_write_unlock() to release.
+ * write unlock fs_info::tree_mod_log_lock.
*/
static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb) {
@@ -491,9 +467,9 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
if (eb && btrfs_header_level(eb) == 0)
return 1;
- tree_mod_log_write_lock(fs_info);
+ write_lock(&fs_info->tree_mod_log_lock);
if (list_empty(&(fs_info)->tree_mod_seq_list)) {
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
return 1;
}
@@ -557,7 +533,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
}
ret = __tree_mod_log_insert(fs_info, tm);
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
kfree(tm);
@@ -621,7 +597,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
ret = __tree_mod_log_insert(fs_info, tm);
if (ret)
goto free_tms;
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&eb->fs_info->tree_mod_log_lock);
kfree(tm_list);
return 0;
@@ -632,7 +608,7 @@ free_tms:
kfree(tm_list[i]);
}
if (locked)
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&eb->fs_info->tree_mod_log_lock);
kfree(tm_list);
kfree(tm);
@@ -713,7 +689,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (!ret)
ret = __tree_mod_log_insert(fs_info, tm);
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
kfree(tm_list);
@@ -741,7 +717,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct tree_mod_elem *found = NULL;
u64 index = start >> PAGE_CACHE_SHIFT;
- tree_mod_log_read_lock(fs_info);
+ read_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
@@ -769,7 +745,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
break;
}
}
- tree_mod_log_read_unlock(fs_info);
+ read_unlock(&fs_info->tree_mod_log_lock);
return found;
}
@@ -850,7 +826,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
goto free_tms;
}
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
return 0;
@@ -862,7 +838,7 @@ free_tms:
kfree(tm_list[i]);
}
if (locked)
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
return ret;
@@ -922,7 +898,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
goto free_tms;
ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
- tree_mod_log_write_unlock(fs_info);
+ write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
kfree(tm_list);
@@ -1153,6 +1129,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1160,6 +1138,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1198,6 +1178,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) {
ret = tree_mod_log_free_eb(root->fs_info, buf);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1284,7 +1266,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
- tree_mod_log_read_lock(fs_info);
+ read_lock(&fs_info->tree_mod_log_lock);
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
@@ -1339,7 +1321,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (tm->index != first_tm->index)
break;
}
- tree_mod_log_read_unlock(fs_info);
+ read_unlock(&fs_info->tree_mod_log_lock);
btrfs_set_header_nritems(eb, n);
}
@@ -1396,7 +1378,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
- extent_buffer_get(eb_rewin);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1448,7 +1431,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_warn(root->fs_info,
"failed to read tree block %llu from get_old_root", logical);
} else {
+ btrfs_tree_read_lock(old);
eb = btrfs_clone_extent_buffer(old);
+ btrfs_tree_read_unlock(old);
free_extent_buffer(old);
}
} else if (old_root) {
@@ -1465,8 +1450,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- extent_buffer_get(eb);
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1474,6 +1457,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
else
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4a91d3119e59..8fb9a1e0048b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1576,14 +1576,12 @@ struct btrfs_fs_info {
struct list_head delayed_iputs;
struct mutex cleaner_delayed_iput_mutex;
- /* this protects tree_mod_seq_list */
- spinlock_t tree_mod_seq_lock;
atomic64_t tree_mod_seq;
- struct list_head tree_mod_seq_list;
- /* this protects tree_mod_log */
+ /* this protects tree_mod_log and tree_mod_seq_list */
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
+ struct list_head tree_mod_seq_list;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
@@ -4098,6 +4096,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid);
#ifdef CONFIG_PRINTK
__printf(2, 3)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e06dd75ad13f..bb1e32f77b69 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -193,8 +193,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
}
static bool merge_ref(struct btrfs_trans_handle *trans,
@@ -281,7 +279,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
if (head->is_data)
return;
- spin_lock(&fs_info->tree_mod_seq_lock);
+ read_lock(&fs_info->tree_mod_log_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
struct seq_list *elem;
@@ -289,7 +287,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct seq_list, list);
seq = elem->seq;
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ read_unlock(&fs_info->tree_mod_log_lock);
ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
@@ -317,7 +315,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem;
int ret = 0;
- spin_lock(&fs_info->tree_mod_seq_lock);
+ read_lock(&fs_info->tree_mod_log_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
@@ -330,7 +328,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
}
}
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ read_unlock(&fs_info->tree_mod_log_lock);
return ret;
}
@@ -444,7 +442,6 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
add_tail:
list_add_tail(&ref->list, &href->ref_list);
atomic_inc(&root->num_entries);
- trans->delayed_ref_updates++;
spin_unlock(&href->lock);
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 78722aaffecd..de63cb9bc64b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1698,8 +1698,8 @@ static void end_workqueue_fn(struct btrfs_work *work)
bio->bi_error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
- kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
bio_endio(bio);
+ kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
}
static int cleaner_kthread(void *arg)
@@ -1750,7 +1750,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(root->fs_info);
sleep:
- if (!try_to_freeze() && !again) {
+ if (!again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
@@ -2481,7 +2481,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
- spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
@@ -2973,6 +2972,7 @@ retry_root_backup:
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0) {
+ btrfs_info(fs_info, "start tree-log replay");
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3774,6 +3774,19 @@ void close_ctree(struct btrfs_root *root)
*/
btrfs_delete_unused_bgs(root->fs_info);
+ /*
+ * There might be existing delayed inode workers still running
+ * and holding an empty delayed inode item. We must wait for
+ * them to complete first because they can create a transaction.
+ * This happens when someone calls btrfs_balance_delayed_items()
+ * and then a transaction commit runs the same delayed nodes
+ * before any delayed worker has done something with the nodes.
+ * We must wait for any worker here and not at transaction
+ * commit time since that could cause a deadlock.
+ * This is a very rare case.
+ */
+ btrfs_flush_workqueue(fs_info->delayed_workers);
+
ret = btrfs_commit_super(root);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 2513a7f53334..92f80ed64219 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -55,9 +55,9 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
return type;
}
-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
- int check_generation)
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
@@ -150,7 +150,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
-static struct dentry *btrfs_get_parent(struct dentry *child)
+struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = d_inode(child);
struct btrfs_root *root = BTRFS_I(dir)->root;
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index 074348a95841..7a305e554999 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -16,4 +16,9 @@ struct btrfs_fid {
u64 parent_root_objectid;
} __attribute__ ((packed));
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation);
+struct dentry *btrfs_get_parent(struct dentry *child);
+
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 34ffc125763f..3bb731b2156c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10688,7 +10688,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
- return 1;
+ return -EINVAL;
features = btrfs_super_incompat_flags(disk_super);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 315f21191643..42c745dbccfa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3995,6 +3995,10 @@ retry:
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
ret = write_one_eb(eb, fs_info, wbc, &epd);
@@ -4153,6 +4157,14 @@ retry:
*/
scanned = 1;
index = 0;
+
+ /*
+ * If we're looping we could run into a page that is locked by a
+ * writer and that writer could be waiting on writeback for a
+ * page in our current bio, and thus deadlock, so flush the
+ * write bio here.
+ */
+ flush_write_bio(data);
goto retry;
}
btrfs_add_delayed_iput(inode);
@@ -4418,6 +4430,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
/* once for us */
free_extent_map(em);
+
+ cond_resched(); /* Allow large-extent preemption. */
}
}
return try_release_extent_state(map, tree, page, mask);
@@ -4842,25 +4856,28 @@ err:
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -4937,12 +4954,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
again:
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (ret)
+ if (ret) {
+ exists = ERR_PTR(ret);
goto free_eb;
+ }
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
start >> PAGE_CACHE_SHIFT, eb);
@@ -5325,6 +5344,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = start_i; i < num_pages; i++) {
page = eb->pages[i];
if (!PageUptodate(page)) {
@@ -5400,9 +5424,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5423,7 +5447,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_CACHE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (probe_user_write(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 751435967724..9631be7fc9e2 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -313,9 +313,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 84fb56d5c018..3818b65b0682 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -227,6 +227,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
struct extent_map *merge = NULL;
struct rb_node *rb;
+ /*
+ * We can't modify an extent map that is in the tree and that is being
+ * used by another task, as it can cause that other task to see it in
+ * inconsistent state during the merging. We always have 1 reference for
+ * the tree and 1 for this task (which is unpinning the extent map or
+ * clearing the logging flag), so anything > 2 means it's being used by
+ * other tasks too.
+ */
+ if (atomic_read(&em->refs) > 2)
+ return;
+
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
if (rb)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 58ece6558430..fb5c97ea670f 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -742,10 +742,12 @@ again:
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
- if (ret == 1)
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
found_next = 1;
- if (ret != 0)
goto insert;
+ }
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d056060529f8..2426dc56426f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1089,7 +1089,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_nr = 0;
int del_slot = 0;
int recow;
- int ret;
+ int ret = 0;
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
@@ -1284,7 +1284,7 @@ again:
}
out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
@@ -1525,6 +1525,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ only_release_metadata = false;
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
@@ -1659,7 +1660,6 @@ again:
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
NULL, GFP_NOFS);
- only_release_metadata = false;
}
btrfs_drop_pages(pages, num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6c0161284a9e..55d8020afc58 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -391,6 +391,12 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode
if (uptodate && !PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
+ if (page->mapping != inode->i_mapping) {
+ btrfs_err(BTRFS_I(inode)->root->fs_info,
+ "free space cache page truncated");
+ io_ctl_drop_pages(io_ctl);
+ return -EIO;
+ }
if (!PageUptodate(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
"error reading free space cache");
@@ -748,8 +754,10 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
while (num_entries) {
e = kmem_cache_zalloc(btrfs_free_space_cachep,
GFP_NOFS);
- if (!e)
+ if (!e) {
+ ret = -ENOMEM;
goto free_cache;
+ }
ret = io_ctl_read_entry(&io_ctl, e, &type);
if (ret) {
@@ -758,6 +766,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
if (!e->bytes) {
+ ret = -1;
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
@@ -777,6 +786,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
num_bitmaps--;
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
+ ret = -ENOMEM;
kmem_cache_free(
btrfs_free_space_cachep, e);
goto free_cache;
@@ -2152,7 +2162,7 @@ out:
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, bool update_stat)
{
- struct btrfs_free_space *left_info;
+ struct btrfs_free_space *left_info = NULL;
struct btrfs_free_space *right_info;
bool merged = false;
u64 offset = info->offset;
@@ -2167,7 +2177,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
- else
+ else if (!right_info)
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
if (right_info && !right_info->bitmap) {
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 07573dc1614a..3469c7ce7cb6 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -158,6 +158,7 @@ static void start_caching(struct btrfs_root *root)
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_FINISHED;
spin_unlock(&root->ino_cache_lock);
+ wake_up(&root->ino_cache_wait);
return;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2c3edb50702..92415b8ac5a3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -926,7 +926,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
- u64 disk_num_bytes;
+ u64 min_alloc_size;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
struct btrfs_key ins;
@@ -942,7 +942,6 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
/* if this is a small write inside eof, kick off defrag */
if (num_bytes < 64 * 1024 &&
@@ -969,18 +968,33 @@ static noinline int cow_file_range(struct inode *inode,
}
}
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(root->fs_info->super_copy));
+ BUG_ON(num_bytes > btrfs_super_total_bytes(root->fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
- while (disk_num_bytes > 0) {
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = root->sectorsize;
+
+ while (num_bytes > 0) {
unsigned long op;
- cur_alloc_size = disk_num_bytes;
+ cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size,
- root->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1033,7 +1047,7 @@ static noinline int cow_file_range(struct inode *inode,
goto out_drop_extent_cache;
}
- if (disk_num_bytes < cur_alloc_size)
+ if (num_bytes < cur_alloc_size)
break;
/* we're not doing compressed IO, don't unlock the first
@@ -1050,8 +1064,10 @@ static noinline int cow_file_range(struct inode *inode,
start + ram_size - 1, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
op);
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
+ if (num_bytes < cur_alloc_size)
+ num_bytes = 0;
+ else
+ num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
@@ -5354,11 +5370,13 @@ no_delete:
}
/*
- * this returns the key found in the dir entry in the location pointer.
+ * Return the key found in the dir entry in the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0.
+ *
* If no dir entries were found, location->objectid is 0.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
+ struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -5380,6 +5398,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
goto out_err;
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
+ if (!ret)
+ *type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
@@ -5506,7 +5526,6 @@ static void inode_tree_del(struct inode *inode)
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
- synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
@@ -5666,19 +5685,25 @@ static struct inode *new_simple_dir(struct super_block *s,
return inode;
}
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+ return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ u8 di_type = 0;
int index;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
@@ -5687,6 +5712,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ /* Do extra check against inode mode with di_type */
+ if (btrfs_inode_type(inode) != di_type) {
+ btrfs_crit(root->fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+ inode->i_mode, btrfs_inode_type(inode),
+ di_type);
+ iput(inode);
+ return ERR_PTR(-EUCLEAN);
+ }
return inode;
}
@@ -6300,11 +6337,6 @@ fail:
return ERR_PTR(ret);
}
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
/*
* utility function to add 'inode' into 'parent_inode' with
* a give name and a given sequence number.
@@ -6896,6 +6928,14 @@ again:
extent_start = found_key.offset;
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Only regular file could have regular/prealloc extent */
+ if (!S_ISREG(inode->i_mode)) {
+ err = -EUCLEAN;
+ btrfs_crit(root->fs_info,
+ "regular/prealloc extent found for non-regular inode %llu",
+ btrfs_ino(inode));
+ goto out;
+ }
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
@@ -8302,7 +8342,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
- atomic_inc(&dip->pending_bios);
while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
if (map_length < submit_len + bvec->bv_len ||
@@ -8359,7 +8398,8 @@ submit:
if (!ret)
return 0;
- bio_put(bio);
+ if (bio != orig_bio)
+ bio_put(bio);
out_err:
dip->errors = 1;
/*
@@ -8406,7 +8446,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
io_bio->bi_private = dip;
dip->orig_bio = io_bio;
dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
+ atomic_set(&dip->pending_bios, 1);
btrfs_bio = btrfs_io_bio(io_bio);
btrfs_bio->logical = file_offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3379490ce54d..f35e18e76f16 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -59,6 +59,7 @@
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "tree-log.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -594,12 +595,18 @@ static noinline int create_subvol(struct inode *dir,
btrfs_i_size_write(dir, dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
objectid, root->root_key.objectid,
btrfs_ino(dir), index, name, namelen);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
@@ -2010,9 +2017,14 @@ static noinline int copy_to_sk(struct btrfs_root *root,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2020,10 +2032,14 @@ static noinline int copy_to_sk(struct btrfs_root *root,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+	 * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2113,6 +2129,11 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf + sk_offset,
+ *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
@@ -2534,6 +2555,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
+ if (!err)
+ btrfs_record_snapshot_destroy(trans, dir);
ret = btrfs_end_transaction(trans, root);
if (ret && !err)
err = ret;
@@ -3833,6 +3856,8 @@ process_slot:
ret = -EINTR;
goto out;
}
+
+ cond_resched();
}
ret = 0;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8c27292ea9ea..2eadc8f8c9ef 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -820,10 +820,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
}
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
+ /*
+ * If the ordered extent had an error save the error but don't
+ * exit without waiting first for all other ordered extents in
+ * the range to complete.
+ */
if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
ret = -EIO;
btrfs_put_ordered_extent(ordered);
- if (ret || end == 0 || end == start)
+ if (end == 0 || end == start)
break;
end--;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 734babb6626c..bc4cc417e7ab 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -462,6 +462,7 @@ next2:
break;
}
out:
+ btrfs_free_path(path);
fs_info->qgroup_flags |= flags;
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
fs_info->quota_enabled = 0;
@@ -470,7 +471,6 @@ out:
ret >= 0) {
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
}
- btrfs_free_path(path);
if (ret < 0) {
ulist_free(fs_info->qgroup_ulist);
@@ -2288,8 +2288,10 @@ out:
}
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- if (done && !ret)
+ if (done && !ret) {
ret = 1;
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+ }
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d6ccfb31aef0..628b6a046093 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1289,7 +1289,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
if (!node)
return -ENOMEM;
- node->bytenr = root->node->start;
+ node->bytenr = root->commit_root->start;
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
@@ -1321,15 +1321,14 @@ static void __del_reloc_root(struct btrfs_root *root)
if (rc && root->node) {
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
+ ASSERT(!node || (struct btrfs_root *)node->data == root);
}
spin_lock(&root->fs_info->trans_lock);
@@ -1342,7 +1341,7 @@ static void __del_reloc_root(struct btrfs_root *root)
* helper to update the 'address of tree root -> reloc tree'
* mapping
*/
-static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+static int __update_reloc_root(struct btrfs_root *root)
{
struct rb_node *rb_node;
struct mapping_node *node = NULL;
@@ -1350,7 +1349,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1362,7 +1361,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = new_bytenr;
+ node->bytenr = root->node->start;
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
@@ -1503,6 +1502,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
}
if (reloc_root->commit_root != reloc_root->node) {
+ __update_reloc_root(reloc_root);
btrfs_set_root_node(root_item, reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
reloc_root->commit_root = btrfs_root_node(reloc_root);
@@ -1785,8 +1785,8 @@ int replace_path(struct btrfs_trans_handle *trans,
int ret;
int slot;
- BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
@@ -1818,7 +1818,7 @@ again:
parent = eb;
while (1) {
level = btrfs_header_level(parent);
- BUG_ON(level < lowest_level);
+ ASSERT(level >= lowest_level);
ret = btrfs_bin_search(parent, &key, level, &slot);
if (ret && slot > 0)
@@ -2440,7 +2440,21 @@ out:
free_reloc_roots(&reloc_roots);
}
- BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ /*
+ * We used to have
+ *
+ * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ *
+ * here, but it's wrong. If we fail to start the transaction in
+ * prepare_to_merge() we will have only 0 ref reloc roots, none of which
+ * have actually been removed from the reloc_root_tree rb tree. This is
+ * fine because we're bailing here, and we hold a reference on the root
+ * for the list that holds it, so these roots will be cleaned up when we
+ * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
+ * will be cleaned up on unmount.
+ *
+ * The remaining nodes will be cleaned up by free_reloc_control.
+ */
}
static void free_block_list(struct rb_root *blocks)
@@ -4454,6 +4468,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
+ list_add_tail(&reloc_root->root_list, &reloc_roots);
goto out_free;
}
@@ -4563,11 +4578,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (buf == root->node)
- __update_reloc_root(root, cow->start);
- }
-
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item))
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index cc9ccc42f469..0b41a88ef9e9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -918,11 +918,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
have_csum = sblock_to_check->pagev[0]->have_csum;
dev = sblock_to_check->pagev[0]->dev;
- if (sctx->is_dev_replace && !is_metadata && !have_csum) {
- sblocks_for_recheck = NULL;
- goto nodatasum_case;
- }
-
/*
* read all mirrors one after the other. This includes to
* re-read the extent or metadata block that failed (that was
@@ -1035,13 +1030,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
goto out;
}
- if (!is_metadata && !have_csum) {
+ /*
+ * NOTE: Even for nodatasum case, it's still possible that it's a
+ * compressed data extent, thus scrub_fixup_nodatasum(), which writes
+ * inode page cache onto disk, could cause serious data corruption.
+ *
+ * So here we could only read from disk, and hope our recovery could
+ * reach disk before the newer write.
+ */
+ if (0 && !is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
WARN_ON(sctx->is_dev_replace);
-nodatasum_case:
-
/*
* !is_metadata and !have_csum, this means that the data
* might not be COW'ed, that it might be modified
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 40d1ab957fb6..de0ebb3b3cd3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -34,6 +34,7 @@
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
+#include "xattr.h"
static int g_verbose = 0;
@@ -4194,6 +4195,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
posix_acl_xattr_header dummy_acl;
+ /* Capabilities are emitted by finish_inode_if_needed */
+ if (!strncmp(name, XATTR_NAME_CAPS, name_len))
+ return 0;
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4733,6 +4738,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
+/*
+ * Search for a capability xattr related to sctx->cur_ino. If the capability is
+ * found, call send_set_xattr function to emit it.
+ *
+ * Return 0 if there isn't a capability, or when the capability was emitted
+ * successfully, or < 0 if an error occurred.
+ */
+static int send_capabilities(struct send_ctx *sctx)
+{
+ struct fs_path *fspath = NULL;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+ unsigned long data_ptr;
+ char *buf = NULL;
+ int buf_len;
+ int ret = 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
+ XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
+ if (!di) {
+ /* There is no xattr for this inode */
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ buf_len = btrfs_dir_data_len(leaf, di);
+
+ fspath = fs_path_alloc();
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!fspath || !buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+ if (ret < 0)
+ goto out;
+
+ data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
+ read_extent_buffer(leaf, buf, data_ptr, buf_len);
+
+ ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
+ strlen(XATTR_NAME_CAPS), buf, buf_len);
+out:
+ kfree(buf);
+ fs_path_free(fspath);
+ btrfs_free_path(path);
+ return ret;
+}
+
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
@@ -5022,15 +5085,12 @@ static int is_extent_unchanged(struct send_ctx *sctx,
goto out;
}
- right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
right_len = btrfs_file_extent_inline_len(eb, slot, ei);
right_len = PAGE_ALIGN(right_len);
} else {
right_len = btrfs_file_extent_num_bytes(eb, ei);
}
- right_offset = btrfs_file_extent_offset(eb, ei);
- right_gen = btrfs_file_extent_generation(eb, ei);
/*
* Are we at extent 8? If yes, we know the extent is changed.
@@ -5055,6 +5115,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
goto out;
}
+ right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
+ right_offset = btrfs_file_extent_offset(eb, ei);
+ right_gen = btrfs_file_extent_generation(eb, ei);
+
left_offset_fixed = left_offset;
if (key.offset < ekey->offset) {
/* Fix the right offset for 2a and 7. */
@@ -5443,6 +5507,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
+ ret = send_capabilities(sctx);
+ if (ret < 0)
+ goto out;
+
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0f99336c37eb..77e6ce0e1e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -843,8 +843,8 @@ out:
return error;
}
-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
- u64 subvol_objectid)
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid)
{
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_root *fs_root;
@@ -1120,6 +1120,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
struct btrfs_root *root = info->tree_root;
char *compress_type;
+ const char *subvol_name;
if (btrfs_test_opt(root, DEGRADED))
seq_puts(seq, ",degraded");
@@ -1204,8 +1205,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
- seq_puts(seq, ",subvol=");
- seq_dentry(seq, dentry, " \t\n\\");
+ subvol_name = btrfs_get_subvol_name_from_objectid(info,
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+ if (!IS_ERR(subvol_name)) {
+ seq_puts(seq, ",subvol=");
+ seq_escape(seq, subvol_name, " \t\n\\");
+ kfree(subvol_name);
+ }
return 0;
}
@@ -1323,8 +1329,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
goto out;
}
}
- subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
- subvol_objectid);
+ subvol_name = btrfs_get_subvol_name_from_objectid(
+ btrfs_sb(mnt->mnt_sb), subvol_objectid);
if (IS_ERR(subvol_name)) {
root = ERR_CAST(subvol_name);
subvol_name = NULL;
@@ -1702,6 +1708,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
if (btrfs_super_log_root(fs_info->super_copy) != 0) {
+ btrfs_warn(fs_info,
+ "mount required to replay tree-log, cannot remount read-write");
ret = -EINVAL;
goto restore;
}
@@ -1978,6 +1986,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret;
u64 thresh = 0;
+ int mixed = 0;
/*
* holding chunk_muext to avoid allocating new chunks, holding
@@ -2003,8 +2012,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
}
}
- if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
- total_free_meta += found->disk_total - found->disk_used;
+
+ /*
+ * Metadata in mixed block goup profiles are accounted in data
+ */
+ if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ if (found->flags & BTRFS_BLOCK_GROUP_DATA)
+ mixed = 1;
+ else
+ total_free_meta += found->disk_total -
+ found->disk_used;
+ }
total_used += found->disk_used;
}
@@ -2042,7 +2060,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
*/
thresh = 4 * 1024 * 1024;
- if (total_free_meta - thresh < block_rsv->size)
+ /*
+ * We only want to claim there's no available space if we can no longer
+ * allocate chunks for our metadata profile and our global reserve will
+ * not fit in the free metadata space. If we aren't ->full then we
+ * still can allocate chunks and thus are fine using the currently
+ * calculated f_bavail.
+ */
+ if (!mixed && block_rsv->space_info->full &&
+ total_free_meta - thresh < block_rsv->size)
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 9626252ee6b4..2825cbe3ea8d 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -48,7 +48,13 @@ static struct file_system_type test_type = {
struct inode *btrfs_new_test_inode(void)
{
- return new_inode(test_mnt->mnt_sb);
+ struct inode *inode;
+
+ inode = new_inode(test_mnt->mnt_sb);
+ if (inode)
+ inode_init_owner(inode, NULL, S_IFREG);
+
+ return inode;
}
int btrfs_init_test_fs(void)
@@ -109,7 +115,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
- spin_lock_init(&fs_info->tree_mod_seq_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
mutex_init(&fs_info->qgroup_rescan_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 054fc0d97131..5ff676df698f 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -235,6 +235,7 @@ static noinline int test_btrfs_get_extent(void)
return ret;
}
+ inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 2b2978c04e80..1efec40455f8 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -477,9 +477,9 @@ int btrfs_test_qgroups(void)
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, 4096);
- if (!root->node) {
+ if (IS_ERR(root->node)) {
test_msg("Couldn't allocate dummy buffer\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 098016338f98..f0675b7c95ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1264,8 +1264,10 @@ int btrfs_defrag_root(struct btrfs_root *root)
while (1) {
trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
ret = btrfs_defrag_leaves(trans, root);
@@ -1814,6 +1816,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode);
int ret;
+ /*
+ * Some places just start a transaction to commit it. We need to make
+ * sure that if this commit fails that the abort code actually marks the
+ * transaction as failed, so set trans->dirty to make the abort code do
+ * the right thing.
+ */
+ trans->dirty = true;
+
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 45c77f261856..bcf61a32f970 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1511,6 +1511,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
if (ret == 1) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -1523,17 +1524,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ break;
btrfs_release_path(path);
inode = read_one_inode(root, key.offset);
- if (!inode)
- return -EIO;
+ if (!inode) {
+ ret = -EIO;
+ break;
+ }
ret = fixup_inode_link_count(trans, root, inode);
iput(inode);
if (ret)
- goto out;
+ break;
/*
* fixup on a directory may create new entries,
@@ -1542,8 +1545,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
*/
key.offset = (u64)-1;
}
- ret = 0;
-out:
btrfs_release_path(path);
return ret;
}
@@ -1582,8 +1583,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
- } else {
- BUG(); /* Logic Error */
}
iput(inode);
@@ -2208,7 +2207,9 @@ again:
else {
ret = find_dir_range(log, path, dirid, key_type,
&range_start, &range_end);
- if (ret != 0)
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
break;
}
@@ -3169,11 +3170,13 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&BTRFS_I(dir)->log_mutex);
- if (ret == -ENOSPC) {
+ if (err == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, root, ret);
+ err = 0;
+ } else if (err < 0 && err != -ENOENT) {
+ /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ btrfs_abort_transaction(trans, root, err);
+ }
btrfs_end_log_trans(root);
@@ -3333,6 +3336,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
* search and this search we'll not find the key again and can just
* bail.
*/
+search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
if (ret != 0)
goto done;
@@ -3352,6 +3356,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
if (min_key.objectid != ino || min_key.type != key_type)
goto done;
+
+ if (need_resched()) {
+ btrfs_release_path(path);
+ cond_resched();
+ goto search;
+ }
+
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
if (ret) {
@@ -3733,11 +3744,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
log->fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
&ordered_sums, 0);
- if (ret) {
- btrfs_release_path(dst_path);
- kfree(ins_data);
- return ret;
- }
+ if (ret)
+ break;
}
}
}
@@ -3750,7 +3758,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* we have to do this after the loop above to avoid changing the
* log tree while trying to change the log tree.
*/
- ret = 0;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
@@ -4404,13 +4411,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(leaf,
- path->slots[0],
- extent);
- ASSERT(len == i_size);
+ BTRFS_FILE_EXTENT_INLINE)
return 0;
- }
len = btrfs_file_extent_num_bytes(leaf, extent);
/* Last extent goes beyond i_size, no need to log a hole. */
@@ -5700,6 +5702,21 @@ record:
}
/*
+ * Make sure that if someone attempts to fsync the parent directory of a deleted
+ * snapshot, it ends up triggering a transaction commit. This is to guarantee
+ * that after replaying the log tree of the parent directory's root we will not
+ * see the snapshot anymore and at log replay time we will not see any log tree
+ * corresponding to the deleted snapshot's root, which could lead to replaying
+ * it after replaying the log tree of the parent directory (which would replay
+ * the snapshot delete operation).
+ */
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir)
+{
+ BTRFS_I(dir)->last_unlink_trans = trans->transid;
+}
+
+/*
* Call this after adding a new name for a file and it will properly
* update the log to reflect the new name.
*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 6916a781ea02..a9f1b75d080d 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -79,6 +79,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
int for_rename);
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *old_dir,
struct dentry *parent);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 837a9a8d579e..24eb6283dc62 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -332,6 +332,8 @@ again_search_slot:
}
if (ret < 0 && ret != -ENOENT)
goto out;
+ key.offset++;
+ goto again_search_slot;
}
item_size -= sizeof(subid_le);
offset += sizeof(subid_le);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 55ce6543050d..d6383d362e27 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2357,9 +2357,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_set_super_num_devices(root->fs_info->super_copy,
tmp + 1);
- /* add sysfs device entry */
- btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
-
/*
* we've got more storage, clear any full flags on the space
* infos
@@ -2367,6 +2364,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_clear_space_info_full(root->fs_info);
unlock_chunks(root);
+
+ /* add sysfs device entry */
+ btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
@@ -4065,6 +4066,7 @@ static int btrfs_uuid_scan_kthread(void *data)
goto skip;
}
update_tree:
+ btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
root_item.uuid,
@@ -4090,6 +4092,7 @@ update_tree:
}
skip:
+ btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans, fs_info->uuid_root);
trans = NULL;
@@ -4097,7 +4100,6 @@ skip:
break;
}
- btrfs_release_path(path);
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
@@ -6261,6 +6263,13 @@ static int btrfs_check_chunk_valid(struct btrfs_root *root,
return -EIO;
}
+ if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EUCLEAN;
+ }
if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
return -EIO;
@@ -6694,6 +6703,14 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
lock_chunks(root);
/*
+ * It is possible for mount and umount to race in such a way that
+ * we execute this code path, but open_fs_devices failed to clear
+ * total_rw_bytes. We certainly want it cleared before reading the
+ * device items, so clear it here.
+ */
+ root->fs_info->fs_devices->total_rw_bytes = 0;
+
+ /*
* Read all device items, and then all the chunk items. All
* device items are found before any chunk item (their object id
* is smaller than the lowest possible object id for a chunk
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7feac2d9da56..d24f3ceb0691 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -312,7 +312,6 @@ struct btrfs_bio {
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
struct bio *orig_bio;
- unsigned long flags;
void *private;
atomic_t error;
int max_errors;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c05ab2ec0fef..9047f0e64bc0 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -64,9 +64,9 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
object = container_of(op->op.object, struct cachefiles_object, fscache);
spin_lock(&object->work_lock);
list_add_tail(&monitor->op_link, &op->to_do);
+ fscache_enqueue_retrieval(op);
spin_unlock(&object->work_lock);
- fscache_enqueue_retrieval(op);
fscache_put_retrieval(op);
return 0;
}
@@ -125,7 +125,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
_debug("reissue read");
ret = bmapping->a_ops->readpage(NULL, backpage);
if (ret < 0)
- goto unlock_discard;
+ goto discard;
}
/* but the page may have been read before the monitor was installed, so
@@ -142,6 +142,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
unlock_discard:
unlock_page(backpage);
+discard:
spin_lock_irq(&object->work_lock);
list_del(&monitor->op_link);
spin_unlock_irq(&object->work_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 26607401edfe..bec37093a3de 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -72,10 +72,6 @@ static int ceph_set_page_dirty(struct page *page)
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc;
- int ret;
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
if (PageDirty(page)) {
dout("%p set_page_dirty %p idx %lu -- already dirty\n",
@@ -121,11 +117,7 @@ static int ceph_set_page_dirty(struct page *page)
page->private = (unsigned long)snapc;
SetPagePrivate(page);
- ret = __set_page_dirty_nobuffers(page);
- WARN_ON(!PageLocked(page));
- WARN_ON(!page->mapping);
-
- return ret;
+ return __set_page_dirty_nobuffers(page);
}
/*
@@ -1237,7 +1229,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
- loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+ loff_t off = (loff_t)vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa4df4a02252..154c47282a34 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -920,12 +920,19 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc;
int removed = 0;
+	/* 'ci' being NULL means the remove has already occurred */
+ if (!ci) {
+ dout("%s: cap inode is NULL\n", __func__);
+ return;
+ }
+
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap)
@@ -1538,6 +1545,8 @@ static int __mark_caps_flushing(struct inode *inode,
* try to invalidate mapping pages without blocking.
*/
static int try_nonblocking_invalidate(struct inode *inode)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u32 invalidating_gen = ci->i_rdcache_gen;
@@ -1734,8 +1743,12 @@ retry_locked:
}
/* want more caps from mds? */
- if (want & ~(cap->mds_wanted | cap->issued))
- goto ack;
+ if (want & ~cap->mds_wanted) {
+ if (want & ~(cap->mds_wanted | cap->issued))
+ goto ack;
+ if (!__cap_is_valid(cap))
+ goto ack;
+ }
/* things we might delay */
if ((cap->issued & ~retain) == 0 &&
@@ -1773,12 +1786,24 @@ ack:
if (mutex_trylock(&session->s_mutex) == 0) {
dout("inverting session/ino locks on %p\n",
session);
+ session = ceph_get_mds_session(session);
spin_unlock(&ci->i_ceph_lock);
if (took_snap_rwsem) {
up_read(&mdsc->snap_rwsem);
took_snap_rwsem = 0;
}
- mutex_lock(&session->s_mutex);
+ if (session) {
+ mutex_lock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ } else {
+ /*
+ * Because we take the reference while
+ * holding the i_ceph_lock, it should
+ * never be NULL. Throw a warning if it
+ * ever is.
+ */
+ WARN_ON_ONCE(true);
+ }
goto retry;
}
}
@@ -3289,6 +3314,7 @@ retry:
WARN_ON(1);
tsession = NULL;
target = -1;
+ mutex_lock(&session->s_mutex);
}
goto retry;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fe02ae7f056a..ff9e60daf086 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -146,6 +146,11 @@ static struct dentry *__get_parent(struct super_block *sb,
}
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err) {
+ ceph_mdsc_put_request(req);
+ return ERR_PTR(err);
+ }
+
inode = req->r_target_inode;
if (inode)
ihold(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c8222bfe1e56..3e6ebe40f06f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1433,6 +1433,7 @@ const struct file_operations ceph_file_fops = {
.mmap = ceph_mmap,
.fsync = ceph_fsync,
.lock = ceph_lock,
+ .setlease = simple_nosetlease,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a5de8e22629b..b7fd7d69be07 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3428,6 +3428,9 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
ceph_check_delayed_caps(mdsc);
+ if (mdsc->stopping)
+ return;
+
mutex_lock(&mdsc->mutex);
renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
renew_caps = time_after_eq(jiffies, HZ*renew_interval +
@@ -3752,7 +3755,16 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
- cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+ /*
+ * Make sure the delayed work stopped before releasing
+ * the resources.
+ *
+	 * Because cancel_delayed_work_sync() will only
+	 * guarantee that the work finishes executing, but the
+	 * delayed work will re-arm itself again after that.
+ */
+ flush_delayed_work(&mdsc->delayed_work);
+
if (mdsc->mdsmap)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d0655ca89481..9154a2d7b195 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -332,7 +332,7 @@ static struct kobject *cdev_get(struct cdev *p)
if (owner && !try_module_get(owner))
return NULL;
- kobj = kobject_get(&p->kobj);
+ kobj = kobject_get_unless_zero(&p->kobj);
if (!kobj)
module_put(owner);
return kobj;
@@ -472,6 +472,85 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
return 0;
}
+/**
+ * cdev_set_parent() - set the parent kobject for a char device
+ * @p: the cdev structure
+ * @kobj: the kobject to take a reference to
+ *
+ * cdev_set_parent() sets a parent kobject which will be referenced
+ * appropriately so the parent is not freed before the cdev. This
+ * should be called before cdev_add.
+ */
+void cdev_set_parent(struct cdev *p, struct kobject *kobj)
+{
+ WARN_ON(!kobj->state_initialized);
+ p->kobj.parent = kobj;
+}
+
+/**
+ * cdev_device_add() - add a char device and its corresponding
+ *	struct device, linking
+ * @dev: the device structure
+ * @cdev: the cdev structure
+ *
+ * cdev_device_add() adds the char device represented by @cdev to the system,
+ * just as cdev_add does. It then adds @dev to the system using device_add
+ * The dev_t for the char device will be taken from the struct device which
+ * needs to be initialized first. This helper function correctly takes a
+ * reference to the parent device so the parent will not get released until
+ * all references to the cdev are released.
+ *
+ * This helper uses dev->devt for the device number. If it is not set
+ * it will not add the cdev and it will be equivalent to device_add.
+ *
+ * This function should be used whenever the struct cdev and the
+ * struct device are members of the same structure whose lifetime is
+ * managed by the struct device.
+ *
+ * NOTE: Callers must assume that userspace was able to open the cdev and
+ * can call cdev fops callbacks at any time, even if this function fails.
+ */
+int cdev_device_add(struct cdev *cdev, struct device *dev)
+{
+ int rc = 0;
+
+ if (dev->devt) {
+ cdev_set_parent(cdev, &dev->kobj);
+
+ rc = cdev_add(cdev, dev->devt, 1);
+ if (rc)
+ return rc;
+ }
+
+ rc = device_add(dev);
+ if (rc)
+ cdev_del(cdev);
+
+ return rc;
+}
+
+/**
+ * cdev_device_del() - inverse of cdev_device_add
+ * @dev: the device structure
+ * @cdev: the cdev structure
+ *
+ * cdev_device_del() is a helper function to call cdev_del and device_del.
+ * It should be used whenever cdev_device_add is used.
+ *
+ * If dev->devt is not set it will not remove the cdev and will be equivalent
+ * to device_del.
+ *
+ * NOTE: This guarantees that associated sysfs callbacks are not running
+ * or runnable, however any cdevs already open will remain and their fops
+ * will still be callable even after this function returns.
+ */
+void cdev_device_del(struct cdev *cdev, struct device *dev)
+{
+ device_del(dev);
+ if (dev->devt)
+ cdev_del(cdev);
+}
+
static void cdev_unmap(dev_t dev, unsigned count)
{
kobj_unmap(cdev_map, dev, count);
@@ -483,6 +562,10 @@ static void cdev_unmap(dev_t dev, unsigned count)
*
* cdev_del() removes @p from the system, possibly freeing the structure
* itself.
+ *
+ * NOTE: This guarantees that cdev device will no longer be able to be
+ * opened, however any cdevs already open will remain and their fops will
+ * still be callable even after cdev_del returns.
*/
void cdev_del(struct cdev *p)
{
@@ -571,5 +654,8 @@ EXPORT_SYMBOL(cdev_init);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_set_parent);
+EXPORT_SYMBOL(cdev_device_add);
+EXPORT_SYMBOL(cdev_device_del);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index a3b56544c21b..ae1f2817bd6a 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -541,8 +541,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -552,8 +552,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 1\n",
+ cls, con, tag, end);
return 0;
}
@@ -563,8 +563,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -575,8 +575,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d sequence_end = %p exit 1\n",
+ cls, con, tag, sequence_end);
return 0;
}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 1ea643faf04b..d8afb402f966 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -241,7 +241,8 @@ compose_mount_options_err:
* @fullpath: full path in UNC format
* @ref: server's referral
*/
-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
+ struct cifs_sb_info *cifs_sb,
const char *fullpath, const struct dfs_info3_param *ref)
{
struct vfsmount *mnt;
@@ -255,7 +256,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
- mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
+ mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
kfree(mountdata);
kfree(devname);
return mnt;
@@ -330,7 +331,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = ERR_PTR(-EINVAL);
break;
}
- mnt = cifs_dfs_do_refmount(cifs_sb,
+ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
full_path, referrals + i);
cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
__func__, referrals[i].node_name, mnt);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 211ac472cb9d..e5e780145728 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -367,14 +367,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
@@ -493,7 +488,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3f93125916bf..f5b87a8f75c4 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -480,7 +480,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
*pmode |= (S_IXUGO & (*pbits_to_set));
- cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode);
+ cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode);
return;
}
@@ -509,7 +509,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
if (mode & S_IXUGO)
*pace_flags |= SET_FILE_EXEC_RIGHTS;
- cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n",
+ cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n",
mode, *pace_flags);
return;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4f4fc9ff3636..5666eb49bbbd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,7 +204,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
rc = server->ops->queryfs(xid, tcon, buf);
free_xid(xid);
- return 0;
+ return rc;
}
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b9b8f19dce0e..fa07f7cb85a5 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -184,6 +184,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* reconnect the same SMB session
*/
mutex_lock(&ses->session_mutex);
+
+ /*
+ * Recheck after acquiring the mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, ses);
if (rc == 0 && ses->need_reconnect)
rc = cifs_setup_session(0, ses, nls_codepage);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63108343124a..cda22b312a4c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -338,8 +338,10 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server)
return rc;
}
+ spin_lock(&cifs_tcp_ses_lock);
rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr,
strlen(ipaddr));
+ spin_unlock(&cifs_tcp_ses_lock);
kfree(ipaddr);
return !rc ? -1 : 0;
@@ -546,20 +548,21 @@ static bool
server_unresponsive(struct TCP_Server_Info *server)
{
/*
- * We need to wait 2 echo intervals to make sure we handle such
+ * We need to wait 3 echo intervals to make sure we handle such
* situations right:
* 1s client sends a normal SMB request
- * 2s client gets a response
+ * 3s client gets a response
* 30s echo workqueue job pops, and decides we got a response recently
* and don't need to send another
* ...
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
- if (server->tcpStatus == CifsGood &&
- time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
+ if ((server->tcpStatus == CifsGood ||
+ server->tcpStatus == CifsNeedNegotiate) &&
+ time_after(jiffies, server->lstrp + 3 * SMB_ECHO_INTERVAL)) {
cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
- server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+ server->hostname, (3 * SMB_ECHO_INTERVAL) / HZ);
cifs_reconnect(server);
wake_up(&server->response_q);
return true;
@@ -780,6 +783,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
+ cancel_delayed_work_sync(&server->echo);
+
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
@@ -939,6 +944,7 @@ cifs_demultiplex_thread(void *p)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
+ allow_kernel_signal(SIGKILL);
while (server->tcpStatus != CifsExiting) {
if (try_to_freeze())
continue;
@@ -2246,7 +2252,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
task = xchg(&server->tsk, NULL);
if (task)
- force_sig(SIGKILL, task);
+ send_sig(SIGKILL, task, 1);
}
static struct TCP_Server_Info *
@@ -2955,9 +2961,10 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
- if (IS_ERR(tlink)) {
+ if (tlink == NULL) {
+ /* cannot match the superblock if tlink was ever null */
spin_unlock(&cifs_tcp_ses_lock);
- return rc;
+ return 0;
}
tcon = tlink_tcon(tlink);
ses = tcon->ses;
@@ -3401,7 +3408,7 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_gid = pvolume_info->linux_gid;
cifs_sb->mnt_file_mode = pvolume_info->file_mode;
cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
- cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n",
+ cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n",
cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
cifs_sb->actimeo = pvolume_info->actimeo;
@@ -4199,9 +4206,12 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->retry = master_tcon->retry;
vol_info->nocase = master_tcon->nocase;
vol_info->local_lease = master_tcon->local_lease;
+ vol_info->resilient = master_tcon->use_resilient;
+ vol_info->persistent = master_tcon->use_persistent;
vol_info->no_linux_ext = !master_tcon->unix_ext;
vol_info->sectype = master_tcon->ses->sectype;
vol_info->sign = master_tcon->ses->sign;
+ vol_info->seal = master_tcon->seal;
rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
if (rc) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index be16da31cbcc..9f1641324a81 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -831,6 +831,7 @@ static int
cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
{
struct inode *inode;
+ int rc;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -840,8 +841,25 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
CIFS_I(inode)->time = 0; /* force reval */
- if (cifs_revalidate_dentry(direntry))
- return 0;
+ rc = cifs_revalidate_dentry(direntry);
+ if (rc) {
+ cifs_dbg(FYI, "cifs_revalidate_dentry failed with rc=%d", rc);
+ switch (rc) {
+ case -ENOENT:
+ case -ESTALE:
+ /*
+ * Those errors mean the dentry is invalid
+ * (file was deleted or recreated)
+ */
+ return 0;
+ default:
+ /*
+ * Otherwise some unexpected error happened
+ * report it as-is to VFS layer
+ */
+ return rc;
+ }
+ }
else {
/*
* If the inode wasn't known to be a dfs entry when
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5cad1109ed80..62cc0c22db63 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -163,6 +163,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
goto posix_open_ret;
}
} else {
+ cifs_revalidate_mapping(*pinode);
cifs_fattr_to_inode(*pinode, &fattr);
}
@@ -312,9 +313,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
INIT_LIST_HEAD(&fdlocks->locks);
fdlocks->cfile = cfile;
cfile->llist = fdlocks;
- cifs_down_write(&cinode->lock_sem);
- list_add(&fdlocks->llist, &cinode->llist);
- up_write(&cinode->lock_sem);
cfile->count = 1;
cfile->pid = current->tgid;
@@ -338,6 +336,10 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
oplock = 0;
}
+ cifs_down_write(&cinode->lock_sem);
+ list_add(&fdlocks->llist, &cinode->llist);
+ up_write(&cinode->lock_sem);
+
spin_lock(&tcon->open_file_lock);
if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
oplock = fid->pending_open->oplock;
@@ -702,6 +704,13 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
if (backup_cred(cifs_sb))
create_options |= CREATE_OPEN_BACKUP_INTENT;
+ /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+ if (cfile->f_flags & O_SYNC)
+ create_options |= CREATE_WRITE_THROUGH;
+
+ if (cfile->f_flags & O_DIRECT)
+ create_options |= CREATE_NO_BUFFER;
+
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
@@ -3222,7 +3231,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* than it negotiated since it will refuse the read
* then.
*/
- if ((tcon->ses) && !(tcon->ses->capabilities &
+ if (!(tcon->ses->capabilities &
tcon->ses->server->vals->cap_large_files)) {
current_read_size = min_t(uint,
current_read_size, CIFSMaxBufSize);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 0a219545940d..c18c26a78453 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1540,7 +1540,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
struct TCP_Server_Info *server;
char *full_path;
- cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n",
+ cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n",
mode, inode);
cifs_sb = CIFS_SB(inode->i_sb);
@@ -1957,6 +1957,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
struct inode *inode = d_inode(dentry);
struct super_block *sb = dentry->d_sb;
char *full_path = NULL;
+ int count = 0;
if (inode == NULL)
return -ENOENT;
@@ -1978,15 +1979,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
full_path, inode, inode->i_count.counter,
dentry, dentry->d_time, jiffies);
+again:
if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
else
rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
xid, NULL);
-
+ if (rc == -EAGAIN && count++ < 10)
+ goto again;
out:
kfree(full_path);
free_xid(xid);
+
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 9bc7a29f88d6..2d3918cdcc28 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -602,7 +602,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 41f1a5dd33a5..4dcce3f034f4 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -69,7 +69,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
- if (oparms->tcon->use_resilient) {
+ if (oparms->tcon->use_resilient) {
nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */
nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 0e62bf1ebbd7..19baeb4ca511 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -582,10 +582,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp, &server->smb_ses_list) {
ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+
list_for_each(tmp1, &ses->tcon_list) {
tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
- cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
spin_lock(&tcon->open_file_lock);
list_for_each(tmp2, &tcon->openFileList) {
cfile = list_entry(tmp2, struct cifsFileInfo,
@@ -597,6 +597,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
continue;
cifs_dbg(FYI, "file id match, oplock break\n");
+ cifs_stats_inc(
+ &tcon->stats.cifs_stats.num_oplock_brks);
cinode = CIFS_I(d_inode(cfile->dentry));
spin_lock(&cfile->file_info_lock);
if (!CIFS_CACHE_WRITE(cinode) &&
@@ -628,14 +630,11 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
return true;
}
spin_unlock(&tcon->open_file_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "No matching file for oplock break\n");
- return true;
}
}
spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
- return false;
+ cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+ return true;
}
void
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0fcf42401a5d..c173d047b44b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -619,6 +619,8 @@ smb2_clone_range(const unsigned int xid,
cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
+ kfree(retbuf);
+ retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, (char *)pcchunk,
@@ -1145,6 +1147,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
inode = d_inode(cfile->dentry);
cifsi = CIFS_I(inode);
+ /*
+ * We zero the range through ioctl, so we need to remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
if (keep_size == false)
@@ -1201,6 +1209,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
return -EOPNOTSUPP;
+ /*
+ * We implement the punch hole through ioctl, so we need to remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 84e60b3a5c7c..4ffd5e177288 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -249,10 +249,27 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
* the same SMB session
*/
mutex_lock(&tcon->ses->session_mutex);
+
+ /*
+ * Recheck after acquiring the mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, tcon->ses);
- if (!rc && tcon->ses->need_reconnect)
+ if (!rc && tcon->ses->need_reconnect) {
rc = cifs_setup_session(0, tcon->ses, nls_codepage);
-
+ if ((rc == -EACCES) && !tcon->retry) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto failed;
+ }
+ }
if (rc || !tcon->need_reconnect) {
mutex_unlock(&tcon->ses->session_mutex);
goto out;
@@ -286,6 +303,7 @@ out:
case SMB2_SET_INFO:
rc = -EAGAIN;
}
+failed:
unload_nls(nls_codepage);
return rc;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3530e1c3ff56..3590c5c5eb6a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -186,9 +186,9 @@ static const struct super_operations debugfs_super_operations = {
static struct vfsmount *debugfs_automount(struct path *path)
{
- struct vfsmount *(*f)(void *);
- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
- return f(d_inode(path->dentry)->i_private);
+ debugfs_automount_t f;
+ f = (debugfs_automount_t)path->dentry->d_fsdata;
+ return f(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
*/
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
@@ -699,7 +699,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
- dentry);
+ dentry, 0);
if (error) {
release_dentry_name_snapshot(&old_name);
goto exit;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da574a74a467..5afb6e260c84 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -794,6 +794,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
struct buffer_head *map_bh)
{
int ret = 0;
+ int boundary = sdio->boundary; /* dio_send_cur_page may clear it */
if (dio->rw & WRITE) {
/*
@@ -832,10 +833,10 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
out:
/*
- * If sdio->boundary then we want to schedule the IO now to
+ * If boundary then we want to schedule the IO now to
* avoid metadata seeks.
*/
- if (sdio->boundary) {
+ if (boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea64912c9c0..3b79c0284a30 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -545,6 +545,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
+ ++*pos;
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5eff6ea3e27f..63e856d90ed0 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -92,7 +92,6 @@ do { \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
- BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3a7f401e943c..ffab7dc88157 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3975,6 +3975,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
int from = ms->m_header.h_nodeid;
int error = 0;
+ /* currently mixing of user/kernel locks are not supported */
+ if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) {
+ log_error(lkb->lkb_resource->res_ls,
+ "got user dlm message for a kernel lock");
+ error = -EINVAL;
+ goto out;
+ }
+
switch (ms->m_type) {
case DLM_MSG_CONVERT:
case DLM_MSG_UNLOCK:
@@ -4003,6 +4011,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
error = -EINVAL;
}
+out:
if (error)
log_error(lkb->lkb_resource->res_ls,
"ignore invalid message %d from %d %x %x %x %d",
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 30e4e01db35a..499f54f99891 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -626,6 +626,9 @@ static int new_lockspace(const char *name, const char *cluster,
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+ /* let kobject handle freeing of ls if there's an error */
+ do_unreg = 1;
+
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
@@ -633,9 +636,6 @@ static int new_lockspace(const char *name, const char *cluster,
goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
- /* let kobject handle freeing of ls if there's an error */
- do_unreg = 1;
-
/* This uevent triggers dlm_controld in userspace to add us to the
group of nodes that are members of this lockspace (managed by the
cluster infrastructure.) Once it's done that, it tells us who the
@@ -800,6 +800,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_delete_debug_file(ls);
+ idr_destroy(&ls->ls_recover_idr);
kfree(ls->ls_recover_buf);
/*
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9d7a4a714907..99f4cd91910f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -554,7 +554,7 @@ static void close_connection(struct connection *con, bool and_other,
}
if (con->othercon && and_other) {
/* Will only re-enter once. */
- close_connection(con->othercon, false, true, true);
+ close_connection(con->othercon, false, tx, rx);
}
if (con->rx_page) {
__free_page(con->rx_page);
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 7cd24bccd4fe..37be29f21d04 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -38,10 +38,8 @@ int __init dlm_memory_init(void)
void dlm_memory_exit(void)
{
- if (lkb_cache)
- kmem_cache_destroy(lkb_cache);
- if (rsb_cache)
- kmem_cache_destroy(rsb_cache);
+ kmem_cache_destroy(lkb_cache);
+ kmem_cache_destroy(rsb_cache);
}
char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -86,8 +84,7 @@ void dlm_free_lkb(struct dlm_lkb *lkb)
struct dlm_user_args *ua;
ua = lkb->lkb_ua;
if (ua) {
- if (ua->lksb.sb_lvbptr)
- kfree(ua->lksb.sb_lvbptr);
+ kfree(ua->lksb.sb_lvbptr);
kfree(ua);
}
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index dd2b7416e40a..761d74a84f92 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -25,6 +25,7 @@
#include "lvb_table.h"
#include "user.h"
#include "ast.h"
+#include "config.h"
static const char name_prefix[] = "dlm";
static const struct file_operations device_fops;
@@ -402,7 +403,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = dlm_new_lockspace(params->name, NULL, params->flags,
+ error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags,
DLM_USER_LVB_LEN, NULL, NULL, NULL,
&lockspace);
if (error)
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ea3d99ebb6ee..ac9b48aee73f 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1346,7 +1346,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
printk(KERN_WARNING "Tag 1 packet contains key larger "
"than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
rc = -EINVAL;
- goto out;
+ goto out_free;
}
memcpy((*new_auth_tok)->session_key.encrypted_key,
&data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2)));
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 85f7a289bdac..7d63228b9657 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -547,6 +547,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}
+ if (!dev_name) {
+ rc = -EINVAL;
+ err = "Device name cannot be null";
+ goto out;
+ }
+
rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
if (rc) {
err = "Error parsing options";
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 4f457d5c4933..26464f9d9b76 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void)
* ecryptfs_message_buf_len),
GFP_KERNEL);
if (!ecryptfs_msg_ctx_arr) {
+ kfree(ecryptfs_daemon_hash);
rc = -ENOMEM;
printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
goto out;
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index e2ab6d0497f2..151884b95ee2 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/kmemleak.h>
#include <linux/slab.h>
#include "internal.h"
@@ -138,6 +139,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
var->var.VariableName[i] = '\0';
inode->i_private = var;
+ kmemleak_ignore(var);
efivar_entry_add(var, &efivarfs_list);
d_instantiate(dentry, inode);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index abb244b06024..fca235020312 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -147,6 +147,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
+ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */
+ strreplace(name, '/', '!');
+
inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
is_removable);
if (!inode)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ac21caad6729..b484d4500687 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,8 +218,7 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- int visited;
- struct list_head visited_list_link;
+ u64 gen;
};
/* Wait structure used by the poll hooks */
@@ -263,6 +262,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0;
+
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
@@ -278,9 +279,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
@@ -1235,17 +1233,26 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
- const char *name;
+ struct name_snapshot n;
struct wakeup_source *ws;
+ char task_comm_buf[TASK_COMM_LEN];
+ char buf[64];
+
+ get_task_comm(task_comm_buf, current);
if (!epi->ep->ws) {
- epi->ep->ws = wakeup_source_register("eventpoll");
+ snprintf(buf, sizeof(buf), "epoll_%.*s_epollfd",
+ (int)sizeof(task_comm_buf), task_comm_buf);
+ epi->ep->ws = wakeup_source_register(buf);
if (!epi->ep->ws)
return -ENOMEM;
}
- name = epi->ffd.file->f_path.dentry->d_name.name;
- ws = wakeup_source_register(name);
+ take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ snprintf(buf, sizeof(buf), "epoll_%.*s_file:%s",
+ (int)sizeof(task_comm_buf), task_comm_buf, n.name);
+ ws = wakeup_source_register(n.name);
+ release_dentry_name_snapshot(&n);
if (!ws)
return -ENOMEM;
@@ -1305,6 +1312,22 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}
+ /* Add the current item to the list of active epoll hook for this file */
+ spin_lock(&tfile->f_lock);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ spin_unlock(&tfile->f_lock);
+
+ /*
+ * Add the current item to the RB tree. All RB tree operations are
+ * protected by "mtx", and ep_insert() is called with "mtx" held.
+ */
+ ep_rbtree_insert(ep, epi);
+
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (full_check && reverse_path_check())
+ goto error_remove_epi;
+
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1327,22 +1350,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_lock);
-
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
- ep_rbtree_insert(ep, epi);
-
- /* now check if we've created too many backpaths */
- error = -EINVAL;
- if (full_check && reverse_path_check())
- goto error_remove_epi;
-
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
@@ -1368,6 +1375,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
return 0;
+error_unregister:
+ ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
@@ -1375,9 +1384,6 @@ error_remove_epi:
rb_erase(&epi->rbn, &ep->rbr);
-error_unregister:
- ep_unregister_pollwait(ep, epi);
-
/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
@@ -1699,13 +1705,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
- ep->visited = 1;
- list_add(&ep->visited_list_link, &visited_list);
+ ep->gen = loop_check_gen;
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->visited)
+ if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, epi->ffd.file,
@@ -1721,9 +1726,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
- list_add(&epi->ffd.file->f_tfile_llink,
- &tfile_check_list);
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ if (get_file_rcu(epi->ffd.file))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -1744,18 +1751,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- int ret;
- struct eventpoll *ep_cur, *ep_next;
-
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
- /* clear visited list */
- list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
- visited_list_link) {
- ep_cur->visited = 0;
- list_del(&ep_cur->visited_list_link);
- }
- return ret;
}
static void clear_tfile_check_list(void)
@@ -1767,6 +1764,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -1898,19 +1896,20 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
+ ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
full_check = 1;
mutex_unlock(&ep->mtx);
mutex_lock(&epmutex);
if (is_file_epoll(tf.file)) {
error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
+ if (ep_loop_check(ep, tf.file) != 0)
goto error_tgt_fput;
- }
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
mutex_lock_nested(&ep->mtx, 0);
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
@@ -1934,8 +1933,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- if (full_check)
- clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1956,8 +1953,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (full_check)
+ if (full_check) {
+ clear_tfile_check_list();
+ loop_check_gen++;
mutex_unlock(&epmutex);
+ }
fdput(tf);
error_fput:
diff --git a/fs/exec.c b/fs/exec.c
index eba40240c0c5..341b872d758f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -875,7 +875,7 @@ static int exec_mmap(struct mm_struct *mm)
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
- mm_release(tsk, old_mm);
+ exec_mm_release(tsk, old_mm);
if (old_mm) {
sync_mm_rss(old_mm);
@@ -1124,6 +1124,8 @@ int flush_old_exec(struct linux_binprm * bprm)
*/
set_mm_exe_file(bprm->mm, bprm->file);
+ would_dump(bprm, bprm->file);
+
/*
* Release all of the old mmap stuff
*/
@@ -1209,7 +1211,7 @@ void setup_new_exec(struct linux_binprm * bprm)
/* An exec changes our domain. We are no longer part of the thread
group */
- current->self_exec_id++;
+ WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1);
flush_signal_handlers(current, 0);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1634,8 +1636,6 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out;
- would_dump(bprm, bprm->file);
-
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 60f03b78914e..9323b9a8bc72 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -377,9 +377,8 @@ err:
* and will start a new collection. Eventually caller must submit the last
* segment if present.
*/
-static int readpage_strip(void *data, struct page *page)
+static int __readpage_strip(struct page_collect *pcol, struct page *page)
{
- struct page_collect *pcol = data;
struct inode *inode = pcol->inode;
struct exofs_i_info *oi = exofs_i(inode);
loff_t i_size = i_size_read(inode);
@@ -470,6 +469,13 @@ fail:
return ret;
}
+static int readpage_strip(struct file *data, struct page *page)
+{
+ struct page_collect *pcol = (struct page_collect *)data;
+
+ return __readpage_strip(pcol, page);
+}
+
static int exofs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
@@ -499,7 +505,7 @@ static int _readpage(struct page *page, bool read_4_write)
_pcol_init(&pcol, 1, page->mapping->host);
pcol.read_4_write = read_4_write;
- ret = readpage_strip(&pcol, page);
+ ret = __readpage_strip(&pcol, page);
if (ret) {
EXOFS_ERR("_readpage => %d\n", ret);
return ret;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 6599c6124552..01cbdd0987c0 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -148,6 +148,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
mutex_unlock(&parent->d_inode->i_mutex);
if (IS_ERR(tmp)) {
dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+ err = PTR_ERR(tmp);
goto out_err;
}
if (tmp != dentry) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 9f9992b37924..2e4747e0aaf0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -46,10 +46,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
struct ext2_sb_info *sbi = EXT2_SB(sb);
if (block_group >= sbi->s_groups_count) {
- ext2_error (sb, "ext2_get_group_desc",
- "block_group >= groups_count - "
- "block_group = %d, groups_count = %lu",
- block_group, sbi->s_groups_count);
+ WARN(1, "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sbi->s_groups_count);
return NULL;
}
@@ -57,10 +56,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
- ext2_error (sb, "ext2_get_group_desc",
- "Group descriptor not loaded - "
- "block_group = %d, group_desc = %lu, desc = %lu",
- block_group, group_desc, offset);
+ WARN(1, "Group descriptor not loaded - "
+ "block_group = %d, group_desc = %lu, desc = %lu",
+ block_group, group_desc, offset);
return NULL;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index efe5fb21c533..d9ef354b821a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -79,6 +79,7 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
if (dir)
le16_add_cpu(&desc->bg_used_dirs_count, -1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
+ percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
if (dir)
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
mark_buffer_dirty(bh);
@@ -525,7 +526,7 @@ got:
goto fail;
}
- percpu_counter_add(&sbi->s_freeinodes_counter, -1);
+ percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7600c98a8f86..f5cf7faf43c1 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1054,9 +1054,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext2;
- sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_first_data_block) - 1)
- / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) - 1)
+ / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index e66047d6943f..1435b0749f58 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -823,8 +823,7 @@ ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh)
error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
if (error) {
if (error == -EBUSY) {
- ea_bdebug(bh, "already in cache (%d cache entries)",
- atomic_read(&ext2_xattr_cache->c_entry_count));
+ ea_bdebug(bh, "already in cache");
error = 0;
}
} else
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e0fb7cdcee89..b041a215cd73 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -279,6 +279,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh_p;
if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@@ -289,7 +290,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- if (!sbi->s_group_desc[group_desc]) {
+ bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
+ /*
+ * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
+ * the pointer being dereferenced won't be dereferenced again. By
+ * looking at the usage in add_new_gdb() the value isn't modified,
+ * just the pointer, and so it remains valid.
+ */
+ if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
@@ -297,10 +305,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
desc = (struct ext4_group_desc *)(
- (__u8 *)sbi->s_group_desc[group_desc]->b_data +
+ (__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
- *bh = sbi->s_group_desc[group_desc];
+ *bh = bh_p;
return desc;
}
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 02ddec6d8a7d..176a8382e372 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -23,6 +23,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
+ u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
@@ -43,7 +44,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
- if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+ if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
+ entry1->ino == entry2->ino)
return 1;
return 0;
}
@@ -55,9 +57,9 @@ static inline int can_merge(struct ext4_system_zone *entry1,
*/
static int add_system_zone(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
- unsigned int count)
+ unsigned int count, u32 ino)
{
- struct ext4_system_zone *new_entry = NULL, *entry;
+ struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &sbi->system_blks.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
@@ -68,30 +70,21 @@ static int add_system_zone(struct ext4_sb_info *sbi,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
- else {
- if (start_blk + count > (entry->start_blk +
- entry->count))
- entry->count = (start_blk + count -
- entry->start_blk);
- new_node = *n;
- new_entry = rb_entry(new_node, struct ext4_system_zone,
- node);
- break;
- }
+ else /* Unexpected overlap of system zones. */
+ return -EFSCORRUPTED;
}
- if (!new_entry) {
- new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
- GFP_KERNEL);
- if (!new_entry)
- return -ENOMEM;
- new_entry->start_blk = start_blk;
- new_entry->count = count;
- new_node = &new_entry->node;
-
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &sbi->system_blks);
- }
+ new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+ GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+ new_entry->start_blk = start_blk;
+ new_entry->count = count;
+ new_entry->ino = ino;
+ new_node = &new_entry->node;
+
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &sbi->system_blks);
/* Can we merge to the left? */
node = rb_prev(new_node);
@@ -136,6 +129,50 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk("\n");
}
+static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
+{
+ struct inode *inode;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_map_blocks map;
+ u32 i = 0, num;
+ int err = 0, n;
+
+ if ((ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
+ return -EINVAL;
+ inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ while (i < num) {
+ cond_resched();
+ map.m_lblk = i;
+ map.m_len = num - i;
+ n = ext4_map_blocks(NULL, inode, &map, 0);
+ if (n < 0) {
+ err = n;
+ break;
+ }
+ if (n == 0) {
+ i++;
+ } else {
+ err = add_system_zone(sbi, map.m_pblk, n, ino);
+ if (err < 0) {
+ if (err == -EFSCORRUPTED) {
+ ext4_error(sb,
+ "blocks %llu-%llu from inode %u "
+ "overlap system zone", map.m_pblk,
+ map.m_pblk + map.m_len - 1, ino);
+ }
+ break;
+ }
+ i += n;
+ }
+ }
+ iput(inode);
+ return err;
+}
+
int ext4_setup_system_zone(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
@@ -157,16 +194,22 @@ int ext4_setup_system_zone(struct super_block *sb)
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1);
+ ext4_bg_num_gdb(sb, i) + 1, 0);
gdp = ext4_get_group_desc(sb, i, NULL);
- ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
- ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group);
+ sbi->s_itb_per_group, 0);
+ if (ret)
+ return ret;
+ }
+ if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
+ ret = ext4_protect_reserved_inode(sb,
+ le32_to_cpu(sbi->s_es->s_journal_inum));
if (ret)
return ret;
}
@@ -193,10 +236,11 @@ void ext4_release_system_zone(struct super_block *sb)
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with filesystem metadata blocks.
*/
-int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
- unsigned int count)
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+ unsigned int count)
{
struct ext4_system_zone *entry;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *n = sbi->system_blks.rb_node;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
@@ -212,6 +256,8 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
+ if (entry->ino == inode->i_ino)
+ return 1;
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
}
@@ -226,11 +272,15 @@ int ext4_check_blockref(const char *function, unsigned int line,
__le32 *bref = p;
unsigned int blk;
+ if (ext4_has_feature_journal(inode->i_sb) &&
+ (inode->i_ino ==
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ return 0;
+
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- blk, 1))) {
+ unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
es->s_last_error_block = cpu_to_le64(blk);
ext4_error_inode(inode, function, line, blk,
"invalid block");
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index e452f9a9f174..df7014749be2 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -75,6 +75,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
error_msg = "rec_len is too small for name_len";
else if (unlikely(((char *) de - buf) + rlen > size))
error_msg = "directory entry overrun";
+ else if (unlikely(((char *) de - buf) + rlen >
+ size - EXT4_DIR_REC_LEN(1) &&
+ ((char *) de - buf) + rlen != size)) {
+ error_msg = "directory entry too close to block end";
+ }
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
@@ -120,12 +125,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
if (err != ERR_BAD_DX_DIR) {
return err;
}
- /*
- * We don't set the inode dirty flag since it's not
- * critical that it get flushed back to the disk.
- */
- ext4_clear_inode_flag(file_inode(file),
- EXT4_INODE_INDEX);
+ /* Can we just clear INDEX flag to ignore htree information? */
+ if (!ext4_has_metadata_csum(sb)) {
+ /*
+ * We don't set the inode dirty flag since it's not
+ * critical that it gets flushed back to the disk.
+ */
+ ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
+ }
}
if (ext4_has_inline_data(inode)) {
@@ -517,7 +524,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct dir_private_info *info = file->private_data;
struct inode *inode = file_inode(file);
struct fname *fname;
- int ret;
+ int ret = 0;
if (!info) {
info = ext4_htree_create_dir_info(file, ctx->pos);
@@ -565,7 +572,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
info->curr_minor_hash,
&info->next_hash);
if (ret < 0)
- return ret;
+ goto finished;
if (ret == 0) {
ctx->pos = ext4_get_htree_eof(file);
break;
@@ -596,7 +603,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
}
finished:
info->last_pos = ctx->pos;
- return 0;
+ return ret < 0 ? ret : 0;
}
static int ext4_dir_open(struct inode * inode, struct file * filp)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c161f9a9e7e1..87ab6150343b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -379,14 +379,15 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -1028,6 +1029,7 @@ struct ext4_inode_info {
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
@@ -1070,9 +1072,15 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota,
+ * enable enforcement for hidden
+ * quota files */
+#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable
+ * enforcement for hidden quota
+ * files */
+#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
+ * enforcement */
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
@@ -1283,7 +1291,7 @@ struct ext4_super_block {
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
@@ -1305,7 +1313,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
- struct buffer_head **s_group_desc;
+ struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
@@ -1365,7 +1373,7 @@ struct ext4_sb_info {
#endif
/* for buddy allocator */
- struct ext4_group_info ***s_group_info;
+ struct ext4_group_info ** __rcu *s_group_info;
struct inode *s_buddy_cache;
spinlock_t s_md_lock;
unsigned short *s_mb_offsets;
@@ -1412,7 +1420,7 @@ struct ext4_sb_info {
unsigned int s_extent_max_zeroout_kb;
unsigned int s_log_groups_per_flex;
- struct flex_groups *s_flex_groups;
+ struct flex_groups * __rcu *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
/* workqueue for reserved extent conversions (buffered io) */
@@ -1494,6 +1502,23 @@ static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
}
/*
+ * Returns: sbi->field[index]
+ * Used to access an array element from the following sbi fields which require
+ * rcu protection to avoid dereferencing an invalid pointer due to reassignment
+ * - s_group_desc
+ * - s_group_info
+ * - s_flex_group
+ */
+#define sbi_array_rcu_deref(sbi, field, index) \
+({ \
+ typeof(*((sbi)->field)) _v; \
+ rcu_read_lock(); \
+ _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
+ rcu_read_unlock(); \
+ _v; \
+})
+
+/*
* Inode dynamic state flags
*/
enum {
@@ -1784,7 +1809,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1826,6 +1852,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
@@ -2408,8 +2439,12 @@ int ext4_insert_dentry(struct inode *dir,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb))
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
+ /* ext4_iget() should have caught this... */
+ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
+ }
}
static unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -2505,8 +2540,19 @@ int do_journal_get_write_access(handle_t *handle,
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA 2
-extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
+typedef enum {
+ EXT4_IGET_NORMAL = 0,
+ EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
+ EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
+} ext4_iget_flags;
+
+extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line);
+
+#define ext4_iget(sb, ino, flags) \
+ __ext4_iget((sb), (ino), (flags), __func__, __LINE__)
+
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2533,6 +2579,7 @@ extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
@@ -2582,6 +2629,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern int ext4_empty_dir(struct inode *inode);
/* resize.c */
+extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
@@ -2822,13 +2870,13 @@ static inline
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
ext4_group_t group)
{
- struct ext4_group_info ***grp_info;
+ struct ext4_group_info **grp_info;
long indexv, indexh;
BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
- grp_info = EXT4_SB(sb)->s_group_info;
indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- return grp_info[indexv][indexh];
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+ return grp_info[indexh];
}
/*
@@ -2878,7 +2926,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
!mutex_is_locked(&inode->i_mutex));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = newsize;
+ WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
up_write(&EXT4_I(inode)->i_data_sem);
}
@@ -3130,9 +3178,9 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
-extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count);
+extern int ext4_inode_block_valid(struct inode *inode,
+ ext4_fsblk_t start_blk,
+ unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2d8e73793512..eea4e7547656 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -169,10 +169,13 @@ struct ext4_ext_path {
(EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
#define EXT_LAST_INDEX(__hdr__) \
(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_MAX_EXTENT(__hdr__) \
- (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_EXTENT(__hdr__) \
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
+ : 0)
#define EXT_MAX_INDEX(__hdr__) \
- (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0)
static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a88c1ed81411..4feac4bf90f4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -384,7 +384,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
*/
if (lblock + len <= lblock)
return 0;
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ return ext4_inode_block_valid(inode, block, len);
}
static int ext4_valid_extent_idx(struct inode *inode,
@@ -392,7 +392,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
{
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
+ return ext4_inode_block_valid(inode, block, 1);
}
static int ext4_valid_extent_entries(struct inode *inode,
@@ -505,6 +505,30 @@ int ext4_ext_check_inode(struct inode *inode)
return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}
+static void ext4_cache_extents(struct inode *inode,
+ struct ext4_extent_header *eh)
+{
+ struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+ ext4_lblk_t prev = 0;
+ int i;
+
+ for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+ unsigned int status = EXTENT_STATUS_WRITTEN;
+ ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+ int len = ext4_ext_get_actual_len(ex);
+
+ if (prev && (prev != lblk))
+ ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
+ EXTENT_STATUS_HOLE);
+
+ if (ext4_ext_is_unwritten(ex))
+ status = EXTENT_STATUS_UNWRITTEN;
+ ext4_es_cache_extent(inode, lblk, len,
+ ext4_ext_pblock(ex), status);
+ prev = lblk + len;
+ }
+}
+
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
struct inode *inode, ext4_fsblk_t pblk, int depth,
@@ -535,26 +559,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
*/
if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
struct ext4_extent_header *eh = ext_block_hdr(bh);
- struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
- ext4_lblk_t prev = 0;
- int i;
-
- for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
- unsigned int status = EXTENT_STATUS_WRITTEN;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- int len = ext4_ext_get_actual_len(ex);
-
- if (prev && (prev != lblk))
- ext4_es_cache_extent(inode, prev,
- lblk - prev, ~0,
- EXTENT_STATUS_HOLE);
-
- if (ext4_ext_is_unwritten(ex))
- status = EXTENT_STATUS_UNWRITTEN;
- ext4_es_cache_extent(inode, lblk, len,
- ext4_ext_pblock(ex), status);
- prev = lblk + len;
- }
+ ext4_cache_extents(inode, eh);
}
return bh;
errout:
@@ -860,6 +865,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
eh->eh_entries = 0;
eh->eh_magic = EXT4_EXT_MAGIC;
eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
+ eh->eh_generation = 0;
ext4_mark_inode_dirty(handle, inode);
return 0;
}
@@ -902,6 +908,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
path[0].p_bh = NULL;
i = depth;
+ if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
+ ext4_cache_extents(inode, eh);
/* walk through the tree */
while (i) {
ext_debug("depth %d: num %d, max %d\n",
@@ -1121,6 +1129,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_depth = 0;
+ neh->eh_generation = 0;
/* move remainder of path[depth] to the new leaf */
if (unlikely(path[depth].p_hdr->eh_entries !=
@@ -1198,6 +1207,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
neh->eh_depth = cpu_to_le16(depth - i);
+ neh->eh_generation = 0;
fidx = EXT_FIRST_INDEX(neh);
fidx->ei_block = border;
ext4_idx_store_pblock(fidx, oldblock);
@@ -2891,7 +2901,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial_cluster =
-(long long) EXT4_B2C(sbi, pblk);
}
@@ -3253,7 +3263,10 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_unwritten(ex2);
err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
- if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ if (err != -ENOSPC && err != -EDQUOT)
+ goto out;
+
+ if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
err = ext4_ext_zeroout(inode, ex2);
@@ -3279,30 +3292,30 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_pblock(&orig_ex));
}
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
- if (err)
- goto fix_extent_len;
-
- /* update extent status tree */
- err = ext4_zeroout_es(inode, &zero_ex);
-
- goto out;
- } else if (err)
- goto fix_extent_len;
-
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
+ if (!err) {
+ /* update the extent length and mark as initialized */
+ ex->ee_len = cpu_to_le16(ee_len);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (!err)
+ /* update extent status tree */
+ err = ext4_zeroout_es(inode, &zero_ex);
+ /* If we failed at this point, we don't know in which
+ * state the extent tree exactly is so don't try to fix
+ * length of the original extent as it may do even more
+ * damage.
+ */
+ goto out;
+ }
+ }
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
}
/*
@@ -3431,8 +3444,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
(unsigned long long)map->m_lblk, map_len);
sbi = EXT4_SB(inode->i_sb);
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
+ eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
+ >> inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map_len)
eof_block = map->m_lblk + map_len;
@@ -3693,8 +3706,8 @@ static int ext4_split_convert_extents(handle_t *handle,
__func__, inode->i_ino,
(unsigned long long)map->m_lblk, map->m_len);
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
+ eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
+ >> inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
eof_block = map->m_lblk + map->m_len;
/*
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ac748b3af1c1..665cf30c95e9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1080,11 +1080,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
- if (!nr_to_scan)
- return ret;
-
nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0963213e9cd3..ea26a0dcb87b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -331,11 +331,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, block_group);
+ struct flex_groups *fg;
- atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups,
+ ext4_flex_group(sbi, block_group));
+ atomic_inc(&fg->free_inodes);
if (is_directory)
- atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+ atomic_dec(&fg->used_dirs);
}
BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
@@ -376,12 +378,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
- struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
if (flex_size > 1) {
- stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
- stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
- stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
+ struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb),
+ s_flex_groups, g);
+ stats->free_inodes = atomic_read(&fg->free_inodes);
+ stats->free_clusters = atomic64_read(&fg->free_clusters);
+ stats->used_dirs = atomic_read(&fg->used_dirs);
return;
}
@@ -402,7 +405,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*
* We always try to spread first-level directories.
*
- * If there are blockgroups with both free inodes and free blocks counts
+ * If there are blockgroups with both free inodes and free clusters counts
* not worse than average we return one with smallest directory count.
* Otherwise we simply return a random group.
*
@@ -411,7 +414,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
* It's OK to put directory into a group unless
* it has too many directories already (max_dirs) or
* it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
+ * it has too few free clusters left (min_clusters) or
* Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest. If none
* of the groups look good we just look for a group with more
@@ -427,7 +430,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei, grp_free;
- ext4_fsblk_t freeb, avefreec;
+ ext4_fsblk_t freec, avefreec;
unsigned int ndirs;
int max_dirs, min_inodes;
ext4_grpblk_t min_clusters;
@@ -446,9 +449,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
- freeb = EXT4_C2B(sbi,
- percpu_counter_read_positive(&sbi->s_freeclusters_counter));
- avefreec = freeb;
+ freec = percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+ avefreec = freec;
do_div(avefreec, ngroups);
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
@@ -779,6 +781,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
@@ -981,7 +990,8 @@ got:
if (sbi->s_log_groups_per_flex) {
ext4_group_t f = ext4_flex_group(sbi, group);
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+ atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups,
+ f)->used_dirs);
}
}
if (ext4_has_group_desc_csum(sb)) {
@@ -1004,7 +1014,8 @@ got:
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
- atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
+ atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_inodes);
}
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
@@ -1145,7 +1156,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
if (!ext4_test_bit(bit, bitmap_bh->b_data))
goto bad_orphan;
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
@@ -1273,6 +1284,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
handle_t *handle;
ext4_fsblk_t blk;
int num, ret = 0, used_blks = 0;
+ unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */
if (sb->s_flags & MS_RDONLY) {
@@ -1303,22 +1315,37 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
* used inodes so we need to skip blocks with used inodes in
* inode table.
*/
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
- used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)),
- sbi->s_inodes_per_block);
-
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
- ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)) <
- EXT4_FIRST_INO(sb)))) {
- ext4_error(sb, "Something is wrong with group %u: "
- "used itable blocks: %d; "
- "itable unused count: %u",
- group, used_blks,
- ext4_itable_unused_count(sb, gdp));
- ret = 1;
- goto err_out;
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+ used_inos = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ used_blks = DIV_ROUND_UP(used_inos, sbi->s_inodes_per_block);
+
+ /* Bogus inode unused count? */
+ if (used_blks < 0 || used_blks > sbi->s_itb_per_group) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
+ group, used_blks,
+ ext4_itable_unused_count(sb, gdp));
+ ret = 1;
+ goto err_out;
+ }
+
+ used_inos += group * EXT4_INODES_PER_GROUP(sb);
+ /*
+ * Are there some uninitialized inodes in the inode table
+ * before the first normal inode?
+ */
+ if ((used_blks != sbi->s_itb_per_group) &&
+ (used_inos < EXT4_FIRST_INO(sb))) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "itable unused count: %u; "
+ "itables initialized count: %ld",
+ group, ext4_itable_unused_count(sb, gdp),
+ used_inos);
+ ret = 1;
+ goto err_out;
+ }
}
blk = ext4_inode_table(sb, gdp) + used_blks;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 08f3a0c0f468..4f610cd8041b 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -946,8 +946,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
@@ -1109,8 +1108,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index ec506c2733ee..9f893b6df1cb 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -760,6 +760,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
+ /*
+ * ei->i_inline_off may have changed since ext4_write_begin()
+ * called ext4_try_to_write_inline_data()
+ */
+ (void) ext4_find_inline_data_nolock(inode);
+
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
kunmap_atomic(kaddr);
@@ -1430,7 +1436,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
err = ext4_htree_store_dirent(dir_file, hinfo->hash,
hinfo->minor_hash, de, &tmp_str);
if (err) {
- count = err;
+ ret = err;
goto out;
}
count++;
@@ -1904,6 +1910,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a8eeea6bcb7c..b65680c5404b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -379,8 +379,11 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- map->m_len)) {
+ if (ext4_has_feature_journal(inode->i_sb) &&
+ (inode->i_ino ==
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ return 0;
+ if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
@@ -1851,13 +1854,13 @@ static int __ext4_journalled_writepage(struct page *page,
if (!ret)
ret = err;
- if (!ext4_has_inline_data(inode))
- ext4_walk_page_buffers(NULL, page_bufs, 0, len,
- NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
+ if (!inline_data && page_bufs)
+ ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+ NULL, bput_one);
brelse(inode_bh);
return ret;
}
@@ -2350,7 +2353,7 @@ update_disksize:
* truncate are avoided by checking i_size under i_data_sem.
*/
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
- if (disksize > EXT4_I(inode)->i_disksize) {
+ if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
int err2;
loff_t i_size;
@@ -3382,6 +3385,13 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
+ if (iov_iter_rw(iter) == READ) {
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
+ }
+
#if defined(CONFIG_EXT4_FS_ENCRYPTION) && \
!defined(CONFIG_EXT4_FS_ICE_ENCRYPTION)
@@ -4290,7 +4300,17 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
-struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ return -EOPNOTSUPP;
+ *projid = EXT4_I(inode)->i_projid;
+ return 0;
+}
+
+struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
@@ -4302,6 +4322,19 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
+
+ if ((!(flags & EXT4_IGET_SPECIAL) &&
+ (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
+ (ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
+ if (flags & EXT4_IGET_HANDLE)
+ return ERR_PTR(-ESTALE);
+ __ext4_error(sb, function, line,
+ "inode #%lu: comm %s: iget: illegal inode #",
+ ino, current->comm);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
inode = iget_locked(sb, ino);
if (!inode)
@@ -4318,11 +4351,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- EXT4_ERROR_INODE(inode, "root inode unallocated");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+ goto bad_inode;
+ }
+
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
@@ -4349,7 +4389,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
- EXT4_ERROR_INODE(inode, "checksum invalid");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
@@ -4357,12 +4398,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4397,7 +4446,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad i_size value: %lld", size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ /*
+ * If dir_index is not enabled but there's dir with INDEX flag set,
+ * we'd normally treat htree data as empty space. But with metadata
+ * checksumming that corrupts checksums so forbid that.
+ */
+ if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
+ EXT4_ERROR_INODE(inode,
+ "iget: Dir with htree data on filesystem without dir_index feature.");
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -4467,8 +4529,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
- !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
- EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+ !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extended attribute block %llu",
ei->i_file_acl);
ret = -EFSCORRUPTED;
goto bad_inode;
@@ -4523,7 +4586,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
make_bad_inode(inode);
} else {
ret = -EFSCORRUPTED;
- EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
brelse(iloc.bh);
@@ -4537,19 +4601,12 @@ bad_inode:
return ERR_PTR(ret);
}
-struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
-{
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-EFSCORRUPTED);
- return ext4_iget(sb, ino);
-}
-
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
{
struct inode *inode = &(ei->vfs_inode);
- u64 i_blocks = inode->i_blocks;
+ u64 i_blocks = READ_ONCE(inode->i_blocks);
struct super_block *sb = inode->i_sb;
if (i_blocks <= ~0U) {
@@ -4662,10 +4719,11 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
struct super_block *sb = inode->i_sb;
- int err = 0, rc, block;
+ int err = 0, block;
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
@@ -4678,6 +4736,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4755,6 +4814,15 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4762,9 +4830,9 @@ static int ext4_do_update_inode(handle_t *handle,
bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- rc = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (!err)
- err = rc;
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_brelse;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
if (set_large_file) {
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
@@ -5265,10 +5333,25 @@ static int ext4_expand_extra_isize(struct inode *inode,
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
+ unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0;
+ /* this was checked at iget time, but double check for good measure */
+ if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) ||
+ (ei->i_extra_isize & 3)) {
+ EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)",
+ ei->i_extra_isize,
+ EXT4_INODE_SIZE(inode->i_sb));
+ return -EFSCORRUPTED;
+ }
+ if ((new_extra_isize < ei->i_extra_isize) ||
+ (new_extra_isize < 4) ||
+ (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE))
+ return -EINVAL; /* Should never happen */
+
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 06ca0e647a97..fb750dc13830 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -105,7 +105,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
return -EPERM;
- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
+ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
@@ -615,8 +615,6 @@ resizefs_out:
sizeof(range)))
return -EFAULT;
- range.minlen = max((unsigned int)range.minlen,
- q->limits.discard_granularity);
ret = ext4_trim_fs(sb, &range, flags);
if (ret < 0)
return ret;
@@ -677,7 +675,10 @@ encryption_policy_out:
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto pwsalt_err_journal;
+ lock_buffer(sbi->s_sbh);
generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
err = ext4_handle_dirty_metadata(handle, NULL,
sbi->s_sbh);
pwsalt_err_journal:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d38cfed0394f..c15cb259c9ea 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1944,7 +1944,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
int free;
free = e4b->bd_info->bb_free;
- BUG_ON(free <= 0);
+ if (WARN_ON(free <= 0))
+ return;
i = e4b->bd_info->bb_first_free;
@@ -1965,7 +1966,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
}
mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
- BUG_ON(ex.fe_len <= 0);
+ if (WARN_ON(ex.fe_len <= 0))
+ break;
if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
@@ -2378,7 +2380,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned size;
- struct ext4_group_info ***new_groupinfo;
+ struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
@@ -2391,13 +2393,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
}
- if (sbi->s_group_info) {
- memcpy(new_groupinfo, sbi->s_group_info,
+ rcu_read_lock();
+ old_groupinfo = rcu_dereference(sbi->s_group_info);
+ if (old_groupinfo)
+ memcpy(new_groupinfo, old_groupinfo,
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
- kvfree(sbi->s_group_info);
- }
- sbi->s_group_info = new_groupinfo;
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
+ if (old_groupinfo)
+ ext4_kvfree_array_rcu(old_groupinfo);
ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
sbi->s_group_info_size);
return 0;
@@ -2409,6 +2414,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
{
int i;
int metalen = 0;
+ int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
@@ -2427,12 +2433,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
"for a buddy group");
goto exit_meta_group_info;
}
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
- meta_group_info;
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
+ rcu_read_unlock();
}
- meta_group_info =
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
+ meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
@@ -2480,8 +2486,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
exit_group_info:
/* If a meta_group_info table has been allocated, release it now */
if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
- kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
+ struct ext4_group_info ***group_info;
+
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
+ kfree(group_info[idx]);
+ group_info[idx] = NULL;
+ rcu_read_unlock();
}
exit_meta_group_info:
return -ENOMEM;
@@ -2494,6 +2505,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
struct ext4_group_desc *desc;
+ struct ext4_group_info ***group_info;
struct kmem_cache *cachep;
err = ext4_mb_alloc_groupinfo(sb, ngroups);
@@ -2528,11 +2540,16 @@ err_freebuddy:
while (i-- > 0)
kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = sbi->s_group_info_size;
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
while (i-- > 0)
- kfree(sbi->s_group_info[i]);
+ kfree(group_info[i]);
+ rcu_read_unlock();
iput(sbi->s_buddy_cache);
err_freesgi:
- kvfree(sbi->s_group_info);
+ rcu_read_lock();
+ kvfree(rcu_dereference(sbi->s_group_info));
+ rcu_read_unlock();
return -ENOMEM;
}
@@ -2720,7 +2737,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
int num_meta_group_infos;
- struct ext4_group_info *grinfo;
+ struct ext4_group_info *grinfo, ***group_info;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
@@ -2738,9 +2755,12 @@ int ext4_mb_release(struct super_block *sb)
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
for (i = 0; i < num_meta_group_infos; i++)
- kfree(sbi->s_group_info[i]);
- kvfree(sbi->s_group_info);
+ kfree(group_info[i]);
+ kvfree(group_info);
+ rcu_read_unlock();
}
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
@@ -2941,7 +2961,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (!ext4_data_block_valid(sbi, block, len)) {
+ if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
@@ -2996,7 +3016,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_group_t flex_group = ext4_flex_group(sbi,
ac->ac_b_ex.fe_group);
atomic64_sub(ac->ac_b_ex.fe_len,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4626,6 +4647,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
+ kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}
@@ -4697,7 +4719,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_data_block_valid(sbi, block, count)) {
+ !ext4_inode_block_valid(inode, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
goto error_return;
@@ -4889,7 +4911,8 @@ do_more:
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(count_clusters,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
@@ -5034,7 +5057,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
ext4_mb_unload_buddy(&e4b);
@@ -5207,6 +5231,7 @@ out:
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
unsigned long blkdev_flags)
{
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct ext4_group_info *grp;
ext4_group_t group, first_group, last_group;
ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
@@ -5225,6 +5250,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
start >= max_blks ||
range->len < sb->s_blocksize)
return -EINVAL;
+ /* No point to try to trim less than discard granularity */
+ if (range->minlen < q->limits.discard_granularity) {
+ minlen = EXT4_NUM_B2C(EXT4_SB(sb),
+ q->limits.discard_granularity >> sb->s_blocksize_bits);
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
+ goto out;
+ }
if (end >= max_blks)
end = max_blks - 1;
if (end <= first_data_blk)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a4651894cc33..1073e24ab622 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -459,12 +459,12 @@ int ext4_ext_migrate(struct inode *inode)
return retval;
/*
- * Worst case we can touch the allocation bitmaps, a bgd
- * block, and a block to link in the orphan list. We do need
- * need to worry about credits for modifying the quota inode.
+ * Worst case we can touch the allocation bitmaps and a block
+ * group descriptor block. We do need need to worry about
+ * credits for modifying the quota inode.
*/
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
- 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
+ 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
@@ -481,6 +481,13 @@ int ext4_ext_migrate(struct inode *inode)
ext4_journal_stop(handle);
return retval;
}
+ /*
+ * Use the correct seed for checksum (i.e. the seed from 'inode'). This
+ * is so that the metadata blocks will have the correct checksum after
+ * the migration.
+ */
+ ei = EXT4_I(inode);
+ EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
i_size_write(tmp_inode, i_size_read(inode));
/*
* Set the i_nlink to zero so it will be deleted later
@@ -489,7 +496,6 @@ int ext4_ext_migrate(struct inode *inode)
clear_nlink(tmp_inode);
ext4_ext_tree_init(handle, tmp_inode);
- ext4_orphan_add(handle, tmp_inode);
ext4_journal_stop(handle);
/*
@@ -514,17 +520,10 @@ int ext4_ext_migrate(struct inode *inode)
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle)) {
- /*
- * It is impossible to update on-disk structures without
- * a handle, so just rollback in-core changes and live other
- * work to orphan_list_cleanup()
- */
- ext4_orphan_del(NULL, tmp_inode);
retval = PTR_ERR(handle);
goto out;
}
- ei = EXT4_I(inode);
i_data = ei->i_data;
memset(&lb, 0, sizeof(lb));
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c455a8b649a1..9c6fe28fe27e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1226,19 +1226,18 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
}
/*
- * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
+ * Test whether a directory entry matches the filename being searched for.
*
- * `len <= EXT4_NAME_LEN' is guaranteed by caller.
- * `de != NULL' is guaranteed by caller.
+ * Return: %true if the directory entry matches, otherwise %false.
*/
-static inline int ext4_match(struct ext4_filename *fname,
- struct ext4_dir_entry_2 *de)
+static inline bool ext4_match(const struct ext4_filename *fname,
+ const struct ext4_dir_entry_2 *de)
{
const void *name = fname_name(fname);
u32 len = fname_len(fname);
if (!de->inode)
- return 0;
+ return false;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (unlikely(!name)) {
@@ -1270,48 +1269,31 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct ext4_dir_entry_2 * de;
char * dlimit;
int de_len;
- int res;
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
- if ((char *) de + de->name_len <= dlimit) {
- res = ext4_match(fname, de);
- if (res < 0) {
- res = -1;
- goto return_result;
- }
- if (res > 0) {
- /* found a match - just to be sure, do
- * a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data,
- bh->b_size, offset)) {
- res = -1;
- goto return_result;
- }
- *res_dir = de;
- res = 1;
- goto return_result;
- }
-
+ if ((char *) de + de->name_len <= dlimit &&
+ ext4_match(fname, de)) {
+ /* found a match - just to be sure, do
+ * a full check */
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
+ return -1;
+ *res_dir = de;
+ return 1;
}
/* prevent looping on a bad block */
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
- if (de_len <= 0) {
- res = -1;
- goto return_result;
- }
+ if (de_len <= 0)
+ return -1;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
-
- res = 0;
-return_result:
- return res;
+ return 0;
}
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
@@ -1418,6 +1400,7 @@ restart:
/*
* We deal with the read-ahead logic here.
*/
+ cond_resched();
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
@@ -1600,7 +1583,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry);
return ERR_PTR(-EFSCORRUPTED);
}
- inode = ext4_iget_normal(dir->i_sb, ino);
+ inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1645,7 +1628,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EFSCORRUPTED);
}
- return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino));
+ return d_obtain_alias(ext4_iget(d_inode(child)->i_sb, ino, EXT4_IGET_NORMAL));
}
/*
@@ -1747,7 +1730,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
+ /* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
@@ -1757,8 +1740,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
- /* map index at which we will split */
- split = count - move;
+ /*
+ * map index at which we will split
+ *
+ * If the sum of active entries didn't exceed half the block size, just
+ * split it in half by count; each resulting block will have at least
+ * half the space free.
+ */
+ if (i > 0)
+ split = count - move;
+ else
+ split = count/2;
+
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
@@ -1823,24 +1816,15 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
int nlen, rlen;
unsigned int offset = 0;
char *top;
- int res;
de = (struct ext4_dir_entry_2 *)buf;
top = buf + buf_size - reclen;
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- buf, buf_size, offset)) {
- res = -EFSCORRUPTED;
- goto return_result;
- }
- /* Provide crypto context and crypto buffer to ext4 match */
- res = ext4_match(fname, de);
- if (res < 0)
- goto return_result;
- if (res > 0) {
- res = -EEXIST;
- goto return_result;
- }
+ buf, buf_size, offset))
+ return -EFSCORRUPTED;
+ if (ext4_match(fname, de))
+ return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1848,15 +1832,11 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
-
if ((char *) de > top)
- res = -ENOSPC;
- else {
- *dest_de = de;
- res = 0;
- }
-return_result:
- return res;
+ return -ENOSPC;
+
+ *dest_de = de;
+ return 0;
}
int ext4_insert_dentry(struct inode *dir,
@@ -2119,6 +2099,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
+ /* Can we just ignore htree data? */
+ if (ext4_has_metadata_csum(sb)) {
+ EXT4_ERROR_INODE(dir,
+ "Directory has corrupted htree index.");
+ retval = -EFSCORRUPTED;
+ goto out;
+ }
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
@@ -2332,7 +2319,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
@@ -3039,18 +3026,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- if (inode->i_nlink == 0) {
- ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
- dentry->d_name.len, dentry->d_name.name);
- set_nlink(inode, 1);
- }
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
- drop_nlink(inode);
+ if (inode->i_nlink == 0)
+ ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
+ dentry->d_name.len, dentry->d_name.name);
+ else
+ drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode);
@@ -3231,7 +3217,13 @@ static int ext4_link(struct dentry *old_dentry,
return -EMLINK;
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
- return -EPERM;
+ return -EXDEV;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -3384,12 +3376,35 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
return retval;
}
}
- brelse(ent->bh);
- ent->bh = NULL;
return 0;
}
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ unsigned ino, unsigned file_type)
+{
+ struct ext4_renament old = *ent;
+ int retval = 0;
+
+	/*
+	 * old->de could have moved from under us during make indexed dir,
+	 * so old->de may no longer be valid and we need to find it again
+	 * before resetting the old inode info.
+	 */
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+ retval = -ENOENT;
+ if (retval) {
+ ext4_std_error(old.dir->i_sb, retval);
+ return;
+ }
+
+ ext4_setent(handle, &old, ino, file_type);
+ brelse(old.bh);
+}
+
static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name)
{
@@ -3518,6 +3533,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
!ext4_has_encryption_key(new_dir)))
return -ENOKEY;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3544,14 +3564,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
retval = -ENOENT;
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
- goto end_rename;
+ goto release_bh;
if ((old.dir != new.dir) &&
ext4_encrypted_inode(new.dir) &&
!ext4_is_child_context_consistent_with_parent(new.dir,
old.inode)) {
- retval = -EPERM;
- goto end_rename;
+ retval = -EXDEV;
+ goto release_bh;
}
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
@@ -3559,7 +3579,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
new.bh = NULL;
- goto end_rename;
+ goto release_bh;
}
if (new.bh) {
if (!new.inode) {
@@ -3576,18 +3596,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_rename;
+ goto release_bh;
}
} else {
whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
- whiteout = NULL;
- goto end_rename;
+ goto release_bh;
}
}
+ old_file_type = old.de->file_type;
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
@@ -3615,7 +3634,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
- old_file_type = old.de->file_type;
if (whiteout) {
/*
* Do this before adding a new entry, so the old entry is sure
@@ -3687,17 +3705,23 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = 0;
end_rename:
- brelse(old.dir_bh);
- brelse(old.bh);
- brelse(new.bh);
if (whiteout) {
- if (retval)
+ if (retval) {
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_orphan_add(handle, whiteout);
+ }
unlock_new_inode(whiteout);
+ ext4_journal_stop(handle);
iput(whiteout);
- }
- if (handle)
+ } else {
ext4_journal_stop(handle);
+ }
+release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+ brelse(new.bh);
return retval;
}
@@ -3731,7 +3755,15 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old.inode) ||
!ext4_is_child_context_consistent_with_parent(old_dir,
new.inode)))
- return -EPERM;
+ return -EXDEV;
+
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
retval = dquot_initialize(old.dir);
if (retval)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 5223eb25bf59..7ed01bcc2419 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,33 @@
#include "ext4_jbd2.h"
+struct ext4_rcu_ptr {
+ struct rcu_head rcu;
+ void *ptr;
+};
+
+static void ext4_rcu_ptr_callback(struct rcu_head *head)
+{
+ struct ext4_rcu_ptr *ptr;
+
+ ptr = container_of(head, struct ext4_rcu_ptr, rcu);
+ kvfree(ptr->ptr);
+ kfree(ptr);
+}
+
+void ext4_kvfree_array_rcu(void *to_free)
+{
+ struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+
+ if (ptr) {
+ ptr->ptr = to_free;
+ call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
+ return;
+ }
+ synchronize_rcu();
+ kvfree(to_free);
+}
+
int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -541,8 +568,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
- memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
- gdb->b_size);
+ memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
+ s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
@@ -849,13 +876,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@@ -903,9 +932,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
BUFFER_TRACE(gdb_bh, "get_write_access");
@@ -916,9 +947,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
return err;
}
@@ -1180,7 +1211,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
@@ -1262,7 +1294,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
@@ -1390,11 +1422,14 @@ static void ext4_update_super(struct super_block *sb,
percpu_counter_read(&sbi->s_freeclusters_counter));
if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
+ struct flex_groups *fg;
+
flex_group = ext4_flex_group(sbi, group_data[0].group);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &fg->free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
- &sbi->s_flex_groups[flex_group].free_inodes);
+ &fg->free_inodes);
}
/*
@@ -1489,7 +1524,8 @@ exit_journal:
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
@@ -1613,7 +1649,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
"No reserved GDT blocks, can't resize");
return -EPERM;
}
- inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(inode);
@@ -1941,7 +1977,8 @@ retry:
}
if (!resize_inode)
- resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
+ EXT4_IGET_SPECIAL);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9a652931eef8..9f2ca5e2531a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -313,7 +313,8 @@ static void save_error_info(struct super_block *sb, const char *func,
unsigned int line)
{
__save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
}
/*
@@ -794,6 +795,8 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head **group_desc;
+ struct flex_groups **flex_groups;
int aborted = 0;
int i, err;
@@ -825,10 +828,18 @@ static void ext4_put_super(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
- kvfree(sbi->s_flex_groups);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1010,20 +1021,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
+ /*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget_normal(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1079,8 +1081,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1113,6 +1115,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1168,7 +1171,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
@@ -1228,6 +1231,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
@@ -1447,10 +1451,13 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -1733,13 +1740,17 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
- "mount options ignored.");
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
+ ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return 0;
+ }
+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -1977,8 +1988,8 @@ done:
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct flex_groups *new_groups;
- int size;
+ struct flex_groups **old_groups, **new_groups;
+ int size, i, j;
if (!sbi->s_log_groups_per_flex)
return 0;
@@ -1987,22 +1998,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
if (size <= sbi->s_flex_groups_allocated)
return 0;
- size = roundup_pow_of_two(size * sizeof(struct flex_groups));
- new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+ new_groups = ext4_kvzalloc(roundup_pow_of_two(size *
+ sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
if (!new_groups) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
- size / (int) sizeof(struct flex_groups));
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex group pointers", size);
return -ENOMEM;
}
-
- if (sbi->s_flex_groups) {
- memcpy(new_groups, sbi->s_flex_groups,
- (sbi->s_flex_groups_allocated *
- sizeof(struct flex_groups)));
- kvfree(sbi->s_flex_groups);
+ for (i = sbi->s_flex_groups_allocated; i < size; i++) {
+ new_groups[i] = ext4_kvzalloc(roundup_pow_of_two(
+ sizeof(struct flex_groups)),
+ GFP_KERNEL);
+ if (!new_groups[i]) {
+ for (j = sbi->s_flex_groups_allocated; j < i; j++)
+ kvfree(new_groups[j]);
+ kvfree(new_groups);
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex groups", size);
+ return -ENOMEM;
+ }
}
- sbi->s_flex_groups = new_groups;
- sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+ rcu_read_lock();
+ old_groups = rcu_dereference(sbi->s_flex_groups);
+ if (old_groups)
+ memcpy(new_groups, old_groups,
+ (sbi->s_flex_groups_allocated *
+ sizeof(struct flex_groups *)));
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_flex_groups, new_groups);
+ sbi->s_flex_groups_allocated = size;
+ if (old_groups)
+ ext4_kvfree_array_rcu(old_groups);
return 0;
}
@@ -2010,6 +2036,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
+ struct flex_groups *fg;
ext4_group_t flex_group;
int i, err;
@@ -2027,12 +2054,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i);
- atomic_add(ext4_free_inodes_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
+ atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
atomic64_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(ext4_used_dirs_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].used_dirs);
+ &fg->free_clusters);
+ atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
}
return 1;
@@ -2618,6 +2644,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
@@ -3168,6 +3200,40 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
+static void ext4_clamp_want_extra_isize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ unsigned def_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+
+ if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = 0;
+ return;
+ }
+ if (sbi->s_want_extra_isize < 4) {
+ sbi->s_want_extra_isize = def_extra_isize;
+ if (ext4_has_feature_extra_isize(sb)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
+ (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size)) {
+ sbi->s_want_extra_isize = def_extra_isize;
+ ext4_msg(sb, KERN_INFO,
+ "required extra inode space not available");
+ }
+}
+
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
@@ -3201,9 +3267,10 @@ static void ext4_set_resv_clusters(struct super_block *sb)
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
+ struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ struct flex_groups **flex_groups;
ext4_fsblk_t block;
ext4_fsblk_t sb_block = get_sb_block(&data);
ext4_fsblk_t logical_sb_block;
@@ -3599,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
- sbi->s_blocks_per_group);
+ sbi->s_inodes_per_group);
goto failed_mount;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
@@ -3730,9 +3797,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
"(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
+ "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
@@ -3760,9 +3827,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
- sbi->s_group_desc = ext4_kvmalloc(db_count *
+ rcu_assign_pointer(sbi->s_group_desc,
+ ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
- GFP_KERNEL);
+ GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
@@ -3772,14 +3840,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
bgl_lock_init(sbi->s_blockgroup_lock);
for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
- if (!sbi->s_group_desc[i]) {
+ bh = sb_bread_unmovable(sb, block);
+ if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
@@ -3813,7 +3886,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -3968,7 +4041,7 @@ no_journal:
* so we can safely mount the rest of the filesystem now.
*/
- root = ext4_iget(sb, EXT4_ROOT_INO);
+ root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
ret = PTR_ERR(root);
@@ -3990,29 +4063,7 @@ no_journal:
if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
sb->s_flags |= MS_RDONLY;
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
+ ext4_clamp_want_extra_isize(sb);
ext4_set_resv_clusters(sb);
@@ -4131,13 +4182,20 @@ cantfind_ext4:
#ifdef CONFIG_QUOTA
failed_mount8:
ext4_unregister_sysfs(sb);
+ kobject_put(&sbi->s_kobj);
#endif
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
ext4_mb_release(sb);
- if (sbi->s_flex_groups)
- kvfree(sbi->s_flex_groups);
+ rcu_read_lock();
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -4168,9 +4226,12 @@ failed_mount3:
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
@@ -4222,11 +4283,12 @@ static journal_t *ext4_get_journal(struct super_block *sb,
BUG_ON(!ext4_has_feature_journal(sb));
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
-
- journal_inode = ext4_iget(sb, journal_inum);
+ /*
+ * Test for the existence of a valid inode on disk. Bad things
+ * happen if we iget() an unused inode, as the subsequent iput()
+ * will try to delete it.
+ */
+ journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
return NULL;
@@ -4450,8 +4512,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh || block_device_ejected(sb))
- return error;
+ if (!sbh)
+ return -EINVAL;
+ if (block_device_ejected(sb))
+ return -ENODEV;
/*
* The superblock bh should be mapped, but it might not be if the
@@ -4769,6 +4833,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ ext4_clamp_want_extra_isize(sb);
+
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
@@ -4922,7 +4988,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_register_li_request(sb, first_not_zeroed);
}
- ext4_setup_system_zone(sb);
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
+
if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
@@ -4966,6 +5035,48 @@ restore_opts:
return err;
}
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dq_data_lock);
+
+ limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+ dquot->dq_dqb.dqb_bsoftlimit :
+ dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ if (limit && buf->f_blocks > limit) {
+ curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ buf->f_blocks = limit;
+ buf->f_bfree = buf->f_bavail =
+ (buf->f_blocks > curblock) ?
+ (buf->f_blocks - curblock) : 0;
+ }
+
+ limit = dquot->dq_dqb.dqb_isoftlimit ?
+ dquot->dq_dqb.dqb_isoftlimit :
+ dquot->dq_dqb.dqb_ihardlimit;
+ if (limit && buf->f_files > limit) {
+ buf->f_files = limit;
+ buf->f_ffree =
+ (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+ (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ }
+
+ spin_unlock(&dq_data_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -4998,6 +5109,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
@@ -5140,6 +5256,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
@@ -5180,7 +5301,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
@@ -5188,7 +5310,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
- qf_inode = ext4_iget(sb, qf_inums[type]);
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
return PTR_ERR(qf_inode);
@@ -5211,14 +5333,21 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
+ };
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
@@ -5311,7 +5440,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
- if (EXT4_SB(sb)->s_journal && !handle) {
+ if (!handle) {
ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started",
(unsigned long long)off, (unsigned long long)len);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fdea338ce8e1..157187e6e060 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -139,31 +139,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
}
static int ext4_xattr_block_csum_verify(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
+ struct buffer_head *bh)
{
- if (ext4_has_metadata_csum(inode->i_sb) &&
- (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
- return 0;
- return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
-{
- if (!ext4_has_metadata_csum(inode->i_sb))
- return;
+ struct ext4_xattr_header *hdr = BHDR(bh);
+ int ret = 1;
- hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+ if (ext4_has_metadata_csum(inode->i_sb)) {
+ lock_buffer(bh);
+ ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+ bh->b_blocknr, hdr));
+ unlock_buffer(bh);
+ }
+ return ret;
}
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
- return ext4_handle_dirty_metadata(handle, inode, bh);
+ if (ext4_has_metadata_csum(inode->i_sb))
+ BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+ bh->b_blocknr, BHDR(bh));
}
static inline const struct xattr_handler *
@@ -226,7 +221,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
if (buffer_verified(bh))
return 0;
- if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+ if (!ext4_xattr_block_csum_verify(inode, bh))
return -EFSBADCRC;
error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
bh->b_data);
@@ -591,23 +586,23 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+
+ ext4_xattr_block_csum_set(inode, bh);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
* from a race where someone else frees the block (and releases
* its journal_head) before we are done dirtying the buffer. In
* nojournal mode this race is harmless and we actually cannot
- * call ext4_handle_dirty_xattr_block() with locked buffer as
+ * call ext4_handle_dirty_metadata() with locked buffer as
* that function can call sync_dirty_buffer() so for that case
* we handle the dirtying after unlocking the buffer.
*/
if (ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
unlock_buffer(bh);
if (!ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -841,13 +836,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ext4_xattr_rehash(header(s->base),
s->here);
}
+ ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
goto bad_block;
if (!error)
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -937,10 +933,11 @@ inserted:
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ext4_xattr_block_csum_set(inode, new_bh);
unlock_buffer(new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -991,11 +988,12 @@ getblk_failed:
goto getblk_failed;
}
memcpy(new_bh->b_data, s->base, new_bh->b_size);
+ ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode, new_bh);
+ error = ext4_handle_dirty_metadata(handle, inode,
+ new_bh);
if (error)
goto cleanup;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 1e27252368c8..7d7d5dae1500 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -244,6 +244,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a714f430675c..aa8a31be2eb2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -759,7 +759,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ if (!in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1ac142ad7d1b..79375a009306 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -653,6 +653,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return NULL;
init_rwsem(&ei->truncate_lock);
+ /* Zeroing to allow iput() even if partial initialized inode. */
+ ei->mmu_private = 0;
+ ei->i_start = 0;
+ ei->i_logstart = 0;
+ ei->i_attrs = 0;
+ ei->i_pos = 0;
+
return &ei->vfs_inode;
}
@@ -1277,16 +1284,6 @@ out:
return 0;
}
-static void fat_dummy_inode_init(struct inode *inode)
-{
- /* Initialize this dummy inode to work as no-op. */
- MSDOS_I(inode)->mmu_private = 0;
- MSDOS_I(inode)->i_start = 0;
- MSDOS_I(inode)->i_logstart = 0;
- MSDOS_I(inode)->i_attrs = 0;
- MSDOS_I(inode)->i_pos = 0;
-}
-
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1426,6 +1423,12 @@ static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b,
goto out;
}
+ if (bpb->fat_fat_length == 0 && bpb->fat32_length == 0) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus number of FAT sectors");
+ goto out;
+ }
+
error = 0;
out:
@@ -1731,13 +1734,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
- fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/file.c b/fs/file.c
index 7e9eb65a2912..0ed602e1fbd2 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -88,7 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
*/
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
{
- unsigned int cpy, set;
+ size_t cpy, set;
BUG_ON(nfdt->max_fds < ofdt->max_fds);
@@ -691,7 +691,7 @@ void do_close_on_exec(struct files_struct *files)
spin_unlock(&files->file_lock);
}
-static struct file *__fget(unsigned int fd, fmode_t mask)
+static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs)
{
struct files_struct *files = current->files;
struct file *file;
@@ -706,23 +706,32 @@ loop:
*/
if (file->f_mode & mask)
file = NULL;
- else if (!get_file_rcu(file))
+ else if (!get_file_rcu_many(file, refs))
+ goto loop;
+ else if (__fcheck_files(files, fd) != file) {
+ fput_many(file, refs);
goto loop;
+ }
}
rcu_read_unlock();
return file;
}
+struct file *fget_many(unsigned int fd, unsigned int refs)
+{
+ return __fget(fd, FMODE_PATH, refs);
+}
+
struct file *fget(unsigned int fd)
{
- return __fget(fd, FMODE_PATH);
+ return __fget(fd, FMODE_PATH, 1);
}
EXPORT_SYMBOL(fget);
struct file *fget_raw(unsigned int fd)
{
- return __fget(fd, 0);
+ return __fget(fd, 0, 1);
}
EXPORT_SYMBOL(fget_raw);
@@ -753,7 +762,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
return 0;
return (unsigned long)file;
} else {
- file = __fget(fd, mask);
+ file = __fget(fd, mask, 1);
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;
diff --git a/fs/file_table.c b/fs/file_table.c
index b4baa0de4988..89dc9d4b3555 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -397,9 +397,9 @@ void flush_delayed_fput(void)
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
-void fput(struct file *file)
+void fput_many(struct file *file, unsigned int refs)
{
- if (atomic_long_dec_and_test(&file->f_count)) {
+ if (atomic_long_sub_and_test(refs, &file->f_count)) {
struct task_struct *task = current;
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
@@ -418,6 +418,11 @@ void fput(struct file *file)
}
}
+void fput(struct file *file)
+{
+ fput_many(file, 1);
+}
+
/*
* synchronous analog of fput(); for kernel threads that might be needed
* in some umount() (and thus can't use flush_delayed_fput() without
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 67fbe6837fdb..0ce7ff7a2ce8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_completion {
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
@@ -160,7 +159,9 @@ static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+ inode->i_state &= ~I_SYNC_QUEUED;
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}
@@ -269,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
wb_put(wb);
}
+EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
@@ -510,9 +512,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* find and pin the new wb */
rcu_read_lock();
memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
- if (memcg_css)
- isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ if (memcg_css && !css_tryget(memcg_css))
+ memcg_css = NULL;
rcu_read_unlock();
+ if (!memcg_css)
+ goto out_free;
+
+ isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ css_put(memcg_css);
if (!isw->new_wb)
goto out_free;
@@ -1033,7 +1040,9 @@ void inode_io_list_del(struct inode *inode)
struct bdi_writeback *wb;
wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
inode_io_list_del_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
@@ -1046,8 +1055,10 @@ void inode_io_list_del(struct inode *inode)
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
-static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
{
+ assert_spin_locked(&inode->i_lock);
+
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -1056,6 +1067,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
inode->dirtied_when = jiffies;
}
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
+ inode->i_state &= ~I_SYNC_QUEUED;
+}
+
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+{
+ spin_lock(&inode->i_lock);
+ redirty_tail_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
}
/*
@@ -1094,16 +1113,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
#define EXPIRE_DIRTY_ATIME 0x0001
/*
- * Move expired (dirtied before work->older_than_this) dirty inodes from
+ * Move expired (dirtied before dirtied_before) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
- int flags,
- struct wb_writeback_work *work)
+ int flags, unsigned long dirtied_before)
{
- unsigned long *older_than_this = NULL;
- unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
@@ -1111,21 +1127,17 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
- if ((flags & EXPIRE_DIRTY_ATIME) == 0)
- older_than_this = work->older_than_this;
- else if (!work->for_sync) {
- expire_time = jiffies - (dirtytime_expire_interval * HZ);
- older_than_this = &expire_time;
- }
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (older_than_this &&
- inode_dirtied_after(inode, *older_than_this))
+ if (inode_dirtied_after(inode, dirtied_before))
break;
list_move(&inode->i_io_list, &tmp);
moved++;
+ spin_lock(&inode->i_lock);
if (flags & EXPIRE_DIRTY_ATIME)
- set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
+ inode->i_state |= I_DIRTY_TIME_EXPIRED;
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
@@ -1163,18 +1175,22 @@ out:
* |
* +--> dequeue for IO
*/
-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+ unsigned long dirtied_before)
{
int moved;
+ unsigned long time_expire_jif = dirtied_before;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
+ if (!work->for_sync)
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
- EXPIRE_DIRTY_ATIME, work);
+ EXPIRE_DIRTY_ATIME, time_expire_jif);
if (moved)
wb_io_lists_populated(wb);
- trace_writeback_queue_io(wb, work, moved);
+ trace_writeback_queue_io(wb, work, dirtied_before, moved);
}
static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1268,7 +1284,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
return;
}
@@ -1288,7 +1304,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* retrying writeback of the dirty page/inode
* that cannot be performed immediately.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
}
} else if (inode->i_state & I_DIRTY) {
/*
@@ -1296,10 +1312,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* such as delayed allocation during submission or metadata
* updates after data IO completion.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
inode_io_list_del_locked(inode, wb);
@@ -1542,8 +1559,8 @@ static long writeback_sb_inodes(struct super_block *sb,
*/
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ redirty_tail_locked(inode, wb);
spin_unlock(&inode->i_lock);
- redirty_tail(inode, wb);
continue;
}
if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
@@ -1684,7 +1701,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
if (list_empty(&wb->b_io))
- queue_io(wb, &work);
+ queue_io(wb, &work, jiffies);
__writeback_inodes_wb(wb, &work);
spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
@@ -1704,7 +1721,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
* takes longer than a dirty_writeback_interval interval, then leave a
* one-second gap.
*
- * older_than_this takes precedence over nr_to_write. So we'll only write back
+ * dirtied_before takes precedence over nr_to_write. So we'll only write back
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
@@ -1712,14 +1729,11 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
- unsigned long oldest_jif;
+ unsigned long dirtied_before = jiffies;
struct inode *inode;
long progress;
struct blk_plug plug;
- oldest_jif = jiffies;
- work->older_than_this = &oldest_jif;
-
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
for (;;) {
@@ -1753,14 +1767,14 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- oldest_jif = jiffies -
+ dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- oldest_jif = jiffies;
+ dirtied_before = jiffies;
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
- queue_io(wb, work);
+ queue_io(wb, work, dirtied_before);
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@@ -1920,7 +1934,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
@@ -2031,28 +2045,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
return ret;
}
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
-{
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
- struct dentry *dentry;
- const char *name = "?";
-
- dentry = d_find_alias(inode);
- if (dentry) {
- spin_lock(&dentry->d_lock);
- name = (const char *) dentry->d_name.name;
- }
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- if (dentry) {
- spin_unlock(&dentry->d_lock);
- dput(dentry);
- }
- }
-}
-
/**
* __mark_inode_dirty - internal function
* @inode: inode to mark
@@ -2111,9 +2103,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
- if (unlikely(block_dump > 1))
- block_dump___mark_inode_dirty(inode);
-
spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
@@ -2127,11 +2116,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode->i_state |= flags;
/*
- * If the inode is being synced, just update its dirty state.
- * The unlocker will place the inode on the appropriate
- * superblock list, based upon its state.
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+ * the inode it will place it on the appropriate superblock
+ * list, based upon its state.
*/
- if (inode->i_state & I_SYNC)
+ if (inode->i_state & I_SYNC_QUEUED)
goto out_unlock_inode;
/*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index d9aba9700726..b83367300f48 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -616,6 +616,8 @@ static int __init cuse_init(void)
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
+ /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
+ cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 817e46537e3c..a5c3bc632a21 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,9 +148,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
static void fuse_drop_waiting(struct fuse_conn *fc)
{
- if (fc->connected) {
- atomic_dec(&fc->num_waiting);
- } else if (atomic_dec_and_test(&fc->num_waiting)) {
+ /*
+ * lockess check of fc->connected is okay, because atomic_dec_and_test()
+ * provides a memory barrier mached with the one in fuse_wait_aborted()
+ * to ensure no wake-up is missed.
+ */
+ if (atomic_dec_and_test(&fc->num_waiting) &&
+ !READ_ONCE(fc->connected)) {
/* wake up aborters */
wake_up_all(&fc->blocked_waitq);
}
@@ -856,7 +860,6 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
- page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
@@ -934,6 +937,13 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
lru_cache_add_file(newpage);
+ /*
+ * Release while we have extra ref on stolen page. Otherwise
+ * anon_pipe_buf_release() might think the page can be reused.
+ */
+ buf->ops->release(cs->pipe, buf);
+ buf->ops = NULL;
+
err = 0;
spin_lock(&cs->req->waitq.lock);
if (test_bit(FR_ABORTED, &cs->req->flags))
@@ -1322,6 +1332,15 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
goto restart;
}
spin_lock(&fpq->lock);
+ /*
+ * Must not put request on fpq->io queue after having been shut down by
+ * fuse_abort_conn()
+ */
+ if (!fpq->connected) {
+ req->out.h.error = err = -ECONNABORTED;
+ goto out_end;
+
+ }
list_add(&req->list, &fpq->io);
spin_unlock(&fpq->lock);
cs->req = req;
@@ -1422,7 +1441,6 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
goto out;
ret = 0;
- pipe_lock(pipe);
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -1458,7 +1476,6 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
}
out_unlock:
- pipe_unlock(pipe);
if (do_wakeup) {
smp_mb();
@@ -1929,7 +1946,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
}
err = -EINVAL;
- if (oh.error <= -1000 || oh.error > 0)
+ if (oh.error <= -512 || oh.error > 0)
goto err_finish;
spin_lock(&fpq->lock);
@@ -2048,10 +2065,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
ret = -EINVAL;
- if (rem < len) {
- pipe_unlock(pipe);
- goto out;
- }
+ if (rem < len)
+ goto out_free;
rem = len;
while (rem) {
@@ -2069,7 +2084,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- ibuf->ops->get(pipe, ibuf);
+ if (!pipe_buf_get(pipe, ibuf))
+ goto out_free;
+
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2092,13 +2109,14 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
pipe_lock(pipe);
+out_free:
for (idx = 0; idx < nbuf; idx++) {
struct pipe_buffer *buf = &bufs[idx];
- buf->ops->release(pipe, buf);
+ if (buf->ops)
+ buf->ops->release(pipe, buf);
}
pipe_unlock(pipe);
-out:
kfree(bufs);
return ret;
}
@@ -2239,6 +2257,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
void fuse_wait_aborted(struct fuse_conn *fc)
{
+ /* matches implicit memory barrier in fuse_drop_waiting() */
+ smp_mb();
wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4a9f20a861cf..7a5ff8d5afbd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -240,7 +240,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
kfree(forget);
if (ret == -ENOMEM)
goto out;
- if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret || fuse_invalid_attr(&outarg.attr) ||
+ (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
fuse_change_attributes(inode, &outarg.attr,
@@ -327,6 +328,12 @@ int fuse_valid_type(int m)
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
+bool fuse_invalid_attr(struct fuse_attr *attr)
+{
+ return !fuse_valid_type(attr->mode) ||
+ attr->size > LLONG_MAX;
+}
+
int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
@@ -358,7 +365,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
err = -EIO;
if (!outarg->nodeid)
goto out_put_forget;
- if (!fuse_valid_type(outarg->attr.mode))
+ if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
@@ -479,7 +486,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
goto out_free_ff;
err = -EIO;
- if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
+ if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
+ fuse_invalid_attr(&outentry.attr))
goto out_free_ff;
ff->fh = outopen.fh;
@@ -587,7 +595,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
goto out_put_forget_req;
err = -EIO;
- if (invalid_nodeid(outarg.nodeid))
+ if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
goto out_put_forget_req;
if ((outarg.attr.mode ^ mode) & S_IFMT)
@@ -860,7 +868,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- inc_nlink(inode);
+ if (likely(inode->i_nlink < UINT_MAX))
+ inc_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_update_ctime(inode);
@@ -940,7 +949,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
args.out.args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err) {
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ if (fuse_invalid_attr(&outarg.attr) ||
+ (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
err = -EIO;
} else {
@@ -1245,7 +1255,7 @@ static int fuse_direntplus_link(struct file *file,
if (invalid_nodeid(o->nodeid))
return -EIO;
- if (!fuse_valid_type(o->attr.mode))
+ if (fuse_invalid_attr(&o->attr))
return -EIO;
fc = get_fuse_conn(dir);
@@ -1717,7 +1727,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
goto error;
}
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ if (fuse_invalid_attr(&outarg.attr) ||
+ (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
err = -EIO;
goto error;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 52f1983868a0..1907299d4296 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -18,6 +18,7 @@
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/fs.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -874,9 +875,9 @@ struct fuse_fill_data {
unsigned nr_pages;
};
-static int fuse_readpages_fill(void *_data, struct page *page)
+static int fuse_readpages_fill(struct file *_data, struct page *page)
{
- struct fuse_fill_data *data = _data;
+ struct fuse_fill_data *data = (struct fuse_fill_data *)_data;
struct fuse_req *req = data->req;
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -2592,7 +2593,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
- iov->iov_len = _IOC_SIZE(cmd);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ iov->iov_len = sizeof(int);
+ break;
+ default:
+ iov->iov_len = _IOC_SIZE(cmd);
+ break;
+ }
if (_IOC_DIR(cmd) & _IOC_WRITE) {
in_iov = iov;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5a8cef204cda..411dcdec3db9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -266,7 +266,7 @@ struct fuse_io_priv {
#define FUSE_IO_PRIV_SYNC(f) \
{ \
- .refcnt = { ATOMIC_INIT(1) }, \
+ .refcnt = KREF_INIT(1), \
.async = 0, \
.file = f, \
}
@@ -901,6 +901,8 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
*/
int fuse_valid_type(int m);
+bool fuse_invalid_attr(struct fuse_attr *attr);
+
/**
* Is current process allowed to perform filesystem operation?
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ca9c492a1885..adecbb7a13fe 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -29,7 +29,7 @@ static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
@@ -811,7 +811,7 @@ static void sanitize_global_limit(unsigned *limit)
*limit = (1 << 16) - 1;
}
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static int set_global_limit(const char *val, const struct kernel_param *kp)
{
int rv;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 582ef53f2104..6e7b6cb3f6cd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -476,7 +476,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
*
*/
-static int __gfs2_readpage(void *file, struct page *page)
+static int __gfs2_readpage(struct file *file, struct page *page)
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 09476bb8f6cd..f0302e09eff9 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1235,6 +1235,8 @@ static int do_grow(struct inode *inode, u64 size)
}
error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
+ (unstuff &&
+ gfs2_is_jdata(ip) ? RES_JDATA : 0) +
(sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
0 : RES_QUOTA), 0);
if (error)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8744bd773823..dec23fb358ec 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1035,7 +1035,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (fl_gh->gh_state == state)
goto out;
locks_lock_file_wait(file,
- &(struct file_lock){.fl_type = F_UNLCK});
+ &(struct file_lock) {
+ .fl_type = F_UNLCK,
+ .fl_flags = FL_FLOCK
+ });
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1eb737c466dd..f115ce93dfb4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -751,7 +751,8 @@ again:
}
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
*glp = tmp;
return ret;
@@ -1341,6 +1342,7 @@ __acquires(&lru_lock)
while(!list_empty(list)) {
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
+ clear_bit(GLF_LRU, &gl->gl_flags);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
@@ -1387,7 +1389,6 @@ static long gfs2_scan_glock_lru(int nr)
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
- clear_bit(GLF_LRU, &gl->gl_flags);
freed++;
continue;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f348cfb6b69a..437fd73e381e 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -13,6 +13,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/posix_acl.h>
+#include <linux/security.h>
#include "gfs2.h"
#include "incore.h"
@@ -262,6 +263,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
forget_all_cached_acls(&ip->i_inode);
+ security_inode_invalidate_secctx(&ip->i_inode);
gfs2_dir_hash_inval(ip);
}
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfcf8275..32226dd19932 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1245,7 +1245,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (!(*opened & FILE_OPENED))
return finish_no_open(file, d);
dput(d);
- return 0;
+ return excl && (flags & O_CREAT) ? -EEXIST : 0;
}
BUG_ON(d != NULL);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 3c3d037df824..da9f97911852 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- int lvb_needs_unlock = 0;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
@@ -297,13 +296,15 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
- lvb_needs_unlock = 1;
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
+ /* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !lvb_needs_unlock) {
+ !gl->gl_lksb.sb_lvbptr) {
gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 9e3f3224e54a..d66790fd702f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -160,15 +160,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -916,7 +920,7 @@ fail:
}
static const match_table_t nolock_tokens = {
- { Opt_jid, "jid=%d\n", },
+ { Opt_jid, "jid=%d", },
{ Opt_err, NULL },
};
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2736e9cfc2ee..dd0d8c1bf5c5 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -747,9 +747,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -1017,6 +1017,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+ fs_err(sdp, "no resource groups found in the file system.\n");
+ return -ENOENT;
+ }
set_rgrp_preferences(sdp);
sdp->sd_rindex_uptodate = 1;
@@ -1388,6 +1392,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index de69d8a24f6d..7f2ef95dcd05 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- mutex_lock(&tree->tree_lock);
+ switch (tree->cnid) {
+ case HFS_CAT_CNID:
+ mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
+ break;
+ case HFS_EXT_CNID:
+ mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
+ break;
+ case HFS_ATTR_CNID:
+ mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 221719eac5de..2cda99e61cae 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -14,16 +14,31 @@
#include "btree.h"
-void hfs_bnode_read(struct hfs_bnode *node, void *buf,
- int off, int len)
+void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
+ int pagenum;
+ int bytes_read;
+ int bytes_to_read;
+ void *vaddr;
off += node->page_offset;
- page = node->page[0];
+ pagenum = off >> PAGE_SHIFT;
+ off &= ~PAGE_MASK; /* compute page offset for the first page */
- memcpy(buf, kmap(page) + off, len);
- kunmap(page);
+ for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) {
+ if (pagenum >= node->tree->pages_per_bnode)
+ break;
+ page = node->page[pagenum];
+ bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off);
+
+ vaddr = kmap_atomic(page);
+ memcpy(buf + bytes_read, vaddr + off, bytes_to_read);
+ kunmap_atomic(vaddr);
+
+ pagenum++;
+ off = 0; /* page offset only applies to the first page */
+ }
}
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index 2715f416b5a8..308b5f1af65b 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -12,6 +12,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *);
#define NODE_HASH_SIZE 256
+/* B-tree mutex nested subclasses */
+enum hfs_btree_mutex_classes {
+ CATALOG_BTREE_MUTEX,
+ EXTENTS_BTREE_MUTEX,
+ ATTR_BTREE_MUTEX,
+};
+
/* A HFS BTree held in memory */
struct hfs_btree {
struct super_block *sb;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4574fdd3d421..3eb815bb2c78 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -426,14 +426,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!res) {
if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
res = -EIO;
- goto bail;
+ goto bail_hfs_find;
}
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
}
- if (res) {
- hfs_find_exit(&fd);
- goto bail_no_root;
- }
+ if (res)
+ goto bail_hfs_find;
res = -EINVAL;
root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
@@ -449,6 +447,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
/* everything's okay */
return 0;
+bail_hfs_find:
+ hfs_find_exit(&fd);
bail_no_root:
pr_err("get root inode failed\n");
bail:
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
index d7455ea70287..0c4548d8cd0b 100644
--- a/fs/hfsplus/attributes.c
+++ b/fs/hfsplus/attributes.c
@@ -291,6 +291,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid,
return -ENOENT;
}
+ /* Avoid btree corruption */
+ hfs_bnode_read(fd->bnode, fd->search_key,
+ fd->keyoffset, fd->keylength);
+
err = hfs_brec_remove(fd);
if (err)
return err;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 937c6ee1786f..00ab6084dcc6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -414,7 +414,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
if (next >= end)
break;
- hash = hugetlb_fault_mutex_hash(h, mapping, next, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, next);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
lock_page(page);
@@ -630,7 +630,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
@@ -661,8 +661,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ set_page_huge_active(page);
/*
- * page_put due to reference from alloc_huge_page()
+ * put_page() due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
*/
put_page(page);
@@ -1021,7 +1022,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations = {
.mkdir = hugetlbfs_mkdir,
.rmdir = simple_rmdir,
.mknod = hugetlbfs_mknod,
- .rename = simple_rename,
+ .rename2 = simple_rename,
.setattr = hugetlbfs_setattr,
};
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index b943cbd963bb..2e7d74c7beed 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -151,6 +151,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
inode->i_ino);
+ brelse(bh);
return -EIO;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 350f67fb5b9c..41ece64f1a34 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1268,6 +1268,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
de = (struct iso_directory_record *) (bh->b_data + offset);
de_len = *(unsigned char *) de;
+ if (de_len < sizeof(struct iso_directory_record))
+ goto fail;
if (offset + de_len > bufsize) {
int frag1 = bufsize - offset;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 7b543e6b6526..696f255d1532 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -101,6 +101,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
dir->i_ino);
+ brelse(bh);
return 0;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 4d5a5a4cc017..addb0784dd1c 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
"journal space in %s\n", __func__,
journal->j_devname);
WARN_ON(1);
- jbd2_journal_abort(journal, 0);
+ jbd2_journal_abort(journal, -EIO);
}
write_lock(&journal->j_state_lock);
} else {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2d964ce45606..57490f08ad7c 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -740,7 +740,6 @@ start_journal_io:
submit_bh(WRITE_SYNC, bh);
}
cond_resched();
- stats.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
@@ -798,7 +797,7 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
}
blk_finish_plug(&plug);
@@ -827,6 +826,7 @@ start_journal_io:
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
jbd2_unfile_log_bh(bh);
+ stats.run.rs_blocks_logged++;
/*
* The list contains temporary buffer heads created by
@@ -872,6 +872,7 @@ start_journal_io:
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
clear_buffer_jwrite(bh);
jbd2_unfile_log_bh(bh);
+ stats.run.rs_blocks_logged++;
__brelse(bh); /* One for getblk */
/* AKPM: bforget here */
}
@@ -889,10 +890,11 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
}
if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
+ stats.run.rs_blocks_logged++;
if (jbd2_has_feature_async_commit(journal) &&
journal->j_flags & JBD2_BARRIER) {
blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
@@ -985,29 +987,34 @@ restart_loop:
* it. */
/*
- * A buffer which has been freed while still being journaled by
- * a previous transaction.
- */
- if (buffer_freed(bh)) {
+ * A buffer which has been freed while still being journaled
+ * by a previous transaction, refile the buffer to BJ_Forget of
+ * the running transaction. If the just committed transaction
+ * contains "add to orphan" operation, we can completely
+ * invalidate the buffer now. We are rather thorough in that
+ * since the buffer may be still accessible when blocksize <
+ * pagesize and it is attached to the last partial page.
+ */
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
+ struct address_space *mapping;
+
+ clear_buffer_freed(bh);
+ clear_buffer_jbddirty(bh);
+
/*
- * If the running transaction is the one containing
- * "add to orphan" operation (b_next_transaction !=
- * NULL), we have to wait for that transaction to
- * commit before we can really get rid of the buffer.
- * So just clear b_modified to not confuse transaction
- * credit accounting and refile the buffer to
- * BJ_Forget of the running transaction. If the just
- * committed transaction contains "add to orphan"
- * operation, we can completely invalidate the buffer
- * now. We are rather through in that since the
- * buffer may be still accessible when blocksize <
- * pagesize and it is attached to the last partial
- * page.
+ * Block device buffers need to stay mapped all the
+ * time, so it is enough to clear buffer_jbddirty and
+ * buffer_freed bits. For the file mapping buffers (i.e.
+ * journalled data) we need to unmap buffer and clear
+ * more bits. We also need to be careful about the check
+ * because the data page mapping can get cleared under
+ * our hands. Note that if mapping == NULL, we don't
+ * need to make buffer unmapped because the page is
+ * already detached from the mapping and buffers cannot
+ * get reused.
*/
- jh->b_modified = 0;
- if (!jh->b_next_transaction) {
- clear_buffer_freed(bh);
- clear_buffer_jbddirty(bh);
+ mapping = READ_ONCE(bh->b_page->mapping);
+ if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_req(bh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9398d1b70545..d62435897d0d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1656,6 +1656,11 @@ int jbd2_journal_load(journal_t *journal)
journal->j_devname);
return -EFSCORRUPTED;
}
+ /*
+ * clear JBD2_ABORT flag initialized in journal_init_common
+ * here to update log tail information with the newest seq.
+ */
+ journal->j_flags &= ~JBD2_ABORT;
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
@@ -1663,7 +1668,6 @@ int jbd2_journal_load(journal_t *journal)
if (journal_reset(journal))
goto recovery_error;
- journal->j_flags &= ~JBD2_ABORT;
journal->j_flags |= JBD2_LOADED;
return 0;
@@ -2082,12 +2086,10 @@ static void __journal_abort_soft (journal_t *journal, int errno)
__jbd2_journal_abort_hard(journal);
- if (errno) {
- jbd2_journal_update_sb_errno(journal);
- write_lock(&journal->j_state_lock);
- journal->j_flags |= JBD2_REC_ERR;
- write_unlock(&journal->j_state_lock);
- }
+ jbd2_journal_update_sb_errno(journal);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_REC_ERR;
+ write_unlock(&journal->j_state_lock);
}
/**
@@ -2129,11 +2131,6 @@ static void __journal_abort_soft (journal_t *journal, int errno)
* failure to disk. ext3_error, for example, now uses this
* functionality.
*
- * Errors which originate from within the journaling layer will NOT
- * supply an errno; a null errno implies that absolutely no further
- * writes are done to the journal (unless there are any already in
- * progress).
- *
*/
void jbd2_journal_abort(journal_t *journal, int errno)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c34433432d47..ce2bf9d74224 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1041,8 +1041,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
/* For undo access buffer must have data copied */
if (undo && !jh->b_committed_data)
goto out;
- if (jh->b_transaction != handle->h_transaction &&
- jh->b_next_transaction != handle->h_transaction)
+ if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
+ READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
goto out;
/*
* There are two reasons for the barrier here:
@@ -1906,6 +1906,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
jbd2_journal_put_journal_head(jh);
@@ -1997,6 +2000,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
{
struct buffer_head *head;
struct buffer_head *bh;
+ bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
@@ -2021,11 +2025,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
+
+ /*
+ * If we free a metadata buffer which has been failed to
+ * write out, the jbd2 checkpoint procedure will not detect
+ * this failure and may lead to filesystem inconsistency
+ * after cleanup journal tail.
+ */
+ if (buffer_write_io_error(bh)) {
+ pr_err("JBD2: Error while async write back metadata bh %llu.",
+ (unsigned long long)bh->b_blocknr);
+ has_write_io_error = true;
+ }
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
+ if (has_write_io_error)
+ jbd2_journal_abort(journal, -EIO);
+
return ret;
}
@@ -2223,14 +2242,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
return -EBUSY;
}
/*
- * OK, buffer won't be reachable after truncate. We just set
- * j_next_transaction to the running transaction (if there is
- * one) and mark buffer as freed so that commit code knows it
- * should clear dirty bits when it is done with the buffer.
+ * OK, buffer won't be reachable after truncate. We just clear
+ * b_modified to not confuse transaction credit accounting, and
+ * set j_next_transaction to the running transaction (if there
+ * is one) and mark buffer as freed so that commit code knows
+ * it should clear dirty bits when it is done with the buffer.
*/
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
+ jh->b_modified = 0;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -2451,13 +2472,20 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+
+ /*
+ * b_transaction must be set, otherwise the new b_transaction won't
+ * be holding jh reference
+ */
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
* take a new one.
*/
- jh->b_transaction = jh->b_next_transaction;
- jh->b_next_transaction = NULL;
+ WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
+ WRITE_ONCE(jh->b_next_transaction, NULL);
if (buffer_freed(bh))
jlist = BJ_Forget;
else if (jh->b_modified)
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 406d9cc84ba8..79e771ab624f 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -37,6 +37,9 @@ static int jffs2_rtime_compress(unsigned char *data_in,
int outpos = 0;
int pos=0;
+ if (*dstlen <= 3)
+ return -1;
+
memset(positions,0,sizeof(positions));
while (pos < (*sourcelen) && outpos <= (*dstlen)-2) {
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 30c4c9ebb693..6f467642d530 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -591,10 +591,14 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
int ret;
uint32_t now = get_seconds();
+ mutex_lock(&f->sem);
for (fd = f->dents ; fd; fd = fd->next) {
- if (fd->ino)
+ if (fd->ino) {
+ mutex_unlock(&f->sem);
return -ENOTEMPTY;
+ }
}
+ mutex_unlock(&f->sem);
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, f, now);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 5b52ea41b84f..bee8964682f8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
jffs2_free_full_dirent(fd);
return -EIO;
}
+
+#ifdef CONFIG_JFFS2_SUMMARY
+ /*
+ * we use CONFIG_JFFS2_SUMMARY because without it, we
+ * have checked it while mounting
+ */
+ crc = crc32(0, fd->name, rd->nsize);
+ if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
+ JFFS2_NOTICE("name CRC failed on dirent node at"
+ "%#08x: read %#08x,calculated %#08x\n",
+ ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
+ jffs2_mark_node_obsolete(c, ref);
+ jffs2_free_full_dirent(fd);
+ return 0;
+ }
+#endif
}
fd->nhash = full_name_hash(fd->name, rd->nsize);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 9ad5ba4b299b..5f90173ae38d 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -1075,7 +1075,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
memcpy(&fd->name, rd->name, checkedlen);
fd->name[checkedlen] = 0;
- crc = crc32(0, fd->name, rd->nsize);
+ crc = crc32(0, fd->name, checkedlen);
if (crc != je32_to_cpu(rd->name_crc)) {
pr_notice("%s(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
__func__, ofs, je32_to_cpu(rd->name_crc), crc);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index bc5385471a6e..c05d6f5f10ec 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -783,6 +783,8 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n",
je16_to_cpu(temp->u.nodetype));
jffs2_sum_disable_collecting(c->summary);
+ /* The above call removes the list, nothing more to do */
+ goto bail_rwcompat;
} else {
BUG(); /* unknown node in summary information */
}
@@ -794,6 +796,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
c->summary->sum_num--;
}
+ bail_rwcompat:
jffs2_sum_reset_collected(c->summary);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 41aa3ca6a6a4..b318732a8562 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -160,7 +160,8 @@ void jfs_evict_inode(struct inode *inode)
if (test_cflag(COMMIT_Freewmap, inode))
jfs_free_zero_link(inode);
- diFree(inode);
+ if (JFS_SBI(inode->i_sb)->ipimap)
+ diFree(inode);
/*
* Free the inode from the quota allocation.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2d514c7affc2..9ff510a489cb 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1669,7 +1669,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
} else if (rc == -ENOSPC) {
/* search for next smaller log2 block */
l2nb = BLKSTOL2(nblocks) - 1;
- nblocks = 1 << l2nb;
+ nblocks = 1LL << l2nb;
} else {
/* Trim any already allocated blocks */
jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 562b9a7e4311..f502a15c6c98 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -196,7 +196,7 @@ typedef union dmtree {
#define dmt_leafidx t1.leafidx
#define dmt_height t1.height
#define dmt_budmin t1.budmin
-#define dmt_stree t1.stree
+#define dmt_stree t2.stree
/*
* on-disk aggregate disk allocation map descriptor.
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index b67d64671bb4..415bfa90607a 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -281,5 +281,6 @@
* fsck() must be run to repair
*/
#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */
+#define FM_STATE_MAX 0x0000000f /* max value of s_state */
#endif /* _H_JFS_FILSYS */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a69bdf2a1085..d19542a88c2c 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1339,6 +1339,7 @@ int lmLogInit(struct jfs_log * log)
} else {
if (memcmp(logsuper->uuid, log->uuid, 16)) {
jfs_warn("wrong uuid on JFS log device");
+ rc = -EINVAL;
goto errout20;
}
log->size = le32_to_cpu(logsuper->size);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 9895595fd2f2..0c2aabba1fdb 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -49,6 +49,7 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
+#include <linux/log2.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
@@ -92,14 +93,14 @@ int jfs_mount(struct super_block *sb)
* (initialize mount inode from the superblock)
*/
if ((rc = chkSuper(sb))) {
- goto errout20;
+ goto out;
}
ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
if (ipaimap == NULL) {
jfs_err("jfs_mount: Failed to read AGGREGATE_I");
rc = -EIO;
- goto errout20;
+ goto out;
}
sbi->ipaimap = ipaimap;
@@ -110,7 +111,7 @@ int jfs_mount(struct super_block *sb)
*/
if ((rc = diMount(ipaimap))) {
jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc);
- goto errout21;
+ goto err_ipaimap;
}
/*
@@ -119,7 +120,7 @@ int jfs_mount(struct super_block *sb)
ipbmap = diReadSpecial(sb, BMAP_I, 0);
if (ipbmap == NULL) {
rc = -EIO;
- goto errout22;
+ goto err_umount_ipaimap;
}
jfs_info("jfs_mount: ipbmap:0x%p", ipbmap);
@@ -131,7 +132,7 @@ int jfs_mount(struct super_block *sb)
*/
if ((rc = dbMount(ipbmap))) {
jfs_err("jfs_mount: dbMount failed w/rc = %d", rc);
- goto errout22;
+ goto err_ipbmap;
}
/*
@@ -150,7 +151,7 @@ int jfs_mount(struct super_block *sb)
if (!ipaimap2) {
jfs_err("jfs_mount: Failed to read AGGREGATE_I");
rc = -EIO;
- goto errout35;
+ goto err_umount_ipbmap;
}
sbi->ipaimap2 = ipaimap2;
@@ -162,7 +163,7 @@ int jfs_mount(struct super_block *sb)
if ((rc = diMount(ipaimap2))) {
jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d",
rc);
- goto errout35;
+ goto err_ipaimap2;
}
} else
/* Secondary aggregate inode table is not valid */
@@ -179,7 +180,7 @@ int jfs_mount(struct super_block *sb)
jfs_err("jfs_mount: Failed to read FILESYSTEM_I");
/* open fileset secondary inode allocation map */
rc = -EIO;
- goto errout40;
+ goto err_umount_ipaimap2;
}
jfs_info("jfs_mount: ipimap:0x%p", ipimap);
@@ -189,41 +190,34 @@ int jfs_mount(struct super_block *sb)
/* initialize fileset inode allocation map */
if ((rc = diMount(ipimap))) {
jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
- goto errout41;
+ goto err_ipimap;
}
- goto out;
+ return rc;
/*
* unwind on error
*/
- errout41: /* close fileset inode allocation map inode */
+err_ipimap:
+ /* close fileset inode allocation map inode */
diFreeSpecial(ipimap);
-
- errout40: /* fileset closed */
-
+err_umount_ipaimap2:
/* close secondary aggregate inode allocation map */
- if (ipaimap2) {
+ if (ipaimap2)
diUnmount(ipaimap2, 1);
+err_ipaimap2:
+ /* close aggregate inodes */
+ if (ipaimap2)
diFreeSpecial(ipaimap2);
- }
-
- errout35:
-
- /* close aggregate block allocation map */
+err_umount_ipbmap: /* close aggregate block allocation map */
dbUnmount(ipbmap, 1);
+err_ipbmap: /* close aggregate inodes */
diFreeSpecial(ipbmap);
-
- errout22: /* close aggregate inode allocation map */
-
+err_umount_ipaimap: /* close aggregate inode allocation map */
diUnmount(ipaimap, 1);
-
- errout21: /* close aggregate inodes */
+err_ipaimap: /* close aggregate inodes */
diFreeSpecial(ipaimap);
- errout20: /* aggregate closed */
-
- out:
-
+out:
if (rc)
jfs_err("Mount JFS Failure: %d", rc);
@@ -378,6 +372,15 @@ static int chkSuper(struct super_block *sb)
sbi->bsize = bsize;
sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize);
+ /* check some fields for possible corruption */
+ if (sbi->l2bsize != ilog2((u32)bsize) ||
+ j_sb->pad != 0 ||
+ le32_to_cpu(j_sb->s_state) > FM_STATE_MAX) {
+ rc = -EINVAL;
+ jfs_err("jfs_mount: Mount Failure: superblock is corrupt!");
+ goto out;
+ }
+
/*
* For now, ignore s_pbsize, l2bfactor. All I/O going through buffer
* cache.
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d595856453b2..de6351c1c8db 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1928,8 +1928,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* header ?
*/
if (tlck->type & tlckTRUNCATE) {
- /* This odd declaration suppresses a bogus gcc warning */
- pxd_t pxd = pxd; /* truncated extent of xad */
+ pxd_t pxd; /* truncated extent of xad */
int twm;
/*
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 91e004518237..e5499c5f63da 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -44,28 +44,116 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
}
-static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
- size_t buflen)
+/* kernfs_node_depth - compute depth from @from to @to */
+static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
- char *p = buf + buflen;
- int len;
+ size_t depth = 0;
- *--p = '\0';
+ while (to->parent && to != from) {
+ depth++;
+ to = to->parent;
+ }
+ return depth;
+}
- do {
- len = strlen(kn->name);
- if (p - buf < len + 1) {
- buf[0] = '\0';
- p = NULL;
- break;
- }
- p -= len;
- memcpy(p, kn->name, len);
- *--p = '/';
- kn = kn->parent;
- } while (kn && kn->parent);
+static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
+ struct kernfs_node *b)
+{
+ size_t da, db;
+ struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
+
+ if (ra != rb)
+ return NULL;
+
+ da = kernfs_depth(ra->kn, a);
+ db = kernfs_depth(rb->kn, b);
+
+ while (da > db) {
+ a = a->parent;
+ da--;
+ }
+ while (db > da) {
+ b = b->parent;
+ db--;
+ }
+
+ /* worst case b and a will be the same at root */
+ while (b != a) {
+ b = b->parent;
+ a = a->parent;
+ }
+
+ return a;
+}
+
+/**
+ * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
+ * where kn_from is treated as root of the path.
+ * @kn_from: kernfs node which should be treated as root for the path
+ * @kn_to: kernfs node to which path is needed
+ * @buf: buffer to copy the path into
+ * @buflen: size of @buf
+ *
+ * We need to handle couple of scenarios here:
+ * [1] when @kn_from is an ancestor of @kn_to at some level
+ * kn_from: /n1/n2/n3
+ * kn_to: /n1/n2/n3/n4/n5
+ * result: /n4/n5
+ *
+ * [2] when @kn_from is on a different hierarchy and we need to find common
+ * ancestor between @kn_from and @kn_to.
+ * kn_from: /n1/n2/n3/n4
+ * kn_to: /n1/n2/n5
+ * result: /../../n5
+ * OR
+ * kn_from: /n1/n2/n3/n4/n5 [depth=5]
+ * kn_to: /n1/n2/n3 [depth=3]
+ * result: /../..
+ *
+ * Returns the length of the full path. If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'. On error, -errno is returned.
+ */
+static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
+ struct kernfs_node *kn_from,
+ char *buf, size_t buflen)
+{
+ struct kernfs_node *kn, *common;
+ const char parent_str[] = "/..";
+ size_t depth_from, depth_to, len = 0;
+ int i, j;
+
+ if (!kn_from)
+ kn_from = kernfs_root(kn_to)->kn;
+
+ if (kn_from == kn_to)
+ return strlcpy(buf, "/", buflen);
+
+ common = kernfs_common_ancestor(kn_from, kn_to);
+ if (WARN_ON(!common))
+ return -EINVAL;
- return p;
+ depth_to = kernfs_depth(common, kn_to);
+ depth_from = kernfs_depth(common, kn_from);
+
+ if (buf)
+ buf[0] = '\0';
+
+ for (i = 0; i < depth_from; i++)
+ len += strlcpy(buf + len, parent_str,
+ len < buflen ? buflen - len : 0);
+
+ /* Calculate how many bytes we need for the rest */
+ for (i = depth_to - 1; i >= 0; i--) {
+ for (kn = kn_to, j = 0; j < i; j++)
+ kn = kn->parent;
+ len += strlcpy(buf + len, "/",
+ len < buflen ? buflen - len : 0);
+ len += strlcpy(buf + len, kn->name,
+ len < buflen ? buflen - len : 0);
+ }
+
+ return len;
}
/**
@@ -115,27 +203,33 @@ size_t kernfs_path_len(struct kernfs_node *kn)
}
/**
- * kernfs_path - build full path of a given node
- * @kn: kernfs_node of interest
- * @buf: buffer to copy @kn's name into
+ * kernfs_path_from_node - build path of node @to relative to @from.
+ * @from: parent kernfs_node relative to which we need to build the path
+ * @to: kernfs_node of interest
+ * @buf: buffer to copy @to's path into
* @buflen: size of @buf
*
- * Builds and returns the full path of @kn in @buf of @buflen bytes. The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
+ * Builds @to's path relative to @from in @buf. @from and @to must
+ * be on the same kernfs-root. If @from is not parent of @to, then a relative
+ * path (which includes '..'s) as needed to reach from @from to @to is
+ * returned.
+ *
+ * Returns the length of the full path. If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'. On error, -errno is returned.
*/
-char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
+int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
+ char *buf, size_t buflen)
{
unsigned long flags;
- char *p;
+ int ret;
spin_lock_irqsave(&kernfs_rename_lock, flags);
- p = kernfs_path_locked(kn, buf, buflen);
+ ret = kernfs_path_from_node_locked(to, from, buf, buflen);
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
- return p;
+ return ret;
}
-EXPORT_SYMBOL_GPL(kernfs_path);
+EXPORT_SYMBOL_GPL(kernfs_path_from_node);
/**
* pr_cont_kernfs_name - pr_cont name of a kernfs_node
@@ -164,17 +258,25 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
void pr_cont_kernfs_path(struct kernfs_node *kn)
{
unsigned long flags;
- char *p;
+ int sz;
spin_lock_irqsave(&kernfs_rename_lock, flags);
- p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
- sizeof(kernfs_pr_cont_buf));
- if (p)
- pr_cont("%s", p);
- else
- pr_cont("<name too long>");
+ sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
+ sizeof(kernfs_pr_cont_buf));
+ if (sz < 0) {
+ pr_cont("(error)");
+ goto out;
+ }
+
+ if (sz >= sizeof(kernfs_pr_cont_buf)) {
+ pr_cont("(name too long)");
+ goto out;
+ }
+ pr_cont("%s", kernfs_pr_cont_buf);
+
+out:
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
}
@@ -389,7 +491,7 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, 1, _RET_IP_);
}
- kernfs_unmap_bin_file(kn);
+ kernfs_drain_open_files(kn);
mutex_lock(&kernfs_mutex);
}
@@ -694,6 +796,29 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
return NULL;
}
+static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
+ const unsigned char *path,
+ const void *ns)
+{
+ static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */
+ size_t len = strlcpy(path_buf, path, PATH_MAX);
+ char *p = path_buf;
+ char *name;
+
+ lockdep_assert_held(&kernfs_mutex);
+
+ if (len >= PATH_MAX)
+ return NULL;
+
+ while ((name = strsep(&p, "/")) && parent) {
+ if (*name == '\0')
+ continue;
+ parent = kernfs_find_ns(parent, name, ns);
+ }
+
+ return parent;
+}
+
/**
* kernfs_find_and_get_ns - find and get kernfs_node with the given name
* @parent: kernfs_node to search under
@@ -719,6 +844,29 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
/**
+ * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
+ * @parent: kernfs_node to search under
+ * @path: path to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for kernfs_node with path @path under @parent and get a reference
+ * if found. This function may sleep and returns pointer to the found
+ * kernfs_node on success, %NULL on failure.
+ */
+struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
+ const char *path, const void *ns)
+{
+ struct kernfs_node *kn;
+
+ mutex_lock(&kernfs_mutex);
+ kn = kernfs_walk_ns(parent, path, ns);
+ kernfs_get(kn);
+ mutex_unlock(&kernfs_mutex);
+
+ return kn;
+}
+
+/**
* kernfs_create_root - create a new kernfs hierarchy
* @scops: optional syscall operations for the hierarchy
* @flags: KERNFS_ROOT_* flags
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 6875bd5d35f6..7fb45ee36897 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -701,7 +701,8 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
if (error)
goto err_free;
- ((struct seq_file *)file->private_data)->private = of;
+ of->seq_file = file->private_data;
+ of->seq_file->private = of;
/* seq_file clears PWRITE unconditionally, restore it if WRITE */
if (file->f_mode & FMODE_WRITE)
@@ -710,13 +711,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
/* make sure we have open node struct */
error = kernfs_get_open_node(kn, of);
if (error)
- goto err_close;
+ goto err_seq_release;
+
+ if (ops->open) {
+ /* nobody has access to @of yet, skip @of->mutex */
+ error = ops->open(of);
+ if (error)
+ goto err_put_node;
+ }
/* open succeeded, put active references */
kernfs_put_active(kn);
return 0;
-err_close:
+err_put_node:
+ kernfs_put_open_node(kn, of);
+err_seq_release:
seq_release(inode, file);
err_free:
kfree(of->prealloc_buf);
@@ -726,11 +736,41 @@ err_out:
return error;
}
+/* used from release/drain to ensure that ->release() is called exactly once */
+static void kernfs_release_file(struct kernfs_node *kn,
+ struct kernfs_open_file *of)
+{
+ /*
+ * @of is guaranteed to have no other file operations in flight and
+ * we just want to synchronize release and drain paths.
+ * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * here because drain path may be called from places which can
+ * cause circular dependency.
+ */
+ lockdep_assert_held(&kernfs_open_file_mutex);
+
+ if (!of->released) {
+ /*
+ * A file is never detached without being released and we
+ * need to be able to release files which are deactivated
+ * and being drained. Don't use kernfs_ops().
+ */
+ kn->attr.ops->release(of);
+ of->released = true;
+ }
+}
+
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
struct kernfs_open_file *of = kernfs_of(filp);
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ mutex_lock(&kernfs_open_file_mutex);
+ kernfs_release_file(kn, of);
+ mutex_unlock(&kernfs_open_file_mutex);
+ }
+
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
@@ -739,12 +779,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}
-void kernfs_unmap_bin_file(struct kernfs_node *kn)
+void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
- if (!(kn->flags & KERNFS_HAS_MMAP))
+ if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
spin_lock_irq(&kernfs_open_node_lock);
@@ -756,10 +796,17 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn)
return;
mutex_lock(&kernfs_open_file_mutex);
+
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ if (kn->flags & KERNFS_HAS_MMAP)
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ kernfs_release_file(kn, of);
}
+
mutex_unlock(&kernfs_open_file_mutex);
kernfs_put_open_node(kn, NULL);
@@ -779,26 +826,35 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn)
* to see if it supports poll (Neither 'poll' nor 'select' return
* an appropriate error code). When in doubt, set a suitable timeout value.
*/
+unsigned int kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
+{
+ struct kernfs_node *kn = of->file->f_path.dentry->d_fsdata;
+ struct kernfs_open_node *on = kn->attr.open;
+
+ poll_wait(of->file, &on->poll, wait);
+
+ if (of->event != atomic_read(&on->event))
+ return DEFAULT_POLLMASK|POLLERR|POLLPRI;
+
+ return DEFAULT_POLLMASK;
+}
+
static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
{
struct kernfs_open_file *of = kernfs_of(filp);
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
- struct kernfs_open_node *on = kn->attr.open;
+ unsigned int ret;
if (!kernfs_get_active(kn))
- goto trigger;
+ return DEFAULT_POLLMASK|POLLERR|POLLPRI;
- poll_wait(filp, &on->poll, wait);
+ if (kn->attr.ops->poll)
+ ret = kn->attr.ops->poll(of, wait);
+ else
+ ret = kernfs_generic_poll(of, wait);
kernfs_put_active(kn);
-
- if (of->event != atomic_read(&on->event))
- goto trigger;
-
- return DEFAULT_POLLMASK;
-
- trigger:
- return DEFAULT_POLLMASK|POLLERR|POLLPRI;
+ return ret;
}
static void kernfs_notify_workfn(struct work_struct *work)
@@ -957,6 +1013,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
kn->flags |= KERNFS_HAS_SEQ_SHOW;
if (ops->mmap)
kn->flags |= KERNFS_HAS_MMAP;
+ if (ops->release)
+ kn->flags |= KERNFS_HAS_RELEASE;
rc = kernfs_add_one(kn);
if (rc) {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 6762bfbd8207..34d5a59ebd41 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -108,7 +108,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;
-void kernfs_unmap_bin_file(struct kernfs_node *kn);
+void kernfs_drain_open_files(struct kernfs_node *kn);
/*
* symlink.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 8eaf417187f1..a71e996d6c55 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -14,6 +14,8 @@
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
#include "kernfs-internal.h"
@@ -39,6 +41,19 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
return 0;
}
+static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
+{
+ struct kernfs_node *node = dentry->d_fsdata;
+ struct kernfs_root *root = kernfs_root(node);
+ struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+ if (scops && scops->show_path)
+ return scops->show_path(sf, node, root);
+
+ seq_dentry(sf, dentry, " \t\n\\");
+ return 0;
+}
+
const struct super_operations kernfs_sops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
@@ -46,6 +61,7 @@ const struct super_operations kernfs_sops = {
.remount_fs = kernfs_sop_remount_fs,
.show_options = kernfs_sop_show_options,
+ .show_path = kernfs_sop_show_path,
};
/**
@@ -62,6 +78,74 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
return NULL;
}
+/*
+ * find the next ancestor in the path down to @child, where @parent was the
+ * ancestor whose descendant we want to find.
+ *
+ * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root
+ * node. If @parent is b, then we return the node for c.
+ * Passing in d as @parent is not ok.
+ */
+static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
+ struct kernfs_node *parent)
+{
+ if (child == parent) {
+ pr_crit_once("BUG in find_next_ancestor: called with parent == child");
+ return NULL;
+ }
+
+ while (child->parent != parent) {
+ if (!child->parent)
+ return NULL;
+ child = child->parent;
+ }
+
+ return child;
+}
+
+/**
+ * kernfs_node_dentry - get a dentry for the given kernfs_node
+ * @kn: kernfs_node for which a dentry is needed
+ * @sb: the kernfs super_block
+ */
+struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
+ struct super_block *sb)
+{
+ struct dentry *dentry;
+ struct kernfs_node *knparent = NULL;
+
+ BUG_ON(sb->s_op != &kernfs_sops);
+
+ dentry = dget(sb->s_root);
+
+ /* Check if this is the root kernfs_node */
+ if (!kn->parent)
+ return dentry;
+
+ knparent = find_next_ancestor(kn, NULL);
+ if (WARN_ON(!knparent))
+ return ERR_PTR(-EINVAL);
+
+ do {
+ struct dentry *dtmp;
+ struct kernfs_node *kntmp;
+
+ if (kn == knparent)
+ return dentry;
+ kntmp = find_next_ancestor(kn, knparent);
+ if (WARN_ON(!kntmp))
+ return ERR_PTR(-EINVAL);
+ mutex_lock(&d_inode(dentry)->i_mutex);
+ dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
+ mutex_unlock(&d_inode(dentry)->i_mutex);
+ dput(dentry);
+ if (IS_ERR(dtmp))
+ return dtmp;
+ knparent = kntmp;
+ dentry = dtmp;
+ } while (true);
+}
+
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
{
struct kernfs_super_info *info = kernfs_info(sb);
@@ -158,7 +242,8 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
- sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
+ sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
+ &init_user_ns, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
if (IS_ERR(sb))
diff --git a/fs/libfs.c b/fs/libfs.c
index c7cbfb092e94..883cdd45a08c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -321,11 +321,15 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
EXPORT_SYMBOL(simple_rmdir);
int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = d_is_dir(old_dentry);
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
@@ -761,7 +765,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
{
struct simple_attr *attr;
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
@@ -801,9 +805,11 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
if (ret)
return ret;
- if (*ppos) { /* continued read */
+ if (*ppos && attr->get_buf[0]) {
+ /* continued read */
size = strlen(attr->get_buf);
- } else { /* first read */
+ } else {
+ /* first read */
u64 val;
ret = attr->get(attr->data, &val);
if (ret)
@@ -825,7 +831,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
struct simple_attr *attr;
- u64 val;
+ unsigned long long val;
size_t size;
ssize_t ret;
@@ -843,7 +849,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtoll(attr->set_buf, NULL, 0);
+ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c7eb47f2fb6c..603fa652b965 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -430,12 +430,7 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
- if (time_after_eq(jiffies, host->h_nextrebind)) {
- rpc_force_rebind(clnt);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- dprintk("lockd: next rebind in %lu jiffies\n",
- host->h_nextrebind - jiffies);
- }
+ nlm_rebind_host(host);
} else {
unsigned long increment = nlmsvc_timeout;
struct rpc_timeout timeparms = {
@@ -483,13 +478,20 @@ nlm_bind_host(struct nlm_host *host)
return clnt;
}
-/*
- * Force a portmap lookup of the remote lockd port
+/**
+ * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
+ * @host: NLM host handle for peer
+ *
+ * This is not needed when using a connection-oriented protocol, such as TCP.
+ * The existing autobind mechanism is sufficient to force a rebind when
+ * required, e.g. on connection state transitions.
*/
void
nlm_rebind_host(struct nlm_host *host)
{
- dprintk("lockd: rebind host %s\n", host->h_name);
+ if (host->h_proto != IPPROTO_UDP)
+ return;
+
if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(host->h_rpcclnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f038d4ac9aec..7890779f44b5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -526,7 +526,7 @@ static struct ctl_table nlm_sysctl_root[] = {
*/
#define param_set_min_max(name, type, which_strtol, min, max) \
-static int param_set_##name(const char *val, struct kernel_param *kp) \
+static int param_set_##name(const char *val, const struct kernel_param *kp) \
{ \
char *endp; \
__typeof__(type) num = which_strtol(val, &endp, 0); \
diff --git a/fs/locks.c b/fs/locks.c
index b515e65f1376..4faeb3f6d3dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -230,6 +230,7 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
@@ -934,7 +935,8 @@ out:
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1144,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1166,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1182,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1260,7 +1264,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1272,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -2165,6 +2169,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2178,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2242,6 +2246,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
}
}
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2398,6 +2403,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2420,10 +2426,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@@ -2599,7 +2606,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
if (inode) {
/* userspace relies on this representation of dev_t */
- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
+ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
} else {
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a709d80c8ebc..9a202f7ed755 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -33,9 +33,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
return submit_bio_wait(rw, &bio);
}
-static int bdev_readpage(void *_sb, struct page *page)
+static int bdev_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
struct block_device *bdev = logfs_super(sb)->s_bdev;
int err;
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 9c501449450d..4ae7d17f96e3 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -122,9 +122,9 @@ static void logfs_mtd_sync(struct super_block *sb)
mtd_sync(mtd);
}
-static int logfs_mtd_readpage(void *_sb, struct page *page)
+static int logfs_mtd_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
int err;
err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d4c4..48085ab8bcd5 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -174,7 +174,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
if (!logfs_exist_block(dir, index))
continue;
page = read_cache_page(dir->i_mapping, index,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return page;
dd = kmap_atomic(page);
@@ -306,7 +306,7 @@ static int logfs_readdir(struct file *file, struct dir_context *ctx)
continue;
}
page = read_cache_page(dir->i_mapping, pos,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
dd = kmap(page);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609465..96322622870c 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -151,7 +151,7 @@ struct logfs_device_ops {
struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
struct page *(*find_last_sb)(struct super_block *sb, u64 *ofs);
int (*write_sb)(struct super_block *sb, struct page *page);
- int (*readpage)(void *_sb, struct page *page);
+ int (*readpage)(struct file *_sb, struct page *page);
void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write);
@@ -485,7 +485,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s,
#endif
/* dev_mtd.c */
-#ifdef CONFIG_MTD
+#if IS_ENABLED(CONFIG_MTD)
int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 086cd0a61e80..8d9bc0344cf3 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -155,6 +155,23 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
return 0;
}
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ return false;
+
+ /*
+ * s_max_size must not exceed the block mapping limitation. This check
+ * is only needed for V1 filesystems, since V2/V3 support an extra level
+ * of indirect blocks which places the limit well above U32_MAX.
+ */
+ if (sbi->s_version == MINIX_V1 &&
+ sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+ return false;
+
+ return true;
+}
+
static int minix_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
@@ -233,11 +250,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
} else
goto out_no_fs;
+ if (!minix_check_superblock(sbi))
+ goto out_illegal_sb;
+
/*
* Allocate the buffer map to keep the superblock small.
*/
- if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
- goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kzalloc(i, GFP_KERNEL);
if (!map)
@@ -472,6 +490,13 @@ static struct inode *V1_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
@@ -505,6 +530,13 @@ static struct inode *V2_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index a731cabf1540..3816427e8938 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -74,6 +74,7 @@ static int alloc_branch(struct inode *inode,
int n = 0;
int i;
int parent = minix_new_block(inode);
+ int err = -ENOSPC;
branch[0].key = cpu_to_block(parent);
if (parent) for (n = 1; n < num; n++) {
@@ -84,6 +85,11 @@ static int alloc_branch(struct inode *inode,
break;
branch[n].key = cpu_to_block(nr);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ minix_free_block(inode, nr);
+ err = -ENOMEM;
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
branch[n].bh = bh;
@@ -102,7 +108,7 @@ static int alloc_branch(struct inode *inode,
bforget(branch[i].bh);
for (i = 0; i < n; i++)
minix_free_block(inode, block_to_cpu(branch[i].key));
- return -ENOSPC;
+ return err;
}
static inline int splice_branch(struct inode *inode,
diff --git a/fs/namei.c b/fs/namei.c
index f93a238ad4f9..9ec00139fcc9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,7 @@
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
+#include <linux/init_task.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -1457,7 +1458,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
nd->path.dentry = parent;
nd->seq = seq;
if (unlikely(!path_connected(&nd->path)))
- return -ENOENT;
+ return -ECHILD;
break;
} else {
struct mount *mnt = real_mount(nd->path.mnt);
@@ -3182,8 +3183,8 @@ static int do_last(struct nameidata *nd,
int *opened)
{
struct dentry *dir = nd->path.dentry;
- kuid_t dir_uid = dir->d_inode->i_uid;
- umode_t dir_mode = dir->d_inode->i_mode;
+ kuid_t dir_uid = nd->inode->i_uid;
+ umode_t dir_mode = nd->inode->i_mode;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false;
diff --git a/fs/namespace.c b/fs/namespace.c
index e6585f285234..6f9a6bee9d87 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1015,6 +1015,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
+struct vfsmount *
+vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
+ const char *name, void *data)
+{
+ /* Until it is worked out how to pass the user namespace
+ * through from the parent mount to the submount don't support
+ * unprivileged mounts with submounts.
+ */
+ if (mountpoint->d_sb->s_user_ns != &init_user_ns)
+ return ERR_PTR(-EPERM);
+
+ return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
+}
+EXPORT_SYMBOL_GPL(vfs_submount);
+
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -1850,6 +1865,20 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/**
* clone_private_mount - create a private clone of a path
*
@@ -1864,16 +1893,27 @@ struct vfsmount *clone_private_mount(struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
+ down_read(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- return ERR_PTR(-EINVAL);
+ goto invalid;
+
+ if (!check_mnt(old_mnt))
+ goto invalid;
+
+ if (has_locked_children(old_mnt, path->dentry))
+ goto invalid;
- down_read(&namespace_sem);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
up_read(&namespace_sem);
+
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
return &new_mnt->mnt;
+
+invalid:
+ up_read(&namespace_sem);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2189,19 +2229,6 @@ static int do_change_type(struct path *path, int flag)
return err;
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
-{
- struct mount *child;
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (!is_subdir(child->mnt_mountpoint, dentry))
- continue;
-
- if (child->mnt.mnt_flags & MNT_LOCKED)
- return true;
- }
- return false;
-}
-
/*
* do loopback mount.
*/
@@ -2533,10 +2560,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
return -ENODEV;
if (user_ns != &init_user_ns) {
- if (!(type->fs_flags & FS_USERNS_MOUNT)) {
- put_filesystem(type);
- return -EPERM;
- }
/* Only in special cases allow devices from mounts
* created outside the initial user namespace.
*/
@@ -2856,7 +2879,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME);
+ MS_STRICTATIME | MS_SUBMOUNT);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
@@ -3187,8 +3210,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make certain new is below the root */
if (!is_path_reachable(new_mnt, new.dentry, &root))
goto out4;
- root_mp->m_count++; /* pin it so it won't go away */
lock_mount_hash();
+ root_mp->m_count++; /* pin it so it won't go away */
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -3491,10 +3514,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+ return to_mnt_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
+ .owner = mntns_owner,
};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index b1daeafbea92..55ebf9f4a824 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -89,7 +89,7 @@ config NFS_V4
config NFS_SWAP
bool "Provide swap over NFS support"
default n
- depends on NFS_FS
+ depends on NFS_FS && SWAP
select SUNRPC_SWAP
help
This option enables swapon to work on files located on NFS mounts.
@@ -132,7 +132,7 @@ config PNFS_OBJLAYOUT
config PNFS_FLEXFILE_LAYOUT
tristate
depends on NFS_V4_1 && NFS_V3
- default m
+ default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 807eb6ef4f91..6f4f68967c31 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -368,7 +368,7 @@ static bool referring_call_exists(struct nfs_client *clp,
uint32_t nrclists,
struct referring_call_list *rclists)
{
- bool status = 0;
+ bool status = false;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d6d5d2a48e83..ba2cd0bd3894 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -377,7 +377,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
if (cl_init->hostname == NULL) {
WARN_ON(1);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
dprintk("--> nfs_get_client(%s,v%u)\n",
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c690a1c0c4e5..ba7e98d8ce09 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -169,6 +169,17 @@ typedef struct {
unsigned int eof:1;
} nfs_readdir_descriptor_t;
+static
+void nfs_readdir_init_array(struct page *page)
+{
+ struct nfs_cache_array *array;
+
+ array = kmap_atomic(page);
+ memset(array, 0, sizeof(struct nfs_cache_array));
+ array->eof_index = -1;
+ kunmap_atomic(array);
+}
+
/*
* The caller is responsible for calling nfs_readdir_release_array(page)
*/
@@ -202,6 +213,7 @@ void nfs_readdir_clear_array(struct page *page)
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
+ array->size = 0;
kunmap_atomic(array);
}
@@ -277,7 +289,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
desc->cache_entry_index = index;
return 0;
out_eof:
- desc->eof = 1;
+ desc->eof = true;
return -EBADCOOKIE;
}
@@ -331,7 +343,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
if (array->eof_index >= 0) {
status = -EBADCOOKIE;
if (*desc->dir_cookie == array->last_cookie)
- desc->eof = 1;
+ desc->eof = true;
}
out:
return status;
@@ -550,6 +562,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+ if (entry->label)
+ entry->label->len = NFS4_MAXLABELLEN;
+
status = xdr_decode(desc, entry, &stream);
if (status != 0) {
if (status == -EAGAIN)
@@ -622,6 +637,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
+ nfs_readdir_init_array(page);
+
entry.prev_cookie = 0;
entry.cookie = desc->last_cookie;
entry.eof = 0;
@@ -642,8 +659,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
status = PTR_ERR(array);
goto out_label_free;
}
- memset(array, 0, sizeof(struct nfs_cache_array));
- array->eof_index = -1;
status = nfs_readdir_alloc_pages(pages, array_size);
if (status < 0)
@@ -698,6 +713,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
unlock_page(page);
return 0;
error:
+ nfs_readdir_clear_array(page);
unlock_page(page);
return ret;
}
@@ -705,8 +721,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
static
void cache_page_release(nfs_readdir_descriptor_t *desc)
{
- if (!desc->page->mapping)
- nfs_readdir_clear_array(desc->page);
page_cache_release(desc->page);
desc->page = NULL;
}
@@ -720,19 +734,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
/*
* Returns 0 if desc->dir_cookie was found on page desc->page_index
+ * and locks the page to prevent removal from the page cache.
*/
static
-int find_cache_page(nfs_readdir_descriptor_t *desc)
+int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
{
int res;
desc->page = get_cache_page(desc);
if (IS_ERR(desc->page))
return PTR_ERR(desc->page);
-
- res = nfs_readdir_search_array(desc);
+ res = lock_page_killable(desc->page);
if (res != 0)
- cache_page_release(desc);
+ goto error;
+ res = -EAGAIN;
+ if (desc->page->mapping != NULL) {
+ res = nfs_readdir_search_array(desc);
+ if (res == 0)
+ return 0;
+ }
+ unlock_page(desc->page);
+error:
+ cache_page_release(desc);
return res;
}
@@ -747,7 +770,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
desc->last_cookie = 0;
}
do {
- res = find_cache_page(desc);
+ res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
}
@@ -776,7 +799,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
- desc->eof = 1;
+ desc->eof = true;
break;
}
desc->ctx->pos++;
@@ -788,11 +811,10 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ctx->duped = 1;
}
if (array->eof_index >= 0)
- desc->eof = 1;
+ desc->eof = true;
nfs_readdir_release_array(desc->page);
out:
- cache_page_release(desc);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
return res;
@@ -838,13 +860,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
status = nfs_do_filldir(desc);
+ out_release:
+ nfs_readdir_clear_array(desc->page);
+ cache_page_release(desc);
out:
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
__func__, status);
return status;
- out_release:
- cache_page_release(desc);
- goto out;
}
/* The file offset position represents the dirent entry number. A
@@ -890,7 +912,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res == -EBADCOOKIE) {
res = 0;
/* This means either end of directory */
- if (*desc->dir_cookie && desc->eof == 0) {
+ if (*desc->dir_cookie && !desc->eof) {
/* Or that the server has 'lost' a cookie */
res = uncached_readdir(desc);
if (res == 0)
@@ -910,6 +932,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
res = nfs_do_filldir(desc);
+ unlock_page(desc->page);
+ cache_page_release(desc);
if (res < 0)
break;
} while (!desc->eof);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 88cb8e0d6014..7789f0b9b999 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -605,6 +605,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
+ nfs_direct_req_release(dreq);
goto out_release;
}
dreq->l_ctx = l_ctx;
@@ -1015,6 +1016,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
+ nfs_direct_req_release(dreq);
goto out_release;
}
dreq->l_ctx = l_ctx;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 8e268965c96d..3f1ea498ecab 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -716,7 +716,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (unlikely(!p))
goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++);
- if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
+ if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6506775575aa..e7f8732895b7 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -86,7 +86,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
@@ -855,9 +855,8 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
@@ -869,11 +868,15 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
}
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d25b55ceb9d5..0d7b8c6e1de8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1430,10 +1430,10 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
*/
static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
}
/*
@@ -1849,7 +1849,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
@@ -1964,7 +1964,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 578350fd96e1..7eeab683a81f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -534,12 +534,14 @@ extern int nfs41_walk_client_list(struct nfs_client *clp,
static inline struct inode *nfs_igrab_and_active(struct inode *inode)
{
- inode = igrab(inode);
- if (inode != NULL && !nfs_sb_active(inode->i_sb)) {
- iput(inode);
- inode = NULL;
+ struct super_block *sb = inode->i_sb;
+
+ if (sb && nfs_sb_active(sb)) {
+ if (igrab(inode))
+ return inode;
+ nfs_sb_deactive(sb);
}
- return inode;
+ return NULL;
}
static inline void nfs_iput_and_deactive(struct inode *inode)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index c8162c660c44..f1804be48a3a 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
- * @dentry - pointer to dentry
+ * @dentry_in - pointer to dentry
* @buffer - result buffer
- * @buflen - length of buffer
+ * @buflen_in - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
@@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
- unsigned flags)
+char *nfs_path(char **p, struct dentry *dentry_in, char *buffer,
+ ssize_t buflen_in, unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
+ struct dentry *dentry;
+ ssize_t buflen;
rename_retry:
+ buflen = buflen_in;
+ dentry = dentry_in;
end = buffer+buflen;
*--end = '\0';
buflen--;
@@ -226,7 +230,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
- return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
+ return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
}
/**
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1ebe2fc7cda2..05c697d5b477 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -213,37 +213,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- struct posix_acl *alloc = NULL, *dfacl = NULL;
+ struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
int status;
if (S_ISDIR(inode->i_mode)) {
switch(type) {
case ACL_TYPE_ACCESS:
- alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ alloc = get_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(alloc))
goto fail;
+ dfacl = alloc;
break;
case ACL_TYPE_DEFAULT:
- dfacl = acl;
- alloc = acl = get_acl(inode, ACL_TYPE_ACCESS);
+ alloc = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(alloc))
goto fail;
+ dfacl = acl;
+ acl = alloc;
break;
}
}
if (acl == NULL) {
- alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+ alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(alloc))
goto fail;
+ acl = alloc;
}
status = __nfs3_proc_setacls(inode, acl, dfacl);
- posix_acl_release(alloc);
+out:
+ if (acl != orig)
+ posix_acl_release(acl);
+ if (dfacl != orig)
+ posix_acl_release(dfacl);
return status;
fail:
- return PTR_ERR(alloc);
+ status = PTR_ERR(alloc);
+ goto out;
}
const struct xattr_handler *nfs3_xattr_handlers[] = {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index cb28cceefebe..9f365b004453 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -363,7 +363,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
case NFS3_CREATE_UNCHECKED:
- goto out;
+ goto out_release_acls;
}
nfs_fattr_init(data->res.dir_attr);
nfs_fattr_init(data->res.fattr);
@@ -708,7 +708,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
default:
status = -EINVAL;
- goto out;
+ goto out_release_acls;
}
status = nfs3_do_create(dir, dentry, data);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 267126d32ec0..4a68837e92ea 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -33,6 +33,7 @@
*/
#define NFS3_fhandle_sz (1+16)
#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
+#define NFS3_post_op_fh_sz (1+NFS3_fh_sz)
#define NFS3_sattr_sz (15)
#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
@@ -70,7 +71,7 @@
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
-#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7f1a0fb8c493..31cc6f3d992d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -168,7 +168,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0ca482a51e53..988d26202958 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -439,8 +439,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
status = decode_clone(xdr);
if (status)
goto out;
- status = decode_getfattr(xdr, res->dst_fattr, res->server);
-
+ decode_getfattr(xdr, res->dst_fattr, res->server);
out:
res->rpc_status = status;
return status;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index dac20f31f01f..92895f41d9a0 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -751,7 +751,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, minorversion) == false)
+ if (!nfs4_cb_match_client(addr, clp, minorversion))
continue;
if (!nfs4_has_session(clp))
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index c5e884585c23..9b9c8e598436 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -168,7 +168,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
case SEEK_HOLE:
case SEEK_DATA:
ret = nfs42_proc_llseek(filep, offset, whence);
- if (ret != -ENOTSUPP)
+ if (ret != -EOPNOTSUPP)
return ret;
default:
return nfs_file_llseek(filep, offset, whence);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f592672373cb..b2a7f72eb116 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
mountdata->hostname,
mountdata->mnt_path);
- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
+ mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
if (!IS_ERR(mnt))
break;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 08207001d475..e10bada12361 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4047,12 +4047,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = d_inode(dentry);
+ struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_readdir_arg args = {
.fh = NFS_FH(dir),
.pages = pages,
.pgbase = 0,
.count = count,
- .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
.plus = plus,
};
struct nfs4_readdir_res res;
@@ -4067,9 +4067,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
dentry,
(unsigned long long)cookie);
+ if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+ args.bitmask = server->attr_bitmask_nl;
+ else
+ args.bitmask = server->attr_bitmask;
+
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
- status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
if (status >= 0) {
memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
@@ -4842,6 +4848,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
int ret, i;
+ /* You can't remove system.nfs4_acl: */
+ if (buflen == 0)
+ return -EINVAL;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
@@ -4878,6 +4887,14 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
do {
err = __nfs4_proc_set_acl(inode, buf, buflen);
trace_nfs4_set_acl(inode, err);
+ if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) {
+ /*
+ * no need to retry since the kernel
+ * isn't involved in encoding the ACEs.
+ */
+ err = -EINVAL;
+ break;
+ }
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
@@ -4916,9 +4933,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
return -ENOENT;
- if (buflen < label.len)
- return -ERANGE;
- return 0;
+ return label.len;
}
static int nfs4_get_security_label(struct inode *inode, void *buf,
@@ -6054,6 +6069,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
+ struct nfs4_state_owner *sp = state->owner;
unsigned char fl_flags = request->fl_flags;
int status = -ENOLCK;
@@ -6068,6 +6084,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
status = do_vfs_lock(state->inode, request);
if (status < 0)
goto out;
+ mutex_lock(&sp->so_delegreturn_mutex);
down_read(&nfsi->rwsem);
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
@@ -6075,9 +6092,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
request->fl_flags = fl_flags & ~FL_SLEEP;
status = do_vfs_lock(state->inode, request);
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
}
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
request->fl_flags = fl_flags;
@@ -6169,7 +6188,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
err = nfs4_set_lock_state(state, fl);
if (err != 0)
return err;
- err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ do {
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ ssleep(1);
+ } while (err == -NFS4ERR_DELAY);
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
@@ -6292,10 +6316,6 @@ static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-static inline int nfs4_server_supports_labels(struct nfs_server *server)
-{
- return server->caps & NFS_CAP_SECURITY_LABEL;
-}
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
@@ -6317,29 +6337,34 @@ static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler,
return -EOPNOTSUPP;
}
-static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
{
- size_t len = 0;
+ int len = 0;
- if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
- len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
- if (list && len <= list_len)
- security_inode_listsecurity(d_inode(dentry), list, len);
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
+ len = security_inode_listsecurity(inode, list, list_len);
+ if (list_len && len > list_len)
+ return -ERANGE;
}
return len;
}
static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = nfs4_xattr_list_nfs4_label,
.get = nfs4_xattr_get_nfs4_label,
.set = nfs4_xattr_set_nfs4_label,
};
-#endif
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+
+#endif
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
@@ -8769,6 +8794,24 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ ssize_t error, error2;
+
+ error = generic_listxattr(dentry, list, size);
+ if (error < 0)
+ return error;
+ if (list) {
+ list += error;
+ size -= error;
+ }
+
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ if (error2 < 0)
+ return error2;
+ return error + error2;
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -8785,7 +8828,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8795,7 +8838,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 15cd9db6d616..28c1b765e444 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4158,7 +4158,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
goto out_overflow;
if (len < NFS4_MAXLABELLEN) {
if (label) {
- memcpy(label->label, p, len);
+ if (label->len) {
+ if (label->len < len)
+ return -ERANGE;
+ memcpy(label->label, p, len);
+ }
label->len = len;
label->pi = pi;
label->lfs = lfs;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index af1bb7353792..18868e318b03 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -886,15 +886,6 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
return 0;
}
-/*
- * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1)
- */
-void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio)
-{
- pgio->pg_mirror_count = 1;
- pgio->pg_mirror_idx = 0;
-}
-
static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_mirror_count = 1;
@@ -1002,17 +993,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
if (!list_empty(&mirror->pg_list)) {
int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
- else
+ if (list_empty(&mirror->pg_list)) {
mirror->pg_bytes_written += mirror->pg_count;
- }
- if (list_empty(&mirror->pg_list)) {
- mirror->pg_count = 0;
- mirror->pg_base = 0;
+ mirror->pg_count = 0;
+ mirror->pg_base = 0;
+ mirror->pg_recoalesce = 0;
+ }
}
}
@@ -1098,7 +1088,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do {
list_splice_init(&mirror->pg_list, &head);
- mirror->pg_bytes_written -= mirror->pg_count;
mirror->pg_count = 0;
mirror->pg_base = 0;
mirror->pg_recoalesce = 0;
@@ -1287,6 +1276,14 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
}
}
+/*
+ * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1)
+ */
+void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio)
+{
+ nfs_pageio_complete(pgio);
+}
+
int __init nfs_init_nfspagecache(void)
{
nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0bb580174cb3..a25985705cd5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -343,7 +343,7 @@ struct nfs_readdesc {
};
static int
-readpage_async_filler(void *data, struct page *page)
+readpage_async_filler(struct file *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
struct nfs_page *new;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index dced329a8584..47a7751146cf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1901,7 +1901,7 @@ static int nfs_parse_devname(const char *dev_name,
/* kill possible hostname list: not supported */
comma = strchr(dev_name, ',');
if (comma != NULL && comma < end)
- *comma = 0;
+ len = comma - dev_name;
}
if (len > maxnamlen)
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 77d136ac8909..c21fca0dcba7 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -75,10 +75,14 @@ __state_in_grace(struct net *net, bool open)
if (!open)
return !list_empty(grace_list);
+ spin_lock(&grace_lock);
list_for_each_entry(lm, grace_list, list) {
- if (lm->block_opens)
+ if (lm->block_opens) {
+ spin_unlock(&grace_lock);
return true;
+ }
}
+ spin_unlock(&grace_lock);
return false;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d4fa7fbc37dc..d6c443a874f2 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -821,9 +821,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
- /* filesystem root - cannot return filehandle for ".." */
+ /*
+ * Don't return filehandle for ".." if we're at
+ * the filesystem or export root:
+ */
if (dchild == dparent)
goto out;
+ if (dparent == exp->ex_path.dentry)
+ goto out;
} else
dchild = dget(dparent);
} else
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4fa3f0ba9ab3..0a0b41071ed7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1096,6 +1096,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
+ if (c)
+ svc_xprt_put(c->cn_xprt);
return;
}
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e3d47091b191..2cb2e61cdbf6 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -655,7 +655,7 @@ struct cld_net {
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
- struct task_struct *cu_task;
+ struct completion cu_done;
struct cld_msg cu_msg;
};
@@ -664,23 +664,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
{
int ret;
struct rpc_pipe_msg msg;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
msg.len = sizeof(*cmsg);
- /*
- * Set task state before we queue the upcall. That prevents
- * wake_up_process in the downcall from racing with schedule.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
- set_current_state(TASK_RUNNING);
goto out;
}
- schedule();
+ wait_for_completion(&cup->cu_done);
if (msg.errno < 0)
ret = msg.errno;
@@ -747,7 +742,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
return -EFAULT;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
return mlen;
}
@@ -762,7 +757,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
}
static const struct rpc_pipe_ops cld_upcall_ops = {
@@ -893,7 +888,7 @@ restart_search:
goto restart_search;
}
}
- new->cu_task = current;
+ init_completion(&new->cu_done);
new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
new->cu_net = cn;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ea5cb1ba282f..5ee62045150c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -844,6 +844,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
return 0;
}
+static bool delegation_hashed(struct nfs4_delegation *dp)
+{
+ return !(list_empty(&dp->dl_perfile));
+}
+
static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
@@ -851,7 +856,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
lockdep_assert_held(&state_lock);
- if (list_empty(&dp->dl_perfile))
+ if (!delegation_hashed(dp))
return false;
dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
@@ -3656,7 +3661,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock);
- if (dp->dl_time == 0) {
+ if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = get_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ee0da259a3d3..87708608c0ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2988,15 +2988,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
goto fail;
cd->rd_maxcount -= entry_bytes;
/*
- * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
- * let's always let through the first entry, at least:
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and
+ * notes that it could be zero. If it is zero, then the server
+ * should enforce only the rd_maxcount value.
*/
- if (!cd->rd_dircount)
- goto fail;
- name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
- if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
- goto fail;
- cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (cd->rd_dircount) {
+ name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (!cd->rd_dircount)
+ cd->rd_maxcount = 0;
+ }
cd->cookie_offset = cookie_offset;
skip_entry:
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0cd57db5c5af..4d816b64dafc 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -768,7 +768,10 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- nfsd_destroy(net);
+ if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+ nn->nfsd_serv->sv_nrthreads--;
+ else
+ nfsd_destroy(net);
return err;
}
@@ -1156,20 +1159,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
#endif
/* last one */ {""}
};
- struct net *net = data;
- int ret;
-
- ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
- if (ret)
- return ret;
- sb->s_fs_info = get_net(net);
- return 0;
+ get_net(sb->s_fs_info);
+ return simple_fill_super(sb, 0x6e667364, nfsd_files);
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super);
+ struct net *net = current->nsproxy->net_ns;
+ return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
}
static void nfsd_umount(struct super_block *sb)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 17138a97f306..7745d0a9029c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -387,10 +387,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
+ }
+
+ /*
+ * If utimes(2) and friends are called with times not NULL, we should
+ * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
+ * will return EACCES, when the caller's effective UID does not match
+ * the owner of the file, and the caller is not privileged. In this
+ * situation, we should return EPERM(notify_change will return this).
+ */
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
+ accmode |= NFSD_MAY_OWNER_OVERRIDE;
+ if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
+ accmode |= NFSD_MAY_WRITE;
+ }
/* Callers that do fh_verify should do the fh_want_write: */
get_write_count = !fhp->fh_dentry;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 092c0496aef4..b174508d8766 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2777,6 +2777,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
+ inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (err) {
kfree(nilfs->ns_writer);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index bbb0dcc35905..49a148ebbcda 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -73,11 +73,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \
#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
static void nilfs_##name##_attr_release(struct kobject *kobj) \
{ \
- struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
- struct the_nilfs *nilfs = container_of(kobj->parent, \
- struct the_nilfs, \
- ns_##parent_name##_kobj); \
- subgroups = nilfs->ns_##parent_name##_subgroups; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \
+ struct nilfs_sysfs_##parent_name##_subgroups, \
+ sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
static struct kobj_type nilfs_##name##_ktype = { \
@@ -103,12 +101,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
#name); \
if (err) \
- return err; \
- return 0; \
+ kobject_put(kobj); \
+ return err; \
} \
static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
{ \
- kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+ kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
}
/************************************************************************
@@ -219,14 +217,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
}
if (err)
- return err;
+ kobject_put(&root->snapshot_kobj);
- return 0;
+ return err;
}
void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
{
- kobject_del(&root->snapshot_kobj);
+ kobject_put(&root->snapshot_kobj);
}
/************************************************************************
@@ -1008,7 +1006,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb)
err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
"%s", sb->s_id);
if (err)
- goto free_dev_subgroups;
+ goto cleanup_dev_kobject;
err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
if (err)
@@ -1045,9 +1043,7 @@ delete_mounted_snapshots_group:
nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
cleanup_dev_kobject:
- kobject_del(&nilfs->ns_dev_kobj);
-
-free_dev_subgroups:
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
failed_create_device_group:
@@ -1062,6 +1058,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
nilfs_sysfs_delete_superblock_group(nilfs);
nilfs_sysfs_delete_segctor_group(nilfs);
kobject_del(&nilfs->ns_dev_kobj);
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
}
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index e5f911bd80d2..390827fa82ef 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,7 +1,6 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
- select ANON_INODES
default n
---help---
Say Y here to enable fanotify support. fanotify is a file access
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b981fc0c8379..0161c74e76e2 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,6 +1,5 @@
config INOTIFY_USER
bool "Inotify support for userspace"
- select ANON_INODES
select FSNOTIFY
default y
---help---
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07eda77..8d4d58b12972 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -502,7 +502,7 @@ err_corrupt_attr:
}
file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
le16_to_cpu(attr->data.resident.value_offset));
- p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
+ p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
if (p2 < (u8*)attr || p2 > p)
goto err_corrupt_attr;
/* This attribute is ok, but is it in the $Extend directory? */
@@ -661,6 +661,12 @@ static int ntfs_read_locked_inode(struct inode *vi)
}
a = ctx->attr;
/* Get the standard information attribute value. */
+ if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+ + le32_to_cpu(a->data.resident.value_length) >
+ (u8 *)ctx->mrec + vol->mft_record_size) {
+ ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+ goto unm_err_out;
+ }
si = (STANDARD_INFORMATION*)((u8*)a +
le16_to_cpu(a->data.resident.value_offset));
@@ -1844,6 +1850,12 @@ int ntfs_read_inode_mount(struct inode *vi)
brelse(bh);
}
+ if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
+ ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
+ le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
+ goto err_out;
+ }
+
/* Apply the mst fixups. */
if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
/* FIXME: Try to use the $MFTMirr now. */
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 1e0d8da0d3cd..80b92120c812 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -338,8 +338,8 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
down_read(&OCFS2_I(inode)->ip_xattr_sem);
acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
up_read(&OCFS2_I(inode)->ip_xattr_sem);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
+ if (IS_ERR_OR_NULL(acl))
+ return PTR_ERR_OR_ZERO(acl);
ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
if (ret)
return ret;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 93e6f029a322..7e34be37c96d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7206,6 +7206,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_inline_data *idata = &di->id2.i_data;
+ /* No need to punch hole beyond i_size. */
+ if (start >= i_size_read(inode))
+ return 0;
+
if (end > i_size_read(inode))
end = i_size_read(inode);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b5cf27dcb18a..cb3703393264 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -88,13 +88,13 @@ struct workqueue_struct *user_dlm_worker;
*/
#define DLMFS_CAPABILITIES "bast stackglue"
static int param_set_dlmfs_capabilities(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
printk(KERN_ERR "%s: readonly parameter\n", kp->name);
return -EINVAL;
}
static int param_get_dlmfs_capabilities(char *buffer,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
return strlcpy(buffer, DLMFS_CAPABILITIES,
strlen(DLMFS_CAPABILITIES) + 1);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1d738723a41a..73c12b13fc3e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -490,10 +490,11 @@ int ocfs2_truncate_file(struct inode *inode,
* greater than page size, so we have to truncate them
* anyway.
*/
- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(inode->i_mapping, new_i_size);
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ unmap_mapping_range(inode->i_mapping,
+ new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
i_size_read(inode), 1);
if (status)
@@ -512,6 +513,9 @@ int ocfs2_truncate_file(struct inode *inode,
goto bail_unlock_sem;
}
+ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
+
status = ocfs2_commit_truncate(osb, inode, di_bh);
if (status < 0) {
mlog_errno(status);
@@ -1532,6 +1536,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
}
}
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ * is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+ u64 start, u64 len)
+{
+ int ret;
+ u64 start_block, end_block, nr_blocks;
+ u64 p_block, offset;
+ u32 cluster, p_cluster, nr_clusters;
+ struct super_block *sb = inode->i_sb;
+ u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+ if (start + len < end)
+ end = start + len;
+
+ start_block = ocfs2_blocks_for_bytes(sb, start);
+ end_block = ocfs2_blocks_for_bytes(sb, end);
+ nr_blocks = end_block - start_block;
+ if (!nr_blocks)
+ return 0;
+
+ cluster = ocfs2_bytes_to_clusters(sb, start);
+ ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+ &nr_clusters, NULL);
+ if (ret)
+ return ret;
+ if (!p_cluster)
+ return 0;
+
+ offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+ p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+ return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
@@ -1541,6 +1584,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
+ loff_t isize = i_size_read(inode);
/*
* The "start" and "end" values are NOT necessarily part of
@@ -1561,6 +1605,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
goto out;
+ /* No page cache for EOF blocks, issue zero out to disk. */
+ if (end > isize) {
+ /*
+ * zeroout eof blocks in last cluster starting from
+ * "isize" even if "start" > "isize" because it is
+ * complicated to zeroout just at "start" as "start"
+ * may be not aligned with block size, buffer write
+ * would be required to do that, but out of eof buffer
+ * write is not supported.
+ */
+ ret = ocfs2_zeroout_partial_cluster(inode, isize,
+ end - isize);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ if (start >= isize)
+ goto out;
+ end = isize;
+ }
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1869,7 +1933,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
{
int ret;
s64 llen;
- loff_t size;
+ loff_t size, orig_isize;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
@@ -1961,6 +2025,15 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
default:
ret = -EINVAL;
}
+
+ orig_isize = i_size_read(inode);
+ /* zeroout eof blocks in the cluster. */
+ if (!ret && change_size && orig_isize < size) {
+ ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
+ size - orig_isize);
+ if (!ret)
+ i_size_write(inode, size);
+ }
up_write(&OCFS2_I(inode)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
@@ -1977,9 +2050,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}
- if (change_size && i_size_read(inode) < size)
- i_size_write(inode, size);
-
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 722eb5bc9b8f..bbf1634ff427 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1017,7 +1017,8 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
mlog_errno(status);
}
- if (status == 0) {
+ /* Shutdown the kernel journal system */
+ if (!jbd2_journal_destroy(journal->j_journal) && !status) {
/*
* Do not toggle if flush was unsuccessful otherwise
* will leave dirty metadata in a "clean" journal
@@ -1026,9 +1027,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (status < 0)
mlog_errno(status);
}
-
- /* Shutdown the kernel journal system */
- jbd2_journal_destroy(journal->j_journal);
journal->j_journal = NULL;
OCFS2_I(inode)->ip_open_count--;
@@ -1082,6 +1080,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
+ if (replayed) {
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index f4cd3c3e9fb7..0a4d2cbf512f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- oi->i_sync_tid = handle->h_transaction->t_tid;
- if (datasync)
- oi->i_datasync_tid = handle->h_transaction->t_tid;
+ if (!is_handle_aborted(handle)) {
+ oi->i_sync_tid = handle->h_transaction->t_tid;
+ if (datasync)
+ oi->i_datasync_tid = handle->h_transaction->t_tid;
+ }
}
#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 2495066a9ca3..9e7f39b17e67 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -337,8 +337,8 @@ struct ocfs2_super
spinlock_t osb_lock;
u32 s_next_generation;
unsigned long osb_flags;
- s16 s_inode_steal_slot;
- s16 s_meta_steal_slot;
+ u16 s_inode_steal_slot;
+ u16 s_meta_steal_slot;
atomic_t s_num_inodes_stolen;
atomic_t s_num_meta_stolen;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 540ab5b75dbb..5617ec167a9d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -304,7 +304,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@@ -340,8 +340,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index c93d67220887..3e5cfcfc8133 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -714,7 +714,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
mutex_lock(&dquot->dq_lock);
/* Check whether we are not racing with some other dqget() */
- if (atomic_read(&dquot->dq_count) > 1)
+ if (dquot_is_busy(dquot))
goto out;
/* Running from downconvert thread? Postpone quota processing to wq */
if (current == osb->dc_task) {
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index fc6d25f6d444..41a67c9b37e0 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -895,9 +895,9 @@ static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
{
spin_lock(&osb->osb_lock);
if (type == INODE_ALLOC_SYSTEM_INODE)
- osb->s_inode_steal_slot = slot;
+ osb->s_inode_steal_slot = (u16)slot;
else if (type == EXTENT_ALLOC_SYSTEM_INODE)
- osb->s_meta_steal_slot = slot;
+ osb->s_meta_steal_slot = (u16)slot;
spin_unlock(&osb->osb_lock);
}
@@ -2863,9 +2863,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- suballoc_slot);
+ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+ else
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4f5141350af8..337f0628c378 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -96,7 +96,7 @@ struct mount_options
unsigned long commit_interval;
unsigned long mount_opt;
unsigned int atime_quantum;
- signed short slot;
+ unsigned short slot;
int localalloc_opt;
unsigned int resv_level;
int dir_resv_level;
@@ -1372,7 +1372,7 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option)
- mopt->slot = (s16)option;
+ mopt->slot = (u16)option;
break;
case Opt_commit:
option = 0;
@@ -1751,6 +1751,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
@@ -2207,11 +2208,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
if (ocfs2_clusterinfo_valid(osb)) {
+ /*
+ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+ * terminated, so make sure no overflow happens here by using
+ * memcpy. Destination strings will always be null terminated
+ * because osb is allocated using kzalloc.
+ */
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- strlcpy(osb->osb_cluster_stack,
+ memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN + 1);
+ OCFS2_STACK_LABEL_LEN);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2220,9 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
- strlcpy(osb->osb_cluster_name,
+ memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
- OCFS2_CLUSTER_NAME_LEN + 1);
+ OCFS2_CLUSTER_NAME_LEN);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
diff --git a/fs/open.c b/fs/open.c
index b7e2889a710c..c39c1d1fa082 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -732,6 +732,12 @@ static int do_dentry_open(struct file *f,
return 0;
}
+ /* Any file opened for execve()/uselib() has to be a regular file. */
+ if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
+ error = -EACCES;
+ goto cleanup_file;
+ }
+
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d49ac2ae7099..3189a32d8fa3 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -24,7 +24,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
- int uninitialized_var(error);
+ int error = 0;
size_t slen;
if (!old->d_inode->i_op->getxattr ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 953c88dd6519..bf2c8ae8ed73 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -831,9 +831,13 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
}
} else {
new_create = true;
- if (!d_is_negative(newdentry) &&
- (!new_opaque || !ovl_is_whiteout(newdentry)))
- goto out_dput;
+ if (!d_is_negative(newdentry)) {
+ if (!new_opaque || !ovl_is_whiteout(newdentry))
+ goto out_dput;
+ } else {
+ if (flags & RENAME_EXCHANGE)
+ goto out_dput;
+ }
}
if (olddentry == trap)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 230af81436a4..013d27dc6f58 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -9,7 +9,6 @@
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/cred.h>
#include <linux/xattr.h>
#include "overlayfs.h"
@@ -92,7 +91,6 @@ int ovl_permission(struct inode *inode, int mask)
struct ovl_entry *oe;
struct dentry *alias = NULL;
struct inode *realinode;
- const struct cred *old_cred;
struct dentry *realdentry;
bool is_upper;
int err;
@@ -145,18 +143,7 @@ int ovl_permission(struct inode *inode, int mask)
goto out_dput;
}
- /*
- * Check overlay inode with the creds of task and underlying inode
- * with creds of mounter
- */
- err = generic_permission(inode, mask);
- if (err)
- goto out_dput;
-
- old_cred = ovl_override_creds(inode->i_sb);
err = __inode_permission(realinode, mask);
- ovl_revert_creds(old_cred);
-
out_dput:
dput(alias);
return err;
diff --git a/fs/pipe.c b/fs/pipe.c
index 1e7263bb837a..37a003b645ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -28,6 +28,21 @@
#include "internal.h"
/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
+/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
@@ -178,9 +193,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- page_cache_get(buf->page);
+ return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
@@ -621,7 +636,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (!too_many_pipe_buffers_hard(user)) {
if (too_many_pipe_buffers_soft(user))
- pipe_bufs = 1;
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
}
diff --git a/fs/pnode.c b/fs/pnode.c
index 35154a5c9392..b81231711ad7 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -268,14 +268,13 @@ static int propagate_one(struct mount *m)
if (IS_ERR(child))
return PTR_ERR(child);
child->mnt.mnt_flags &= ~MNT_LOCKED;
+ read_seqlock_excl(&mount_lock);
mnt_set_mountpoint(m, mp, child);
+ if (m->mnt_master != dest_master)
+ SET_MNT_MARK(m->mnt_master);
+ read_sequnlock_excl(&mount_lock);
last_dest = m;
last_source = child;
- if (m->mnt_master != dest_master) {
- read_seqlock_excl(&mount_lock);
- SET_MNT_MARK(m->mnt_master);
- read_sequnlock_excl(&mount_lock);
- }
hlist_add_head(&child->mnt_hash, list);
return count_mounts(m->mnt_ns, child);
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 993bb3b5f4d5..33008eea6bc5 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -594,12 +594,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
/**
* posix_acl_update_mode - update mode in set_acl
+ * @inode: target inode
+ * @mode_p: mode (pointer) for update
+ * @acl: acl pointer
*
* Update the file mode when setting an ACL: compute the new file permission
* bits based on the ACL. In addition, if the ACL is equivalent to the new
- * file mode, set *acl to NULL to indicate that no ACL should be set.
+ * file mode, set *@acl to NULL to indicate that no ACL should be set.
*
- * As with chmod, clear the setgit bit if the caller is not in the owning group
+ * As with chmod, clear the setgid bit if the caller is not in the owning group
* or capable of CAP_FSETID (see inode_change_ok).
*
* Called from set_acl inode operations.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 015cdc615dfb..c4478abd1bef 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -169,51 +169,45 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
task_unlock(p);
rcu_read_unlock();
- seq_printf(m,
- "State:\t%s\n"
- "Tgid:\t%d\n"
- "Pid:\t%d\n"
- "PPid:\t%d\n"
- "TracerPid:\t%d\n"
- "Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n"
- "Ngid:\t%d\n"
- "FDSize:\t%d\nGroups:\t",
- get_task_state(p),
- tgid, pid_nr_ns(pid, ns), ppid, tpid,
- from_kuid_munged(user_ns, cred->uid),
- from_kuid_munged(user_ns, cred->euid),
- from_kuid_munged(user_ns, cred->suid),
- from_kuid_munged(user_ns, cred->fsuid),
- from_kgid_munged(user_ns, cred->gid),
- from_kgid_munged(user_ns, cred->egid),
- from_kgid_munged(user_ns, cred->sgid),
- from_kgid_munged(user_ns, cred->fsgid),
- ngid, max_fds);
-
+ seq_printf(m, "State:\t%s", get_task_state(p));
+
+ seq_put_decimal_ull(m, "\nTgid:\t", tgid);
+ seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns));
+ seq_put_decimal_ull(m, "\nPPid:\t", ppid);
+ seq_put_decimal_ull(m, "\nTracerPid:\t", tpid);
+ seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid));
+ seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid));
+ seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid));
+ seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid));
+ seq_put_decimal_ull(m, "\nNgid:\t", ngid);
+ seq_put_decimal_ull(m, "\nFDSize:\t", max_fds);
+
+ seq_puts(m, "\nGroups:\t");
group_info = cred->group_info;
for (g = 0; g < group_info->ngroups; g++)
- seq_printf(m, "%d ",
- from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
+ seq_put_decimal_ull(m, g ? " " : "",
+ from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
+ /* Trailing space shouldn't have been added in the first place. */
+ seq_putc(m, ' ');
#ifdef CONFIG_PID_NS
seq_puts(m, "\nNStgid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_tgid_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSpid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_pid_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSpgid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_pgrp_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns));
seq_puts(m, "\nNSsid:");
for (g = ns->level; g <= pid->level; g++)
- seq_printf(m, "\t%d",
- task_session_nr_ns(p, pid->numbers[g].ns));
+ seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns));
#endif
seq_putc(m, '\n');
}
@@ -282,11 +276,12 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
unlock_task_sighand(p, &flags);
}
- seq_printf(m, "Threads:\t%d\n", num_threads);
- seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
+ seq_put_decimal_ull(m, "Threads:\t", num_threads);
+ seq_put_decimal_ull(m, "\nSigQ:\t", qsize);
+ seq_put_decimal_ull(m, "/", qlim);
/* render them all */
- render_sigset_t(m, "SigPnd:\t", &pending);
+ render_sigset_t(m, "\nSigPnd:\t", &pending);
render_sigset_t(m, "ShdPnd:\t", &shpending);
render_sigset_t(m, "SigBlk:\t", &blocked);
render_sigset_t(m, "SigIgn:\t", &ignored);
@@ -331,7 +326,8 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
- seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+ seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
+ seq_putc(m, '\n');
#endif
seq_printf(m, "Speculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
@@ -363,10 +359,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
static inline void task_context_switch_counts(struct seq_file *m,
struct task_struct *p)
{
- seq_printf(m, "voluntary_ctxt_switches:\t%lu\n"
- "nonvoluntary_ctxt_switches:\t%lu\n",
- p->nvcsw,
- p->nivcsw);
+ seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw);
+ seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw);
+ seq_putc(m, '\n');
}
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
@@ -425,9 +420,21 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
mm = get_task_mm(task);
if (mm) {
vsize = task_vsize(mm);
- if (permitted) {
- eip = KSTK_EIP(task);
- esp = KSTK_ESP(task);
+ /*
+ * esp and eip are intentionally zeroed out. There is no
+ * non-racy way to read them without freezing the task.
+ * Programs that need reliable values can use ptrace(2).
+ *
+ * The only exception is if the task is core dumping because
+ * a program is not able to use ptrace(2) in that case. It is
+ * safe because the task has stopped executing permanently.
+ */
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
+ if (try_get_task_stack(task)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ put_task_stack(task);
+ }
}
}
@@ -498,41 +505,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
start_time = nsec_to_clock_t(task->real_start_time);
seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
- seq_put_decimal_ll(m, ' ', ppid);
- seq_put_decimal_ll(m, ' ', pgid);
- seq_put_decimal_ll(m, ' ', sid);
- seq_put_decimal_ll(m, ' ', tty_nr);
- seq_put_decimal_ll(m, ' ', tty_pgrp);
- seq_put_decimal_ull(m, ' ', task->flags);
- seq_put_decimal_ull(m, ' ', min_flt);
- seq_put_decimal_ull(m, ' ', cmin_flt);
- seq_put_decimal_ull(m, ' ', maj_flt);
- seq_put_decimal_ull(m, ' ', cmaj_flt);
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
- seq_put_decimal_ll(m, ' ', priority);
- seq_put_decimal_ll(m, ' ', nice);
- seq_put_decimal_ll(m, ' ', num_threads);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', start_time);
- seq_put_decimal_ull(m, ' ', vsize);
- seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
- seq_put_decimal_ull(m, ' ', rsslim);
- seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
- seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
- seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
- seq_put_decimal_ull(m, ' ', esp);
- seq_put_decimal_ull(m, ' ', eip);
+ seq_put_decimal_ll(m, " ", ppid);
+ seq_put_decimal_ll(m, " ", pgid);
+ seq_put_decimal_ll(m, " ", sid);
+ seq_put_decimal_ll(m, " ", tty_nr);
+ seq_put_decimal_ll(m, " ", tty_pgrp);
+ seq_put_decimal_ull(m, " ", task->flags);
+ seq_put_decimal_ull(m, " ", min_flt);
+ seq_put_decimal_ull(m, " ", cmin_flt);
+ seq_put_decimal_ull(m, " ", maj_flt);
+ seq_put_decimal_ull(m, " ", cmaj_flt);
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+ seq_put_decimal_ll(m, " ", priority);
+ seq_put_decimal_ll(m, " ", nice);
+ seq_put_decimal_ll(m, " ", num_threads);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", start_time);
+ seq_put_decimal_ull(m, " ", vsize);
+ seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0);
+ seq_put_decimal_ull(m, " ", rsslim);
+ seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0);
+ seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0);
+ seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0);
+ seq_put_decimal_ull(m, " ", esp);
+ seq_put_decimal_ull(m, " ", eip);
/* The signal information here is obsolete.
* It must be decimal for Linux 2.0 compatibility.
* Use /proc/#/status for real-time signals.
*/
- seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
- seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL);
+ seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL);
/*
* We used to output the absolute kernel address, but that's an
@@ -546,31 +553,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
else
seq_puts(m, " 0");
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ll(m, ' ', task->exit_signal);
- seq_put_decimal_ll(m, ' ', task_cpu(task));
- seq_put_decimal_ull(m, ' ', task->rt_priority);
- seq_put_decimal_ull(m, ' ', task->policy);
- seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
- seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
- seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ll(m, " ", task->exit_signal);
+ seq_put_decimal_ll(m, " ", task_cpu(task));
+ seq_put_decimal_ull(m, " ", task->rt_priority);
+ seq_put_decimal_ull(m, " ", task->policy);
+ seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
+ seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
+ seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
if (mm && permitted) {
- seq_put_decimal_ull(m, ' ', mm->start_data);
- seq_put_decimal_ull(m, ' ', mm->end_data);
- seq_put_decimal_ull(m, ' ', mm->start_brk);
- seq_put_decimal_ull(m, ' ', mm->arg_start);
- seq_put_decimal_ull(m, ' ', mm->arg_end);
- seq_put_decimal_ull(m, ' ', mm->env_start);
- seq_put_decimal_ull(m, ' ', mm->env_end);
+ seq_put_decimal_ull(m, " ", mm->start_data);
+ seq_put_decimal_ull(m, " ", mm->end_data);
+ seq_put_decimal_ull(m, " ", mm->start_brk);
+ seq_put_decimal_ull(m, " ", mm->arg_start);
+ seq_put_decimal_ull(m, " ", mm->arg_end);
+ seq_put_decimal_ull(m, " ", mm->env_start);
+ seq_put_decimal_ull(m, " ", mm->env_end);
} else
- seq_printf(m, " 0 0 0 0 0 0 0");
+ seq_puts(m, " 0 0 0 0 0 0 0");
if (permitted)
- seq_put_decimal_ll(m, ' ', task->exit_code);
+ seq_put_decimal_ll(m, " ", task->exit_code);
else
- seq_put_decimal_ll(m, ' ', 0);
+ seq_puts(m, " 0");
seq_putc(m, '\n');
if (mm)
@@ -606,13 +613,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
* seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
* size, resident, shared, text, data);
*/
- seq_put_decimal_ull(m, 0, size);
- seq_put_decimal_ull(m, ' ', resident);
- seq_put_decimal_ull(m, ' ', shared);
- seq_put_decimal_ull(m, ' ', text);
- seq_put_decimal_ull(m, ' ', 0);
- seq_put_decimal_ull(m, ' ', data);
- seq_put_decimal_ull(m, ' ', 0);
+ seq_put_decimal_ull(m, "", size);
+ seq_put_decimal_ull(m, " ", resident);
+ seq_put_decimal_ull(m, " ", shared);
+ seq_put_decimal_ull(m, " ", text);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", data);
+ seq_put_decimal_ull(m, " ", 0);
seq_putc(m, '\n');
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ae5350d61b0a..c7402cb76f11 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -233,12 +233,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
goto out_mmput;
}
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
BUG_ON(arg_start > arg_end);
BUG_ON(env_start > env_end);
@@ -888,7 +888,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
flags |= FOLL_WRITE;
while (count > 0) {
- int this_len = min_t(int, count, PAGE_SIZE);
+ size_t this_len = min_t(size_t, count, PAGE_SIZE);
if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
@@ -990,10 +990,10 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
while (count > 0) {
size_t this_len, max_len;
@@ -2674,6 +2674,13 @@ out:
}
#ifdef CONFIG_SECURITY
+/* Remember the opener's mm (via __mem_open) so that pid-attr writes can be
+ * restricted to the task that opened the file. */
+static int proc_pid_attr_open(struct inode *inode, struct file *file)
+{
+	/* Propagate __mem_open() failure instead of silently returning 0. */
+	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+}
+
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -2703,6 +2710,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
ssize_t length;
struct task_struct *task = get_proc_task(inode);
+ /* A task may only write when it was the opener. */
+ if (file->private_data != current->mm)
+ return -EPERM;
+
length = -ESRCH;
if (!task)
goto out_no_task;
@@ -2741,9 +2752,11 @@ out_no_task:
}
static const struct file_operations proc_pid_attr_operations = {
+ .open = proc_pid_attr_open,
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
.llseek = generic_file_llseek,
+ .release = mem_release,
};
static const struct pid_entry attr_dir_stuff[] = {
@@ -3189,6 +3202,15 @@ static const struct file_operations proc_tgid_base_operations = {
.llseek = default_llseek,
};
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ if (!d_is_dir(file->f_path.dentry) ||
+ (file->f_op != &proc_tgid_base_operations))
+ return ERR_PTR(-EBADF);
+
+ return proc_pid(file_inode(file));
+}
+
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fdebb0..82140dbc03b7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -415,7 +415,7 @@ const struct inode_operations proc_link_inode_operations = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
- struct inode *inode = new_inode_pseudo(sb);
+ struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = de->low_ino;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 21f198aa0961..107db4f559e0 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -507,23 +507,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
return -EFAULT;
} else {
if (kern_addr_valid(start)) {
- unsigned long n;
-
/*
* Using bounce buffer to bypass the
* hardened user copy kernel text checks.
*/
- memcpy(buf, (char *) start, tsz);
- n = copy_to_user(buffer, buf, tsz);
- /*
- * We cannot distinguish between fault on source
- * and fault on destination. When this happens
- * we clear too and hope it will trigger the
- * EFAULT again.
- */
- if (n) {
- if (clear_user(buffer + tsz - n,
- n))
+ if (probe_kernel_read(buf, (void *) start, tsz)) {
+ if (clear_user(buffer, tsz))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
}
} else {
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5e5c443591ea..83720460c5bc 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -29,10 +29,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
unsigned long committed;
long cached;
long available;
- unsigned long pagecache;
- unsigned long wmark_low = 0;
unsigned long pages[NR_LRU_LISTS];
- struct zone *zone;
int lru;
/*
@@ -51,40 +48,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
pages[lru] = global_page_state(NR_LRU_BASE + lru);
- for_each_zone(zone)
- wmark_low += zone->watermark[WMARK_LOW];
-
- /*
- * Estimate the amount of memory available for userspace allocations,
- * without causing swapping.
- */
- available = i.freeram - totalreserve_pages;
-
- /*
- * Not all the page cache can be freed, otherwise the system will
- * start swapping. Assume at least half of the page cache, or the
- * low watermark worth of cache, needs to stay.
- */
- pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
- pagecache -= min(pagecache / 2, wmark_low);
- available += pagecache;
-
- /*
- * Part of the reclaimable slab consists of items that are in use,
- * and cannot be freed. Cap this estimate at the low watermark.
- */
- available += global_page_state(NR_SLAB_RECLAIMABLE) -
- min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
-
- /*
- * Part of the kernel memory, which can be released under memory
- * pressure.
- */
- available += global_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
- PAGE_SHIFT;
-
- if (available < 0)
- available = 0;
+ available = si_mem_available();
/*
* Tagged format, for easy grepping and expansion.
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 1b0ea4a5d89e..b46274600f37 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = {
&userns_operations,
#endif
&mntns_operations,
+#ifdef CONFIG_CGROUPS
+ &cgroupns_operations,
+#endif
};
static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061fc0..dffbe533d53f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -24,6 +24,13 @@ static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
+ /*
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+ if (current->flags & PF_KTHREAD)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
@@ -51,7 +58,7 @@ int proc_setup_self(struct super_block *s)
mutex_lock(&root_inode->i_mutex);
self = d_alloc_name(s->s_root, "self");
if (self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 510413eb25b8..0fd3f81fe51f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -117,17 +117,16 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
- seq_puts(p, "cpu ");
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
seq_putc(p, '\n');
for_each_online_cpu(i) {
@@ -143,23 +142,23 @@ static int show_stat(struct seq_file *p, void *v)
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
seq_putc(p, '\n');
}
- seq_printf(p, "intr %llu", (unsigned long long)sum);
+ seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
/* sum again ? it could be updated? */
for_each_irq_nr(j)
- seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
+ seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
seq_printf(p,
"\nctxt %llu\n"
@@ -173,10 +172,10 @@ static int show_stat(struct seq_file *p, void *v)
nr_running(),
nr_iowait());
- seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
+ seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq);
for (i = 0; i < NR_SOFTIRQS; i++)
- seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
+ seq_put_decimal_ull(p, " ", per_softirq_sums[i]);
seq_putc(p, '\n');
return 0;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd0a1..4b186aac3011 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
mutex_lock(&root_inode->i_mutex);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = thread_self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 08143139b65a..785d05e3358c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -105,14 +105,19 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
nr_bytes = count;
/* If pfn is not ram, return zeros for sparse dump files */
- if (pfn_is_ram(pfn) == 0)
- memset(buf, 0, nr_bytes);
- else {
+ if (pfn_is_ram(pfn) == 0) {
+ tmp = 0;
+ if (!userbuf)
+ memset(buf, 0, nr_bytes);
+ else if (clear_user(buf, nr_bytes))
+ tmp = -EFAULT;
+ } else {
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
offset, userbuf);
- if (tmp < 0)
- return tmp;
}
+ if (tmp < 0)
+ return tmp;
+
*ppos += nr_bytes;
count -= nr_bytes;
buf += nr_bytes;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ac6c78fe19cf..7a380208b006 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -36,7 +36,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
-#include <linux/syslog.h>
+#include <linux/vmalloc.h>
#include "internal.h"
@@ -121,18 +121,6 @@ static const struct seq_operations pstore_ftrace_seq_ops = {
.show = pstore_ftrace_seq_show,
};
-static int pstore_check_syslog_permissions(struct pstore_private *ps)
-{
- switch (ps->type) {
- case PSTORE_TYPE_DMESG:
- case PSTORE_TYPE_CONSOLE:
- return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
- SYSLOG_FROM_READER);
- default:
- return 0;
- }
-}
-
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -151,10 +139,6 @@ static int pstore_file_open(struct inode *inode, struct file *file)
int err;
const struct seq_operations *sops = NULL;
- err = pstore_check_syslog_permissions(ps);
- if (err)
- return err;
-
if (ps->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
@@ -191,11 +175,6 @@ static const struct file_operations pstore_file_operations = {
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
- int err;
-
- err = pstore_check_syslog_permissions(p);
- if (err)
- return err;
if (p->psi->erase)
p->psi->erase(p->type, p->id, p->count,
@@ -216,7 +195,7 @@ static void pstore_evict_inode(struct inode *inode)
spin_lock_irqsave(&allpstore_lock, flags);
list_del(&p->list);
spin_unlock_irqrestore(&allpstore_lock, flags);
- kfree(p);
+ vfree(p);
}
}
@@ -328,7 +307,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
- private = kmalloc(sizeof *private + size, GFP_KERNEL);
+ private = vmalloc(sizeof *private + size);
if (!private)
goto fail_alloc;
private->type = type;
@@ -402,7 +381,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
fail_lockedalloc:
mutex_unlock(&d_inode(root)->i_mutex);
- kfree(private);
+ vfree(private);
fail_alloc:
iput(inode);
@@ -429,7 +408,7 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
inode = pstore_get_inode(sb);
if (inode) {
- inode->i_mode = S_IFDIR | 0755;
+ inode->i_mode = S_IFDIR | 0750;
inode->i_op = &pstore_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 6fbfa8189451..21bf055bdebf 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -36,6 +36,7 @@
#include <linux/hardirq.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/vmalloc.h>
#include "internal.h"
@@ -580,7 +581,7 @@ void pstore_get_records(int quiet)
big_oops_buf_sz);
if (unzipped_len > 0) {
- kfree(buf);
+ vfree(buf);
buf = big_oops_buf;
size = unzipped_len;
compressed = false;
@@ -594,7 +595,7 @@ void pstore_get_records(int quiet)
compressed, (size_t)size, time, psi);
if (unzipped_len < 0) {
/* Free buffer other than big oops */
- kfree(buf);
+ vfree(buf);
buf = NULL;
} else
unzipped_len = -1;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index c9e4bc47c79d..2a004480fc4f 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -233,7 +233,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
/* ECC correction notice */
ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- *buf = kmalloc(size + ecc_notice_size + 1, GFP_KERNEL);
+ *buf = vmalloc(size + ecc_notice_size + 1);
if (*buf == NULL)
return -ENOMEM;
@@ -321,6 +321,17 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
prz = cxt->przs[cxt->dump_write_cnt];
+ /*
+ * Since this is a new crash dump, we need to reset the buffer in
+ * case it still has an old dump present. Without this, the new dump
+ * will get appended, which would seriously confuse anything trying
+ * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
+ * expects to find a dump header in the beginning of buffer data, so
+ * we must to reset the buffer values, in order to ensure that the
+ * header will be written to the beginning of the buffer.
+ */
+ persistent_ram_zap(prz);
+
hlen = ramoops_write_kmsg_hdr(prz, compressed);
if (size + hlen > prz->buffer_size)
size = prz->buffer_size - hlen;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 11e558efd61e..cdf82f4b5698 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -291,7 +291,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
if (!prz->old_log) {
persistent_ram_ecc_old(prz);
- prz->old_log = kmalloc(size, GFP_KERNEL);
+ prz->old_log = vmalloc(size);
}
if (!prz->old_log) {
pr_err("failed to allocate buffer\n");
@@ -377,7 +377,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ vfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index b218f965817b..613cc38c9efa 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -14,13 +14,48 @@
#include <linux/buffer_head.h>
#include "qnx4.h"
+/*
+ * A qnx4 directory entry is an inode entry or link info
+ * depending on the status field in the last byte. The
+ * first byte is where the name start either way, and a
+ * zero means it's empty.
+ *
+ * Also, due to a bug in gcc, we don't want to use the
+ * real (differently sized) name arrays in the inode and
+ * link entries, but always the 'de_name[]' one in the
+ * fake struct entry.
+ *
+ * See
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6
+ *
+ * for details, but basically gcc will take the size of the
+ * 'name' array from one of the used union entries randomly.
+ *
+ * This use of 'de_name[]' (48 bytes) avoids the false positive
+ * warnings that would happen if gcc decides to use 'inode.di_name'
+ * (16 bytes) even when the pointer and size were to come from
+ * 'link.dl_name' (48 bytes).
+ *
+ * In all cases the actual name pointer itself is the same, it's
+ * only the gcc internal 'what is the size of this field' logic
+ * that can get confused.
+ */
+union qnx4_directory_entry {
+ struct {
+ const char de_name[48];
+ u8 de_pad[15];
+ u8 de_status;
+ };
+ struct qnx4_inode_entry inode;
+ struct qnx4_link_info link;
+};
+
static int qnx4_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
unsigned int offset;
struct buffer_head *bh;
- struct qnx4_inode_entry *de;
- struct qnx4_link_info *le;
unsigned long blknum;
int ix, ino;
int size;
@@ -37,27 +72,27 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx)
}
ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK;
for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) {
+ union qnx4_directory_entry *de;
+
offset = ix * QNX4_DIR_ENTRY_SIZE;
- de = (struct qnx4_inode_entry *) (bh->b_data + offset);
- if (!de->di_fname[0])
+ de = (union qnx4_directory_entry *) (bh->b_data + offset);
+
+ if (!de->de_name[0])
continue;
- if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
+ if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
continue;
- if (!(de->di_status & QNX4_FILE_LINK))
- size = QNX4_SHORT_NAME_MAX;
- else
- size = QNX4_NAME_MAX;
- size = strnlen(de->di_fname, size);
- QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname));
- if (!(de->di_status & QNX4_FILE_LINK))
+ if (!(de->de_status & QNX4_FILE_LINK)) {
+ size = sizeof(de->inode.di_fname);
ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
- else {
- le = (struct qnx4_link_info*)de;
- ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
+ } else {
+ size = sizeof(de->link.dl_fname);
+ ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) *
QNX4_INODES_PER_BLOCK +
- le->dl_inode_ndx;
+ de->link.dl_inode_ndx;
}
- if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) {
+ size = strnlen(de->de_name, size);
+		QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->de_name));
+ if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) {
brelse(bh);
return 0;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1cb1d02c5937..b7d5e254792c 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -472,7 +472,7 @@ int dquot_release(struct dquot *dquot)
mutex_lock(&dquot->dq_lock);
/* Check whether we are not racing with some other dqget() */
- if (atomic_read(&dquot->dq_count) > 1)
+ if (dquot_is_busy(dquot))
goto out_dqlock;
mutex_lock(&dqopt->dqio_mutex);
if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
@@ -604,7 +604,7 @@ EXPORT_SYMBOL(dquot_scan_active);
/* Write all dquot structures to quota files */
int dquot_writeback_dquots(struct super_block *sb, int type)
{
- struct list_head *dirty;
+ struct list_head dirty;
struct dquot *dquot;
struct quota_info *dqopt = sb_dqopt(sb);
int cnt;
@@ -617,9 +617,10 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
if (!sb_has_quota_active(sb, cnt))
continue;
spin_lock(&dq_list_lock);
- dirty = &dqopt->info[cnt].dqi_dirty_list;
- while (!list_empty(dirty)) {
- dquot = list_first_entry(dirty, struct dquot,
+ /* Move list away to avoid livelock. */
+ list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty);
+ while (!list_empty(&dirty)) {
+ dquot = list_first_entry(&dirty, struct dquot,
dq_dirty);
/* Dirty and inactive can be only bad dquot... */
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
@@ -2782,68 +2783,73 @@ EXPORT_SYMBOL(dquot_quotactl_sysfile_ops);
static int do_proc_dqstats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- unsigned int type = (int *)table->data - dqstats.stat;
+ unsigned int type = (unsigned long *)table->data - dqstats.stat;
+ s64 value = percpu_counter_sum(&dqstats.counter[type]);
+
+ /* Filter negative values for non-monotonic counters */
+ if (value < 0 && (type == DQST_ALLOC_DQUOTS ||
+ type == DQST_FREE_DQUOTS))
+ value = 0;
/* Update global table */
- dqstats.stat[type] =
- percpu_counter_sum_positive(&dqstats.counter[type]);
- return proc_dointvec(table, write, buffer, lenp, ppos);
+ dqstats.stat[type] = value;
+ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
static struct ctl_table fs_dqstats_table[] = {
{
.procname = "lookups",
.data = &dqstats.stat[DQST_LOOKUPS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "drops",
.data = &dqstats.stat[DQST_DROPS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "reads",
.data = &dqstats.stat[DQST_READS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "writes",
.data = &dqstats.stat[DQST_WRITES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "cache_hits",
.data = &dqstats.stat[DQST_CACHE_HITS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "allocated_dquots",
.data = &dqstats.stat[DQST_ALLOC_DQUOTS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "free_dquots",
.data = &dqstats.stat[DQST_FREE_DQUOTS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
{
.procname = "syncs",
.data = &dqstats.stat[DQST_SYNCS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0444,
.proc_handler = do_proc_dqstats,
},
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 58efb83dec1c..4f21724a212b 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -55,7 +55,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
memset(buf, 0, info->dqi_usable_bs);
return sb->s_op->quota_read(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
}
static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
@@ -64,7 +64,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
ssize_t ret;
ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
if (ret != info->dqi_usable_bs) {
quota_error(sb, "dquota write failed");
if (ret >= 0)
@@ -277,7 +277,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
blk);
goto out_buf;
}
- dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+ dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) +
sizeof(struct qt_disk_dqdbheader) +
i * info->dqi_entry_size;
kfree(buf);
@@ -416,6 +416,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
quota_error(dquot->dq_sb, "Quota structure has offset to "
"other block (%u) than it should (%u)", blk,
(uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+ ret = -EIO;
goto out_buf;
}
ret = read_blk(info, blk, buf);
@@ -481,6 +482,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) {
+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
+ newblk, info->dqi_blocks);
+ ret = -EUCLEAN;
+ goto out_buf;
+ }
+
if (depth == info->dqi_qtree_depth - 1) {
ret = free_dqentry(info, dquot, newblk);
newblk = 0;
@@ -552,7 +560,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
ret = -EIO;
goto out_buf;
} else {
- ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+ ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct
qt_disk_dqdbheader) + i * info->dqi_entry_size;
}
out_buf:
@@ -580,6 +588,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (!blk) /* No reference? */
goto out_buf;
+ if (blk < QT_TREEOFF || blk >= info->dqi_blocks) {
+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
+ blk, info->dqi_blocks);
+ ret = -EUCLEAN;
+ goto out_buf;
+ }
+
if (depth < info->dqi_qtree_depth - 1)
ret = find_tree_dqentry(info, dquot, blk, depth+1);
else
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 2aa012a68e90..9891b8fb0432 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -266,6 +266,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
+ d->dqb_pad = 0;
if (qtree_entry_unused(info, dp))
d->dqb_itime = cpu_to_le64(1);
}
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 889d558b4e05..37fcd10866c3 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -145,7 +145,7 @@ static const struct inode_operations ramfs_dir_inode_operations = {
.mkdir = ramfs_mkdir,
.rmdir = simple_rmdir,
.mknod = ramfs_mknod,
- .rename = simple_rename,
+ .rename2 = simple_rename,
};
static const struct super_operations ramfs_ops = {
diff --git a/fs/read_write.c b/fs/read_write.c
index 7b175b9134ec..27023e8f531e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -21,9 +21,6 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -656,7 +653,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, iter_fn_t fn)
+ loff_t *ppos, int type)
{
struct kiocb kiocb;
ssize_t ret;
@@ -664,7 +661,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- ret = fn(&kiocb, iter);
+ if (type == READ)
+ ret = filp->f_op->read_iter(&kiocb, iter);
+ else
+ ret = filp->f_op->write_iter(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
@@ -672,7 +672,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, io_fn_t fn)
+ loff_t *ppos, int type)
{
ssize_t ret = 0;
@@ -680,7 +680,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
+ if (type == READ) {
+ nr = filp->f_op->read(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ } else {
+ nr = filp->f_op->write(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ }
if (nr < 0) {
if (!ret)
@@ -783,8 +789,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = import_iovec(type, uvector, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
@@ -798,19 +802,14 @@ static ssize_t do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
@@ -957,8 +956,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = compat_import_iovec(type, uvector, nr_segs,
UIO_FASTIOV, &iov, &iter);
@@ -972,19 +969,14 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
diff --git a/fs/readdir.c b/fs/readdir.c
index ced679179cac..3494d7a8ff65 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -51,6 +51,40 @@ out:
EXPORT_SYMBOL(iterate_dir);
/*
+ * POSIX says that a dirent name cannot contain NULL or a '/'.
+ *
+ * It's not 100% clear what we should really do in this case.
+ * The filesystem is clearly corrupted, but returning a hard
+ * error means that you now don't see any of the other names
+ * either, so that isn't a perfect alternative.
+ *
+ * And if you return an error, what error do you use? Several
+ * filesystems seem to have decided on EUCLEAN being the error
+ * code for EFSCORRUPTED, and that may be the error to use. Or
+ * just EIO, which is perhaps more obvious to users.
+ *
+ * In order to see the other file names in the directory, the
+ * caller might want to make this a "soft" error: skip the
+ * entry, and return the error at the end instead.
+ *
+ * Note that this should likely do a "memchr(name, 0, len)"
+ * check too, since that would be filesystem corruption as
+ * well. However, that case can't actually confuse user space,
+ * which has to do a strlen() on the name anyway to find the
+ * filename length, and the above "soft error" worry means
+ * that it's probably better left alone until we have that
+ * issue clarified.
+ */
+static int verify_dirent_name(const char *name, int len)
+{
+ if (!len)
+ return -EIO;
+ if (memchr(name, '/', len))
+ return -EIO;
+ return 0;
+}
+
+/*
* Traditional linux readdir() handling..
*
* "count=1" is a special case, meaning that the buffer is one
@@ -159,6 +193,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
+ buf->error = verify_dirent_name(name, namlen);
+ if (unlikely(buf->error))
+ return buf->error;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
@@ -243,6 +280,9 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
+ buf->error = verify_dirent_name(name, namlen);
+ if (unlikely(buf->error))
+ return buf->error;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 60ba35087d12..ccbb15ab029f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1553,11 +1553,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
* set version 1, version 2 could be used too, because stat data
* key is the same in both versions
*/
- key.version = KEY_FORMAT_3_5;
- key.on_disk_key.k_dir_id = dirino;
- key.on_disk_key.k_objectid = inode->i_ino;
- key.on_disk_key.k_offset = 0;
- key.on_disk_key.k_type = 0;
+ _make_cpu_key(&key, KEY_FORMAT_3_5, dirino, inode->i_ino, 0, 0, 3);
/* look for the object's stat data */
retval = search_item(inode->i_sb, &key, &path_to_sd);
@@ -2161,7 +2157,8 @@ out_end_trans:
out_inserted_sd:
clear_nlink(inode);
th->t_trans_id = 0; /* so the caller can't use this handle later */
- unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
iput(inode);
return err;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 00985f9db9f7..6a0fa0cdc1ed 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2770,6 +2770,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
+ /*
+ * Sanity check to see if journal first block is correct.
+ * If journal first block is invalid it can cause
+ * zeroing important superblock members.
+ */
+ if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) {
+ reiserfs_warning(sb, "journal-1393",
+ "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d",
+ SB_JOURNAL_1st_RESERVED_BLOCK(sb),
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+ goto free_and_return;
+ }
+
if (journal_init_dev(sb, journal, j_dev_name) != 0) {
reiserfs_warning(sb, "sh-462",
"unable to initialize journal device");
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 24cbe013240f..13322c39e6cc 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -386,6 +386,24 @@ void pathrelse(struct treepath *search_path)
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+ struct reiserfs_de_head *deh;
+ int i;
+
+ deh = B_I_DEH(bh, ih);
+ for (i = 0; i < ih_entry_count(ih); i++) {
+ if (deh_location(&deh[i]) > ih_item_len(ih)) {
+ reiserfs_warning(NULL, "reiserfs-5094",
+ "directory entry location seems wrong %h",
+ &deh[i]);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
{
struct block_head *blkh;
@@ -453,6 +471,15 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
"(second one): %h", ih);
return 0;
}
+ if (is_direntry_le_ih(ih)) {
+ if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+ reiserfs_warning(NULL, "reiserfs-5093",
+ "item entry count seems wrong %h",
+ ih);
+ return 0;
+ }
+ return has_valid_deh_location(bh, ih);
+ }
prev_location = ih_location(ih);
}
@@ -2249,7 +2276,8 @@ error_out:
/* also releases the path */
unfix_nodes(&s_ins_balance);
#ifdef REISERQUOTA_DEBUG
- reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
+ if (inode)
+ reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
"reiserquota insert_item(): freeing %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ee095246da4e..2ffcbe451202 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -599,6 +599,7 @@ static void reiserfs_put_super(struct super_block *s)
reiserfs_write_unlock(s);
mutex_destroy(&REISERFS_SB(s)->lock);
destroy_workqueue(REISERFS_SB(s)->commit_wq);
+ kfree(REISERFS_SB(s)->s_jdev);
kfree(s->s_fs_info);
s->s_fs_info = NULL;
}
@@ -1231,6 +1232,10 @@ static int reiserfs_parse_options(struct super_block *s,
"turned on.");
return 0;
}
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
if (*arg) { /* Some filename specified? */
if (REISERFS_SB(s)->s_qf_names[qtype]
&& strcmp(REISERFS_SB(s)->s_qf_names[qtype],
@@ -1260,10 +1265,6 @@ static int reiserfs_parse_options(struct super_block *s,
else
*mount_options |= 1 << REISERFS_GRPQUOTA;
} else {
- if (qf_names[qtype] !=
- REISERFS_SB(s)->s_qf_names[qtype])
- kfree(qf_names[qtype]);
- qf_names[qtype] = NULL;
if (qtype == USRQUOTA)
*mount_options &= ~(1 << REISERFS_USRQUOTA);
else
@@ -1920,7 +1921,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (!sbi->s_jdev) {
SWARN(silent, s, "", "Cannot allocate memory for "
"journal device name");
- goto error;
+ goto error_unlocked;
}
}
#ifdef CONFIG_QUOTA
@@ -2049,6 +2050,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
unlock_new_inode(root_inode);
}
+ if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+ !root_inode->i_size) {
+ SWARN(silent, s, "", "corrupt root inode, run fsck");
+ iput(root_inode);
+ errval = -EUCLEAN;
+ goto error;
+ }
+
s->s_root = d_make_root(root_inode);
if (!s->s_root)
goto error;
@@ -2208,6 +2217,7 @@ error_unlocked:
kfree(qf_names[j]);
}
#endif
+ kfree(sbi->s_jdev);
kfree(sbi);
s->s_fs_info = NULL;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d424b3d4bf3b..92d39cbc2d64 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -656,6 +656,13 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
+ /*
+ * priv_root needn't be initialized during mount so allow initial
+ * lookups to succeed.
+ */
+ if (!REISERFS_SB(inode->i_sb)->priv_root)
+ return 0;
+
dentry = xattr_lookup(inode, name, XATTR_REPLACE);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index 613ff5aef94e..19ca3745301f 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -42,7 +42,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec);
static inline int reiserfs_xattrs_initialized(struct super_block *sb)
{
- return REISERFS_SB(sb)->priv_root != NULL;
+ return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root;
}
#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index f86f51f99ace..1dcadd22b440 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -221,10 +221,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos,
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
diff --git a/fs/sdfat/Kconfig b/fs/sdfat/Kconfig
new file mode 100644
index 000000000000..08b12f7f768b
--- /dev/null
+++ b/fs/sdfat/Kconfig
@@ -0,0 +1,126 @@
+config SDFAT_FS
+ tristate "sdFAT filesystem support"
+ select NLS
+ select NLS_UTF8
+ select NLS_CODEPAGE_437
+ select NLS_ISO8859_1
+ help
+ If you want to use the sdFAT file system, then you must say Y or M
+	  here to include sdFAT support.
+	  sdFAT is a unified FAT-based file system which supports not only fat12/
+ 16/32 with vfat but also exfat. sdFAT supports winnt short-name rule.
+ (winnt: emulate the Windows NT rule for display/create.)
+
+ To compile this as a module, choose M here: the module will be called
+ sdfat_core and sdfat_fs.
+
+config SDFAT_USE_FOR_EXFAT
+ bool "Register sdFAT as exFAT"
+ default y
+ depends on SDFAT_FS && !EXFAT_FS
+ help
+ If you want to register sdFAT as available for exFAT, say Y.
+
+config SDFAT_USE_FOR_VFAT
+ bool "Register sdFAT as VFAT"
+ default y
+ depends on SDFAT_FS && !VFAT_FS
+ help
+ If you want to register sdFAT as available for VFAT, say Y.
+
+config SDFAT_DELAYED_META_DIRTY
+ bool "Enable delayed metadata dirty"
+ default y
+ depends on SDFAT_FS
+ help
+ If you enable this feature, metadata(FAT/Directory entry) is updated
+ by flush thread.
+
+config SDFAT_SUPPORT_DIR_SYNC
+ bool "Enable supporting dir sync"
+ default n
+ depends on SDFAT_FS
+ help
+ If you enable this feature, the modification for directory operation
+ is written to a storage at once.
+
+config SDFAT_DEFAULT_CODEPAGE
+ int "Default codepage for sdFAT"
+ default 437
+ depends on SDFAT_FS
+ help
+ This option should be set to the codepage of your sdFAT filesystems.
+
+config SDFAT_DEFAULT_IOCHARSET
+ string "Default iocharset for sdFAT"
+ default "utf8"
+ depends on SDFAT_FS
+ help
+ Set this to the default input/output character set you'd
+ like sdFAT to use. It should probably match the character set
+ that most of your sdFAT filesystems use, and can be overridden
+ with the "iocharset" mount option for sdFAT filesystems.
+
+config SDFAT_CHECK_RO_ATTR
+ bool "Check read-only attribute"
+ default n
+ depends on SDFAT_FS
+
+config SDFAT_ALIGNED_MPAGE_WRITE
+ bool "Enable supporting aligned mpage_write"
+ default y
+ depends on SDFAT_FS
+
+config SDFAT_VIRTUAL_XATTR
+ bool "Virtual xattr support for sdFAT"
+ default y
+ depends on SDFAT_FS
+ help
+ To support virtual xattr.
+
+config SDFAT_VIRTUAL_XATTR_SELINUX_LABEL
+ string "Default string for SELinux label"
+ default "u:object_r:sdcard_external:s0"
+ depends on SDFAT_FS && SDFAT_VIRTUAL_XATTR
+ help
+ Set this to the default string for SELinux label.
+
+config SDFAT_SUPPORT_STLOG
+ bool "Enable storage log"
+ default y
+ depends on SDFAT_FS && PROC_STLOG
+
+config SDFAT_DEBUG
+ bool "enable debug features"
+ depends on SDFAT_FS
+ default y
+
+config SDFAT_DBG_IOCTL
+ bool "enable debug-ioctl features"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_DBG_MSG
+ bool "enable debug messages"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default y
+
+config SDFAT_DBG_BUGON
+ bool "enable strict BUG_ON() for debugging"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_DBG_WARNON
+ bool "enable strict WARN_ON() for debugging"
+ depends on SDFAT_FS && SDFAT_DEBUG
+ default n
+
+config SDFAT_STATISTICS
+ bool "enable statistics for bigdata"
+ depends on SDFAT_FS
+ default y
+
+config SDFAT_UEVENT
+ bool "enable uevent"
+ depends on SDFAT_FS
+ default y
diff --git a/fs/sdfat/LICENSE b/fs/sdfat/LICENSE
new file mode 100644
index 000000000000..d159169d1050
--- /dev/null
+++ b/fs/sdfat/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/fs/sdfat/Makefile b/fs/sdfat/Makefile
new file mode 100644
index 000000000000..a5cd0858c6ca
--- /dev/null
+++ b/fs/sdfat/Makefile
@@ -0,0 +1,24 @@
+#
+# Makefile for the linux FAT12/16/32(VFAT)/64(exFAT) filesystem driver.
+#
+
+obj-$(CONFIG_SDFAT_FS) += sdfat_fs.o
+
+sdfat_fs-objs := sdfat.o core.o core_fat.o core_exfat.o api.o blkdev.o \
+		   fatent.o amap_smart.o cache.o dfr.o nls.o misc.o \
+		   mpage.o extent.o
+
+sdfat_fs-$(CONFIG_SDFAT_VIRTUAL_XATTR) += xattr.o
+sdfat_fs-$(CONFIG_SDFAT_STATISTICS) += statistics.o
+
+
+all:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+cscope:
+	# remove stale cscope artifacts before regenerating
+	rm -rf cscope.files cscope.out
+	find $(PWD) \( -name '*.c' -o -name '*.cpp' -o -name '*.cc' -o -name '*.h' -o -name '*.s' -o -name '*.S' \) -print > cscope.files
+	cscope
diff --git a/fs/sdfat/README.md b/fs/sdfat/README.md
new file mode 100644
index 000000000000..f291e8f1ae00
--- /dev/null
+++ b/fs/sdfat/README.md
@@ -0,0 +1,19 @@
+sdFAT FS support for Linux Kernel 4.4
+=====================================
+
+sdFAT is a unified FAT-based file system which supports not only fat12/16/32
+with vfat but also exfat. sdFAT supports the Windows NT short-name rule.
+
+Suggested Kernel config:
+
+ CONFIG_SDFAT_FS=y
+ CONFIG_SDFAT_DELAYED_META_DIRTY=y
+ CONFIG_SDFAT_SUPPORT_DIR_SYNC=y
+ CONFIG_SDFAT_DEFAULT_CODEPAGE=437
+ CONFIG_SDFAT_DEFAULT_IOCHARSET="utf8"
+ CONFIG_SDFAT_ALIGNED_MPAGE_WRITE=y
+ CONFIG_SDFAT_VIRTUAL_XATTR=y
+ CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL="u:object_r:vfat:s0"
+ CONFIG_SDFAT_DEBUG=y
+ CONFIG_SDFAT_DBG_MSG=y
+ CONFIG_SDFAT_STATISTICS=y
diff --git a/fs/sdfat/amap_smart.c b/fs/sdfat/amap_smart.c
new file mode 100644
index 000000000000..b556f868d76e
--- /dev/null
+++ b/fs/sdfat/amap_smart.c
@@ -0,0 +1,1314 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : amap_smart.c */
+/* PURPOSE : FAT32 Smart allocation code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include "amap_smart.h"
+
+/* AU list related functions */
+/* Detach @entry from its doubly linked list.
+ * prev/next are reset to NULL so callers can later tell that the entry
+ * is detached (i.e. currently selected as a working AU).
+ */
+static inline void amap_list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+
+	/* Will be used to check if the entry is a single entry(selected) */
+	entry->prev = NULL;
+	entry->next = NULL;
+}
+
+/* Push @au onto the singly linked list @shead (LIFO order).
+ * entry->head points back at the list head and doubles as a membership
+ * marker: NULL means "not on any slist" (see IS_AU_IGNORED/IS_AU_HOT).
+ * Always returns 0.
+ */
+static inline int amap_insert_to_list(AU_INFO_T *au, struct slist_head *shead)
+{
+	struct slist_head *entry = &au->shead;
+
+	ASSERT(!entry->head);
+
+	entry->next = shead->next;
+	entry->head = shead;
+
+	shead->next = entry;
+
+	return 0;
+}
+
+/* Remove @au from the singly linked list @shead (linear search).
+ * Returns 0 on success.  Walking off the end of the list means the AU
+ * map is corrupted, since entry->head claimed membership of @shead.
+ */
+static inline int amap_remove_from_list(AU_INFO_T *au, struct slist_head *shead)
+{
+	struct slist_head *entry = &au->shead;
+	struct slist_head *iter;
+
+	BUG_ON(entry->head != shead);
+
+	iter = shead;
+
+	while (iter->next) {
+		if (iter->next == entry) {
+			// iter->next = iter->next->next
+			iter->next = entry->next;
+
+			entry->next = NULL;
+			entry->head = NULL;
+			return 0;
+		}
+		iter = iter->next;
+	}
+
+	/* Not reachable unless the slist is corrupted.
+	 * BUG_ON("string") was a macro misuse (a string literal is a
+	 * constant-true condition); use BUG() directly and keep a return
+	 * value so the function does not fall off the end (undefined
+	 * behavior) when CONFIG_BUG is disabled.
+	 */
+	BUG();
+	return -EIO;
+}
+
+/* Full-linear serach => Find AU with max. number of fclu */
+static inline AU_INFO_T *amap_find_hot_au_largest(struct slist_head *shead)
+{
+	struct slist_head *iter;
+	uint16_t max_fclu = 0;
+	AU_INFO_T *entry, *ret = NULL;
+
+	ASSERT(shead->head == shead);	/* Singly-list condition */
+	ASSERT(shead->next != shead);
+
+	iter = shead->next;
+
+	/* Walk the whole slist, keeping the AU with the most free clusters.
+	 * Returns NULL when every AU on the list is completely full.
+	 */
+	while (iter) {
+		entry = list_entry(iter, AU_INFO_T, shead);
+
+		if (entry->free_clusters > max_fclu) {
+			max_fclu = entry->free_clusters;
+			ret = entry;
+		}
+
+		iter = iter->next;
+	}
+
+	return ret;
+}
+
+/* Find partially used AU with max. number of fclu.
+ * If there is no partial AU available, pick a clean one
+ */
+static inline AU_INFO_T *amap_find_hot_au_partial(AMAP_T *amap)
+{
+	struct slist_head *iter;
+	uint16_t max_fclu = 0;
+	AU_INFO_T *entry, *ret = NULL;
+
+	iter = &amap->slist_hot;
+	ASSERT(iter->head == iter);	/* Singly-list condition */
+	ASSERT(iter->next != iter);
+
+	iter = iter->next;
+
+	/* Prefer the partial AU (fewer free clusters than a clean AU)
+	 * with the most free space; fall back to a clean AU only when no
+	 * partial AU has been seen so far.
+	 */
+	while (iter) {
+		entry = list_entry(iter, AU_INFO_T, shead);
+
+		if (entry->free_clusters > max_fclu) {
+			if (entry->free_clusters < amap->clusters_per_au) {
+				max_fclu = entry->free_clusters;
+				ret = entry;
+			} else {
+				/* clean AU: remember it only as a fallback */
+				if (!ret)
+					ret = entry;
+			}
+		}
+
+		iter = iter->next;
+	}
+
+	return ret;
+}
+
+
+
+
+/*
+ * Size-base AU management functions
+ */
+
+/*
+ * Add au into cold AU MAP
+ * au: an isolated (not in a list) AU data structure
+ */
+int amap_add_cold_au(AMAP_T *amap, AU_INFO_T *au)
+{
+	FCLU_NODE_T *fclu_node = NULL;
+
+	/* Check if a single entry */
+	BUG_ON(au->head.prev);
+
+	/* Ignore if the au is full */
+	if (!au->free_clusters)
+		return 0;
+
+	/* Find entry: cold AUs are bucketed by their free-cluster count */
+	fclu_node = NODE(au->free_clusters, amap);
+
+	/* Insert to the list */
+	list_add_tail(&(au->head), &(fclu_node->head));
+
+	/* Update fclu_hint (Increase) */
+	if (au->free_clusters > amap->fclu_hint)
+		amap->fclu_hint = au->free_clusters;
+
+	return 0;
+}
+
+/*
+ * Remove an AU from AU MAP
+ */
+int amap_remove_cold_au(AMAP_T *amap, AU_INFO_T *au)
+{
+	struct list_head *prev = au->head.prev;
+
+	/* Single entries are not managed in lists:
+	 * a NULL prev means the AU is detached (full AUs are never queued)
+	 */
+	if (!prev) {
+		BUG_ON(au->free_clusters > 0);
+		return 0;
+	}
+
+	/* remove from list (also NULLs prev/next, marking it detached) */
+	amap_list_del(&(au->head));
+
+	return 0;
+}
+
+
+/* "Find" best fit AU
+ * returns NULL if there is no AU w/ enough free space.
+ *
+ * This function doesn't change AU status.
+ * The caller should call amap_remove_cold_au() if needed.
+ */
+AU_INFO_T *amap_find_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters)
+{
+	AU_INFO_T *au = NULL;
+	FCLU_NODE_T *fclu_iter;
+
+	/* NOTE(review): free_clusters is unsigned, so the `<= 0` test only
+	 * ever catches 0 — the negative half is dead. Harmless as written.
+	 */
+	if (free_clusters <= 0 || free_clusters > amap->clusters_per_au) {
+		EMSG("AMAP: amap_find_cold_au_bestfit / unexpected arg. (%d)\n",
+			free_clusters);
+		return NULL;
+	}
+
+	fclu_iter = NODE(free_clusters, amap);
+
+	if (amap->fclu_hint < free_clusters) {
+		/* There is no AUs with enough free_clusters */
+		return NULL;
+	}
+
+	/* Naive Hash management (++):
+	 * scan buckets upward from the exact-fit bucket, so the first
+	 * non-empty bucket yields the smallest AU that still fits.
+	 */
+	do {
+		if (!list_empty(&fclu_iter->head)) {
+			struct list_head *first = fclu_iter->head.next;
+
+			au = list_entry(first, AU_INFO_T, head);
+
+			break;
+		}
+
+		fclu_iter++;
+	} while (fclu_iter < (amap->fclu_nodes + amap->clusters_per_au));
+
+
+	// BUG_ON(au->free_clusters < 0);
+	BUG_ON(au && (au->free_clusters > amap->clusters_per_au));
+
+	return au;
+}
+
+
+/* "Pop" best fit AU
+ *
+ * returns NULL if there is no AU w/ enough free space.
+ * The returned AU will not be in the list anymore.
+ */
+AU_INFO_T *amap_pop_cold_au_bestfit(AMAP_T *amap, uint16_t free_clusters)
+{
+	/* Naive implementation: find, then unlink from the cold map */
+	AU_INFO_T *au;
+
+	au = amap_find_cold_au_bestfit(amap, free_clusters);
+	if (au)
+		amap_remove_cold_au(amap, au);
+
+	return au;
+}
+
+
+
+/* Pop the AU with the largest free space
+ *
+ * search from 'start_fclu' to 0
+ * (target freecluster : -1 for each step)
+ * start_fclu = 0 means to search from the max. value
+ */
+AU_INFO_T *amap_pop_cold_au_largest(AMAP_T *amap, uint16_t start_fclu)
+{
+	AU_INFO_T *au = NULL;
+	FCLU_NODE_T *fclu_iter;
+
+	/* start_fclu == 0 means "search from the maximum"; clamp to range */
+	if (!start_fclu)
+		start_fclu = amap->clusters_per_au;
+	if (start_fclu > amap->clusters_per_au)
+		start_fclu = amap->clusters_per_au;
+
+	/* Use hint (search start point) */
+	if (amap->fclu_hint < start_fclu)
+		fclu_iter = NODE(amap->fclu_hint, amap);
+	else
+		fclu_iter = NODE(start_fclu, amap);
+
+	/* Naive Hash management: scan buckets downward, so the first
+	 * non-empty bucket yields the AU with the most free clusters.
+	 */
+	do {
+		if (!list_empty(&fclu_iter->head)) {
+			struct list_head *first = fclu_iter->head.next;
+
+			au = list_entry(first, AU_INFO_T, head);
+			// BUG_ON((au < amap->entries) || ((amap->entries + amap->n_au) <= au));
+
+			amap_list_del(first);
+
+			// (Hint) Possible maximum value of free clusters (among cold)
+			/* if it wasn't the whole search, don't update fclu_hint */
+			if (start_fclu == amap->clusters_per_au)
+				amap->fclu_hint = au->free_clusters;
+
+			break;
+		}
+
+		fclu_iter--;
+	} while (amap->fclu_nodes <= fclu_iter);
+
+	return au;
+}
+
+
+
+/*
+ * ===============================================
+ * Allocation Map related functions
+ * ===============================================
+ */
+
+/* Create AMAP related data structure (mount time) */
+/* Build the allocation map at mount time: validate the volume geometry,
+ * allocate the per-AU tables and free-cluster buckets, then scan the
+ * whole FAT once to compute per-AU free-cluster counts.
+ * Returns 0 on success, negative errno otherwise.
+ */
+int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	AMAP_T *amap;
+	int total_used_clusters;
+	int n_au_table = 0;
+	int i, i_clu, i_au;
+	int i_au_root = -1, i_au_hot_from = INT_MAX;
+	u32 misaligned_sect = hidden_sect;
+	u64 tmp;
+
+	BUG_ON(!fsi->bd_opened);
+
+	if (fsi->amap)
+		return -EEXIST;
+
+	/* Check conditions */
+	if (fsi->vol_type != FAT32) {
+		sdfat_msg(sb, KERN_ERR, "smart allocation is only available "
+				"with fat32-fs");
+		return -ENOTSUPP;
+	}
+
+	if (fsi->num_sectors < AMAP_MIN_SUPPORT_SECTORS) {
+		sdfat_msg(sb, KERN_ERR, "smart allocation is only available "
+				"with sectors above %d", AMAP_MIN_SUPPORT_SECTORS);
+		return -ENOTSUPP;
+	}
+
+	/* AU size must be a multiple of clu_size */
+	if ((sect_per_au <= 0) || (sect_per_au & (fsi->sect_per_clus - 1))) {
+		sdfat_msg(sb, KERN_ERR,
+			"invalid AU size (sect_per_au : %u, "
+			"sect_per_clus : %u) "
+			"please re-format for performance.",
+			sect_per_au, fsi->sect_per_clus);
+		return -EINVAL;
+	}
+
+	/* the start sector of this partition must be a multiple of clu_size */
+	if (misaligned_sect & (fsi->sect_per_clus - 1)) {
+		sdfat_msg(sb, KERN_ERR,
+			"misaligned part (start sect : %u, "
+			"sect_per_clus : %u) "
+			"please re-format for performance.",
+			misaligned_sect, fsi->sect_per_clus);
+		return -EINVAL;
+	}
+
+	/* data start sector must be a multiple of clu_size */
+	if (fsi->data_start_sector & (fsi->sect_per_clus - 1)) {
+		sdfat_msg(sb, KERN_ERR,
+			"misaligned data area (start sect : %llu, "
+			"sect_per_clus : %u) "
+			"please re-format for performance.",
+			fsi->data_start_sector, fsi->sect_per_clus);
+		return -EINVAL;
+	}
+
+	misaligned_sect &= (sect_per_au - 1);
+
+	/* Allocate data structrues */
+	amap = kzalloc(sizeof(AMAP_T), GFP_NOIO);
+	if (!amap)
+		return -ENOMEM;
+
+	amap->sb = sb;
+
+	/* n_au = ceil((num_sectors + misalignment) / sect_per_au) */
+	tmp = fsi->num_sectors + misaligned_sect + sect_per_au - 1;
+	do_div(tmp, sect_per_au);
+	amap->n_au = tmp;
+	amap->n_clean_au = 0;
+	amap->n_full_au = 0;
+
+	/* Reflect block-partition align first,
+	 * then partition-data_start align
+	 */
+	amap->clu_align_bias = (misaligned_sect / fsi->sect_per_clus);
+	amap->clu_align_bias += (fsi->data_start_sector >> fsi->sect_per_clus_bits) - CLUS_BASE;
+	amap->clusters_per_au = sect_per_au / fsi->sect_per_clus;
+
+	/* That is,
+	 * the size of cluster is at least 4KB if the size of AU is 4MB
+	 */
+	if (amap->clusters_per_au > MAX_CLU_PER_AU) {
+		sdfat_log_msg(sb, KERN_INFO,
+			"too many clusters per AU (clus/au:%d > %d).",
+			amap->clusters_per_au,
+			MAX_CLU_PER_AU);
+	}
+
+	/* is it needed? why here? */
+	// set_sb_dirty(sb);
+
+	spin_lock_init(&amap->amap_lock);
+
+	amap->option.packing_ratio = pack_ratio;
+	amap->option.au_size = sect_per_au;
+	amap->option.au_align_factor = hidden_sect;
+
+
+	/* Allocate AU info table */
+	n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE;
+	amap->au_table = kmalloc(sizeof(AU_INFO_T *) * n_au_table, GFP_NOIO);
+	if (!amap->au_table) {
+		sdfat_msg(sb, KERN_ERR,
+			"failed to alloc amap->au_table\n");
+		kfree(amap);
+		return -ENOMEM;
+	}
+
+	/* NOTE(review): get_zeroed_page() can fail; a NULL entry here would
+	 * be dereferenced later via GET_AU() — TODO confirm/handle.
+	 */
+	for (i = 0; i < n_au_table; i++)
+		amap->au_table[i] = (AU_INFO_T *)get_zeroed_page(GFP_NOIO);
+
+	/* Allocate buckets indexed by # of free clusters */
+	amap->fclu_order = get_order(sizeof(FCLU_NODE_T) * amap->clusters_per_au);
+
+	// XXX: amap->clusters_per_au limitation is 512 (w/ 8 byte list_head)
+	sdfat_log_msg(sb, KERN_INFO, "page orders for AU nodes : %d "
+			"(clus_per_au : %d, node_size : %lu)",
+			amap->fclu_order,
+			amap->clusters_per_au,
+			(unsigned long)sizeof(FCLU_NODE_T));
+
+	/* NOTE(review): neither allocation below is NULL-checked — TODO confirm */
+	if (!amap->fclu_order)
+		amap->fclu_nodes = (FCLU_NODE_T *)get_zeroed_page(GFP_NOIO);
+	else
+		amap->fclu_nodes = vzalloc(PAGE_SIZE << amap->fclu_order);
+
+	amap->fclu_hint = amap->clusters_per_au;
+
+	/* Hot AU list, ignored AU list */
+	amap->slist_hot.next = NULL;
+	amap->slist_hot.head = &amap->slist_hot;
+	amap->total_fclu_hot = 0;
+
+	amap->slist_ignored.next = NULL;
+	amap->slist_ignored.head = &amap->slist_ignored;
+
+	/* Strategy related vars. */
+	amap->cur_cold.au = NULL;
+	amap->cur_hot.au = NULL;
+	amap->n_need_packing = 0;
+
+
+	/* Build AMAP info */
+	total_used_clusters = 0;		// Count # of used clusters
+
+	i_au_root = i_AU_of_CLU(amap, fsi->root_dir);
+	/* the last SMART_ALLOC_N_HOT_AU AUs (minus root) are treated as hot */
+	i_au_hot_from = amap->n_au - (SMART_ALLOC_N_HOT_AU - 1);
+
+	for (i = 0; i < amap->clusters_per_au; i++)
+		INIT_LIST_HEAD(&amap->fclu_nodes[i].head);
+
+	/*
+	 * Thanks to kzalloc()
+	 * amap->entries[i_au].free_clusters = 0;
+	 * amap->entries[i_au].head.prev = NULL;
+	 * amap->entries[i_au].head.next = NULL;
+	 */
+
+	/* Parse FAT table: count free clusters per AU in a single pass */
+	for (i_clu = CLUS_BASE; i_clu < fsi->num_clusters; i_clu++) {
+		u32 clu_data;
+		AU_INFO_T *au;
+
+		if (fat_ent_get(sb, i_clu, &clu_data)) {
+			sdfat_msg(sb, KERN_ERR,
+				"failed to read fat entry(%u)\n", i_clu);
+			goto free_and_eio;
+		}
+
+		if (IS_CLUS_FREE(clu_data)) {
+			au = GET_AU(amap, i_AU_of_CLU(amap, i_clu));
+			au->free_clusters++;
+		} else
+			total_used_clusters++;
+	}
+
+	/* Build AU list */
+	for (i_au = 0; i_au < amap->n_au; i_au++) {
+		AU_INFO_T *au = GET_AU(amap, i_au);
+
+		au->idx = i_au;
+		BUG_ON(au->free_clusters > amap->clusters_per_au);
+
+		if (au->free_clusters == amap->clusters_per_au)
+			amap->n_clean_au++;
+		else if (au->free_clusters == 0)
+			amap->n_full_au++;
+
+		/* If hot, insert to the hot list */
+		if (i_au >= i_au_hot_from) {
+			amap_add_hot_au(amap, au);
+			amap->total_fclu_hot += au->free_clusters;
+		} else if (i_au != i_au_root || SMART_ALLOC_N_HOT_AU == 0) {
+			/* Otherwise, insert to the free cluster hash */
+			amap_add_cold_au(amap, au);
+		}
+	}
+
+	/* Hot list -> (root) -> (last) -> (last - 1) -> ... */
+	if (i_au_root >= 0 && SMART_ALLOC_N_HOT_AU > 0) {
+		amap_add_hot_au(amap, GET_AU(amap, i_au_root));
+		amap->total_fclu_hot += GET_AU(amap, i_au_root)->free_clusters;
+	}
+
+	fsi->amap = amap;
+	fsi->used_clusters = total_used_clusters;
+
+	sdfat_msg(sb, KERN_INFO,
+		"AMAP: Smart allocation enabled (opt : %u / %u / %u)",
+		amap->option.au_size, amap->option.au_align_factor,
+		amap->option.packing_ratio);
+
+	/* Debug purpose - check */
+	//{
+	//u32 used_clusters;
+	//fat_count_used_clusters(sb, &used_clusters)
+	//ASSERT(used_clusters == total_used_clusters);
+	//}
+
+	return 0;
+
+
+free_and_eio:
+	/* free_page(NULL/0) and kfree(NULL) are safe on partial allocation */
+	if (amap) {
+		if (amap->au_table) {
+			for (i = 0; i < n_au_table; i++)
+				free_page((unsigned long)amap->au_table[i]);
+			kfree(amap->au_table);
+		}
+		if (amap->fclu_nodes) {
+			if (!amap->fclu_order)
+				free_page((unsigned long)amap->fclu_nodes);
+			else
+				vfree(amap->fclu_nodes);
+		}
+		kfree(amap);
+	}
+	return -EIO;
+}
+
+
+/* Free AMAP related structure */
+/* Tear down the allocation map at unmount: release the AU info tables,
+ * the free-cluster buckets and the AMAP itself, then clear fsi->amap.
+ * Safe to call when smart allocation was never enabled (amap == NULL).
+ */
+void amap_destroy(struct super_block *sb)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	int n_au_table;
+
+	if (!amap)
+		return;
+
+	DMSG("%s\n", __func__);
+
+	n_au_table = (amap->n_au + N_AU_PER_TABLE - 1) / N_AU_PER_TABLE;
+
+	if (amap->au_table) {
+		int i;
+
+		for (i = 0; i < n_au_table; i++)
+			free_page((unsigned long)amap->au_table[i]);
+
+		kfree(amap->au_table);
+	}
+	/* fclu_nodes was a single page or a vzalloc'ed area (see amap_create) */
+	if (!amap->fclu_order)
+		free_page((unsigned long)amap->fclu_nodes);
+	else
+		vfree(amap->fclu_nodes);
+	kfree(amap);
+	SDFAT_SB(sb)->fsi.amap = NULL;
+}
+
+
+/*
+ * Check status of FS
+ * and change destination if needed to disable AU-aligned alloc.
+ * (from ALLOC_COLD_ALIGNED to ALLOC_COLD_SEQ)
+ */
+static inline int amap_update_dest(AMAP_T *amap, int ori_dest)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(amap->sb)->fsi);
+	int n_partial_au, n_partial_freeclus;
+
+	/* only ALLOC_COLD_ALIGNED may be demoted; pass others through */
+	if (ori_dest != ALLOC_COLD_ALIGNED)
+		return ori_dest;
+
+	/* # of partial AUs and # of clusters in those AUs */
+	n_partial_au = amap->n_au - amap->n_clean_au - amap->n_full_au;
+	n_partial_freeclus = fsi->num_clusters - fsi->used_clusters -
+		amap->clusters_per_au * amap->n_clean_au;
+
+	/* Status of AUs : Full / Partial / Clean
+	 * If there are many partial (and badly fragmented) AUs,
+	 * the throughput will decrease extremly.
+	 *
+	 * The follow code will treat those worst cases.
+	 */
+
+	/* XXX: AMAP heuristics */
+	if ((amap->n_clean_au * 50 <= amap->n_au) &&
+		(n_partial_freeclus*2) < (n_partial_au*amap->clusters_per_au)) {
+		/* If clean AUs are fewer than 2% of n_au (80 AUs per 16GB)
+		 * and fragment ratio is more than 2 (AVG free_clusters=half AU)
+		 *
+		 * disable clean-first allocation
+		 * enable VFAT-like sequential allocation
+		 */
+		return ALLOC_COLD_SEQ;
+	}
+
+	return ori_dest;
+}
+
+
+#define PACKING_SOFTLIMIT (amap->option.packing_ratio)
+#define PACKING_HARDLIMIT (amap->option.packing_ratio * 4)
+/*
+ * Pick a packing AU if needed.
+ * Otherwise just return NULL
+ *
+ * This function includes some heuristics.
+ */
+static inline AU_INFO_T *amap_get_packing_au(AMAP_T *amap, int dest, int num_to_wb, int *clu_to_skip)
+{
+	AU_INFO_T *au = NULL;
+
+	if (dest == ALLOC_COLD_PACKING) {
+		/* ALLOC_COLD_PACKING:
+		 * Packing-first mode for defrag.
+		 * Optimized to save clean AU
+		 *
+		 * 1) best-fit AU
+		 * 2) Smallest AU (w/ minimum free clusters)
+		 */
+		if (num_to_wb >= amap->clusters_per_au)
+			num_to_wb = num_to_wb % amap->clusters_per_au;
+
+		/* If this were commented out, requests that exactly fill an
+		 * AU would get a clean AU, all others the smallest fit.
+		 */
+		if (num_to_wb == 0)
+			num_to_wb = 1;	// Don't use clean AUs
+
+		au = amap_find_cold_au_bestfit(amap, num_to_wb);
+		if (au && au->free_clusters == amap->clusters_per_au && num_to_wb > 1) {
+			/* if au is clean then get a new partial one */
+			au = amap_find_cold_au_bestfit(amap, 1);
+		}
+
+		if (au) {
+			amap->n_need_packing = 0;
+			amap_remove_cold_au(amap, au);
+			return au;
+		}
+	}
+
+
+	/* Heuristic packing:
+	 * This will improve QoS greatly.
+	 *
+	 * Count # of AU_ALIGNED allocation.
+	 * If the number exceeds the specific threshold,
+	 * allocate on a partial AU or generate random I/O.
+	 */
+	if ((PACKING_SOFTLIMIT > 0) &&
+		(amap->n_need_packing >= PACKING_SOFTLIMIT) &&
+		(num_to_wb < (int)amap->clusters_per_au)) {
+		/* Best-fit packing:
+		 * If num_to_wb (expected number to be allocated) is smaller
+		 * than AU_SIZE, find a best-fit AU.
+		 */
+
+		/* Back margin (heuristics) */
+		if (num_to_wb < amap->clusters_per_au / 4)
+			num_to_wb = amap->clusters_per_au / 4;
+
+		au = amap_find_cold_au_bestfit(amap, num_to_wb);
+		if (au != NULL) {
+			amap_remove_cold_au(amap, au);
+
+			MMSG("AMAP: packing (cnt: %d) / softlimit, "
+				"best-fit (num_to_wb: %d))\n",
+				amap->n_need_packing, num_to_wb);
+
+			if (au->free_clusters > num_to_wb) { // best-fit search: this branch always hits
+				*clu_to_skip = au->free_clusters - num_to_wb;
+				/* otherwise don't skip */
+			}
+			amap->n_need_packing = 0;
+			return au;
+		}
+	}
+
+	if ((PACKING_HARDLIMIT) && amap->n_need_packing >= PACKING_HARDLIMIT) {
+		/* Compulsory SLC flushing:
+		 * If there was no chance to do best-fit packing
+		 * and the # of AU-aligned allocation exceeds HARD threshold,
+		 * then pick a clean AU and generate a compulsory random I/O.
+		 */
+		au = amap_pop_cold_au_largest(amap, amap->clusters_per_au);
+		if (au) {
+			MMSG("AMAP: packing (cnt: %d) / hard-limit, largest)\n",
+				amap->n_need_packing);
+
+			if (au->free_clusters >= 96) {
+				*clu_to_skip = au->free_clusters / 2;
+				MMSG("AMAP: cluster idx re-position\n");
+			}
+			amap->n_need_packing = 0;
+			return au;
+		}
+	}
+
+	/* Update # of clean AU allocation */
+	amap->n_need_packing++;
+	return NULL;
+}
+
+
+/* Pick a target AU:
+ * This function should be called
+ * only if there are one or more free clusters in the bdev.
+ */
+TARGET_AU_T *amap_get_target_au(AMAP_T *amap, int dest, int num_to_wb)
+{
+	int loop_count = 0;
+
+retry:
+	/* dest may be rewritten (HOT<->COLD fallback); bound the retries */
+	if (++loop_count >= 3) {
+		/* No space available (or AMAP consistency error)
+		 * This could happen because of the ignored AUs but not likely
+		 * (because the defrag daemon will not work if there is no enough space)
+		 */
+		BUG_ON(amap->slist_ignored.next == NULL);
+		return NULL;
+	}
+
+	/* Hot clusters (DIR) */
+	if (dest == ALLOC_HOT) {
+
+		/* Working hot AU exist? */
+		if (amap->cur_hot.au == NULL || amap->cur_hot.au->free_clusters == 0) {
+			AU_INFO_T *au;
+
+			if (amap->total_fclu_hot == 0) {
+				/* No more hot AU avaialbe */
+				dest = ALLOC_COLD;
+
+				goto retry;
+			}
+
+			au = amap_find_hot_au_partial(amap);
+
+			BUG_ON(au == NULL);
+			BUG_ON(au->free_clusters <= 0);
+
+			amap->cur_hot.au = au;
+			amap->cur_hot.idx = 0;
+			amap->cur_hot.clu_to_skip = 0;
+		}
+
+		/* Now allocate on a hot AU */
+		return &amap->cur_hot;
+	}
+
+	/* Cold allocation:
+	 * If amap->cur_cold.au has one or more free cluster(s),
+	 * then just return amap->cur_cold
+	 */
+	if ((!amap->cur_cold.au)
+		|| (amap->cur_cold.idx == amap->clusters_per_au)
+		|| (amap->cur_cold.au->free_clusters == 0)) {
+
+		AU_INFO_T *au = NULL;
+		const AU_INFO_T *old_au = amap->cur_cold.au;
+		int n_clu_to_skip = 0;
+
+		if (old_au) {
+			ASSERT(!IS_AU_WORKING(old_au, amap));
+			/* must be NOT WORKING AU.
+			 * (only for information gathering)
+			 */
+		}
+
+		/* Next target AU is needed:
+		 * There are 3 possible ALLOC options for cold AU
+		 *
+		 * ALLOC_COLD_ALIGNED: Clean AU first, but heuristic packing is ON
+		 * ALLOC_COLD_PACKING: Packing AU first (usually for defrag)
+		 * ALLOC_COLD_SEQ    : Sequential AU allocation (VFAT-like)
+		 */
+
+		/* Experimental: Modify allocation destination if needed (ALIGNED => SEQ) */
+		// dest = amap_update_dest(amap, dest);
+
+		if ((dest == ALLOC_COLD_SEQ) && old_au) {
+			/* circular scan from the AU after the previous one */
+			int i_au = old_au->idx + 1;
+
+			while (i_au != old_au->idx) {
+				au = GET_AU(amap, i_au);
+
+				if ((au->free_clusters > 0) &&
+					!IS_AU_HOT(au, amap) &&
+					!IS_AU_IGNORED(au, amap)) {
+					MMSG("AMAP: new cold AU(%d) with %d "
+						"clusters (seq)\n",
+						au->idx, au->free_clusters);
+
+					amap_remove_cold_au(amap, au);
+					goto ret_new_cold;
+				}
+				i_au++;
+				if (i_au >= amap->n_au)
+					i_au = 0;
+			}
+
+			// no cold AUs are available => Hot allocation
+			dest = ALLOC_HOT;
+			goto retry;
+		}
+
+
+		/*
+		 * Check if packing is needed
+		 * (ALLOC_COLD_PACKING is treated by this function)
+		 */
+		au = amap_get_packing_au(amap, dest, num_to_wb, &n_clu_to_skip);
+		if (au) {
+			MMSG("AMAP: new cold AU(%d) with %d clusters "
+				"(packing)\n", au->idx, au->free_clusters);
+			goto ret_new_cold;
+		}
+
+		/* ALLOC_COLD_ALIGNED */
+		/* Check if the adjacent AU is clean */
+		if (old_au && ((old_au->idx + 1) < amap->n_au)) {
+			au = GET_AU(amap, old_au->idx + 1);
+			if ((au->free_clusters == amap->clusters_per_au) &&
+				!IS_AU_HOT(au, amap) &&
+				!IS_AU_IGNORED(au, amap)) {
+				MMSG("AMAP: new cold AU(%d) with %d clusters "
+					"(adjacent)\n", au->idx, au->free_clusters);
+				amap_remove_cold_au(amap, au);
+				goto ret_new_cold;
+			}
+		}
+
+		/* Clean or largest AU */
+		au = amap_pop_cold_au_largest(amap, 0);
+		if (!au) {
+			//ASSERT(amap->total_fclu_hot == (fsi->num_clusters - fsi->used_clusters - 2));
+			dest = ALLOC_HOT;
+			goto retry;
+		}
+
+		MMSG("AMAP: New cold AU (%d) with %d clusters\n",
+			au->idx, au->free_clusters);
+
+ret_new_cold:
+		SET_AU_WORKING(au);
+
+		amap->cur_cold.au = au;
+		amap->cur_cold.idx = 0;
+		amap->cur_cold.clu_to_skip = n_clu_to_skip;
+	}
+
+	return &amap->cur_cold;
+}
+
+/* Put and update target AU */
+/* Return a working AU after an allocation round and update the
+ * clean/full AU counters; @num_allocated is the number of clusters
+ * just taken from cur->au (its free_clusters was already decremented).
+ */
+void amap_put_target_au(AMAP_T *amap, TARGET_AU_T *cur, unsigned int num_allocated)
+{
+	/* Update AMAP info vars. */
+	if (num_allocated > 0 &&
+		(cur->au->free_clusters + num_allocated) == amap->clusters_per_au) {
+		/* if the target AU was a clean AU before this allocation ... */
+		amap->n_clean_au--;
+	}
+	if (num_allocated > 0 &&
+		cur->au->free_clusters == 0)
+		amap->n_full_au++;
+
+	if (IS_AU_HOT(cur->au, amap)) {
+		/* Hot AU */
+		MMSG("AMAP: hot allocation at AU %d\n", cur->au->idx);
+		amap->total_fclu_hot -= num_allocated;
+
+		/* Intra-AU round-robin */
+		if (cur->idx >= amap->clusters_per_au)
+			cur->idx = 0;
+
+		/* No more space available */
+		if (cur->au->free_clusters == 0)
+			cur->au = NULL;
+
+	} else {
+		/* non-hot AU */
+		ASSERT(IS_AU_WORKING(cur->au, amap));
+
+		if (cur->idx >= amap->clusters_per_au || cur->au->free_clusters == 0) {
+			/* It should be inserted back to AU MAP */
+			cur->au->shead.head = NULL;	// SET_AU_NOT_WORKING
+			amap_add_cold_au(amap, cur->au);
+
+			// cur->au = NULL; // This value will be used for the next AU selection
+			cur->idx = amap->clusters_per_au;	// AU closing
+		}
+	}
+
+}
+
+
+/* Reposition target->idx for packing (Heuristics):
+ * Skip (num_to_skip) free clusters in (cur->au)
+ */
+static inline int amap_skip_cluster(struct super_block *sb, TARGET_AU_T *cur, int num_to_skip)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	u32 clu, read_clu;
+	MMSG_VAR(int num_to_skip_orig = num_to_skip);
+
+	/* must leave at least one free cluster in the AU */
+	if (num_to_skip >= cur->au->free_clusters) {
+		EMSG("AMAP(%s): skip mis-use. amap_566\n", __func__);
+		return -EIO;
+	}
+
+	clu = CLU_of_i_AU(amap, cur->au->idx, cur->idx);
+	while (num_to_skip > 0) {
+		if (clu >= CLUS_BASE) {
+			/* Cf.
+			 * If AMAP's integrity is okay,
+			 * we don't need to check if (clu < fsi->num_clusters)
+			 */
+
+			if (fat_ent_get(sb, clu, &read_clu))
+				return -EIO;
+
+			/* only FREE clusters count toward the skip budget */
+			if (IS_CLUS_FREE(read_clu))
+				num_to_skip--;
+		}
+
+		// Move clu->idx
+		clu++;
+		(cur->idx)++;
+
+		if (cur->idx >= amap->clusters_per_au) {
+			/* End of AU (Not supposed) */
+			EMSG("AMAP: Skip - End of AU?! (amap_596)\n");
+			cur->idx = 0;
+			return -EIO;
+		}
+	}
+
+	MMSG("AMAP: Skip_clusters (%d skipped => %d, among %d free clus)\n",
+		num_to_skip_orig, cur->idx, cur->au->free_clusters);
+
+	return 0;
+}
+
+
+/* AMAP-based allocation function for FAT32 */
+/* Allocate @num_alloc clusters on a FAT32 volume using the AU-aware
+ * strategy (@dest selects HOT / COLD_ALIGNED / COLD_PACKING / COLD_SEQ).
+ * The allocated clusters are chained via the FAT and the chain head is
+ * stored in p_chain->dir.  Returns 0 on success, -EIO/-ENOSPC on error;
+ * on error any partially built chain is freed again.
+ */
+s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	TARGET_AU_T *cur = NULL;
+	AU_INFO_T *target_au = NULL;	/* Allocation target AU */
+	s32 ret = -ENOSPC;
+	u32 last_clu = CLUS_EOF, read_clu;
+	u32 new_clu, total_cnt;
+	u32 num_allocated = 0, num_allocated_each = 0;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	BUG_ON(!amap);
+	BUG_ON(IS_CLUS_EOF(fsi->used_clusters));
+
+	total_cnt = fsi->num_clusters - CLUS_BASE;
+
+	if (unlikely(total_cnt < fsi->used_clusters)) {
+		sdfat_fs_error_ratelimit(sb,
+				"AMAP(%s): invalid used clusters(t:%u,u:%u)\n",
+				__func__, total_cnt, fsi->used_clusters);
+		return -EIO;
+	}
+
+	if (num_alloc > total_cnt - fsi->used_clusters)
+		return -ENOSPC;
+
+	p_chain->dir = CLUS_EOF;
+
+	set_sb_dirty(sb);
+
+	// spin_lock(&amap->amap_lock);
+
+retry_alloc:
+	/* Allocation strategy implemented */
+	cur = amap_get_target_au(amap, dest, fsi->reserved_clusters);
+	if (unlikely(!cur)) {
+		// There is no available AU (only ignored-AU are left)
+		sdfat_msg(sb, KERN_ERR, "AMAP Allocator: no avaialble AU.");
+		goto error;
+	}
+
+	/* If there are clusters to skip */
+	if (cur->clu_to_skip > 0) {
+		if (amap_skip_cluster(sb, &amap->cur_cold, cur->clu_to_skip)) {
+			ret = -EIO;
+			goto error;
+		}
+		cur->clu_to_skip = 0;
+	}
+
+	target_au = cur->au;
+
+	/*
+	 * cur->au  : target AU info pointer
+	 * cur->idx : the intra-cluster idx in the AU to start from
+	 */
+	BUG_ON(!cur->au);
+	BUG_ON(!cur->au->free_clusters);
+	BUG_ON(cur->idx >= amap->clusters_per_au);
+
+	num_allocated_each = 0;
+	new_clu = CLU_of_i_AU(amap, target_au->idx, cur->idx);
+
+	do {
+		/* Allocate at the target AU */
+		if ((new_clu >= CLUS_BASE) && (new_clu < fsi->num_clusters)) {
+			if (fat_ent_get(sb, new_clu, &read_clu)) {
+				// spin_unlock(&amap->amap_lock);
+				ret = -EIO;
+				goto error;
+			}
+
+			if (IS_CLUS_FREE(read_clu)) {
+				BUG_ON(GET_AU(amap, i_AU_of_CLU(amap, new_clu)) != target_au);
+
+				/* Free cluster found */
+				if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+					ret = -EIO;
+					goto error;
+				}
+
+				num_allocated_each++;
+
+				/* first cluster becomes the chain head;
+				 * later ones are linked from the previous one
+				 */
+				if (IS_CLUS_EOF(p_chain->dir)) {
+					p_chain->dir = new_clu;
+				} else {
+					if (fat_ent_set(sb, last_clu, new_clu)) {
+						ret = -EIO;
+						goto error;
+					}
+				}
+				last_clu = new_clu;
+
+				/* Update au info */
+				target_au->free_clusters--;
+			}
+
+		}
+
+		new_clu++;
+		(cur->idx)++;
+
+		/* End of the AU */
+		if ((cur->idx >= amap->clusters_per_au) || !(target_au->free_clusters))
+			break;
+	} while (num_allocated_each < num_alloc);
+
+	/* Update strategy info */
+	amap_put_target_au(amap, cur, num_allocated_each);
+
+
+	num_allocated += num_allocated_each;
+	fsi->used_clusters += num_allocated_each;
+	num_alloc -= num_allocated_each;
+
+
+	/* AU exhausted before the request was satisfied: pick another AU */
+	if (num_alloc > 0)
+		goto retry_alloc;
+
+	// spin_unlock(&amap->amap_lock);
+	return 0;
+error:
+	if (num_allocated)
+		fsi->fs_func->free_cluster(sb, p_chain, 0);
+	return ret;
+}
+
+
+/* Free cluster for FAT32 (not implemented yet) */
+/* AU-aware cluster free is not implemented; callers use the generic
+ * FAT free path which notifies AMAP via amap_release_cluster().
+ */
+s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+	return -ENOTSUPP;
+}
+
+
+/*
+ * This is called by fat_free_cluster()
+ * to update AMAP info.
+ */
+s32 amap_release_cluster(struct super_block *sb, u32 clu)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	AU_INFO_T *au;
+	int i_au;
+
+	// spin_lock(&amap->amap_lock);
+
+	/* Update AU info */
+	i_au = i_AU_of_CLU(amap, clu);
+	BUG_ON(i_au >= amap->n_au);
+	au = GET_AU(amap, i_au);
+	/* freeing a cluster of an already-clean AU means corrupt state */
+	if (au->free_clusters >= amap->clusters_per_au) {
+		sdfat_fs_error(sb, "%s, au->free_clusters(%hd) is "
+			"greater than or equal to amap->clusters_per_au(%hd)",
+			__func__, au->free_clusters, amap->clusters_per_au);
+		return -EIO;
+	}
+
+	if (IS_AU_HOT(au, amap)) {
+		MMSG("AMAP: Hot cluster freed\n");
+		au->free_clusters++;
+		amap->total_fclu_hot++;
+	} else if (!IS_AU_WORKING(au, amap) && !IS_AU_IGNORED(au, amap)) {
+		/* Ordinary AU - update AU tree */
+		// Can be optimized by implementing amap_update_au
+		amap_remove_cold_au(amap, au);
+		au->free_clusters++;
+		amap_add_cold_au(amap, au);
+	} else
+		au->free_clusters++;
+
+
+	/* Update AMAP info */
+	if (au->free_clusters == amap->clusters_per_au)
+		amap->n_clean_au++;
+	/* free_clusters == 1 means the AU was full before this release */
+	if (au->free_clusters == 1)
+		amap->n_full_au--;
+
+	// spin_unlock(&amap->amap_lock);
+	return 0;
+}
+
+
+/*
+ * Check if the cluster is in a working AU
+ * The caller should hold sb lock.
+ * This func. should be used only if smart allocation is on
+ */
+s32 amap_check_working(struct super_block *sb, u32 clu)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	AU_INFO_T *au;
+
+	BUG_ON(!amap);
+	au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+	/* non-zero iff the AU containing @clu is a current working AU */
+	return IS_AU_WORKING(au, amap);
+}
+
+
+/*
+ * Return the # of free clusters in that AU
+ */
+s32 amap_get_freeclus(struct super_block *sb, u32 clu)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	AU_INFO_T *au;
+
+	BUG_ON(!amap);
+	au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+	/* free-cluster count of the AU that contains @clu */
+	return (s32)au->free_clusters;
+}
+
+
+/*
+ * Add the AU containing 'clu' to the ignored AU list.
+ * The AU will not be used by the allocator.
+ *
+ * XXX: Ignored counter needed
+ */
+s32 amap_mark_ignore(struct super_block *sb, u32 clu)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	AU_INFO_T *au;
+
+	BUG_ON(!amap);
+	au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+
+	if (IS_AU_HOT(au, amap)) {
+		/* Doesn't work with hot AUs */
+		return -EPERM;
+	} else if (IS_AU_WORKING(au, amap)) {
+		/* currently selected for allocation */
+		return -EBUSY;
+	}
+
+	//BUG_ON(IS_AU_IGNORED(au, amap) && (GET_IGN_CNT(au) == 0));
+	/* already ignored: nothing to do (no nesting count is kept) */
+	if (IS_AU_IGNORED(au, amap))
+		return 0;
+
+	amap_remove_cold_au(amap, au);
+	amap_insert_to_list(au, &amap->slist_ignored);
+
+	BUG_ON(!IS_AU_IGNORED(au, amap));
+
+	//INC_IGN_CNT(au);
+	MMSG("AMAP: Mark ignored AU (%d)\n", au->idx);
+	return 0;
+}
+
+
+/*
+ * This function could be used only on IGNORED AUs.
+ * The caller should care whether it's ignored or not before using this func.
+ */
+s32 amap_unmark_ignore(struct super_block *sb, u32 clu)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	AU_INFO_T *au;
+
+	BUG_ON(!amap);
+
+	au = GET_AU(amap, i_AU_of_CLU(amap, clu));
+
+	/* caller contract: the AU must currently be on the ignored list */
+	BUG_ON(!IS_AU_IGNORED(au, amap));
+	// BUG_ON(GET_IGN_CNT(au) == 0);
+
+	/* move it back into the cold free-cluster buckets */
+	amap_remove_from_list(au, &amap->slist_ignored);
+	amap_add_cold_au(amap, au);
+
+	BUG_ON(IS_AU_IGNORED(au, amap));
+
+	//DEC_IGN_CNT(au);
+
+	MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx);
+
+	return 0;
+}
+
+/*
+ * Unmark all ignored AU
+ * This will return # of unmarked AUs
+ */
+s32 amap_unmark_ignore_all(struct super_block *sb)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+	struct slist_head *entry;
+	AU_INFO_T *au;
+	int n = 0;
+
+	BUG_ON(!amap);
+	/* drain the ignored slist: removal always pops the head, so
+	 * re-reading slist_ignored.next each iteration terminates
+	 */
+	entry = amap->slist_ignored.next;
+	while (entry) {
+		au = list_entry(entry, AU_INFO_T, shead);
+
+		BUG_ON(au != GET_AU(amap, au->idx));
+		BUG_ON(!IS_AU_IGNORED(au, amap));
+
+		//CLEAR_IGN_CNT(au);
+		amap_remove_from_list(au, &amap->slist_ignored);
+		amap_add_cold_au(amap, au);
+
+		MMSG("AMAP: Unmark ignored AU (%d)\n", au->idx);
+		n++;
+
+		entry = amap->slist_ignored.next;
+	}
+
+	BUG_ON(amap->slist_ignored.next != NULL);
+	MMSG("AMAP: unmark_ignore_all, total %d AUs\n", n);
+
+	return n;
+}
+
+/**
+ * @fn amap_get_au_stat
+ * @brief report AUs status depending on mode
+ * @return positive on success, 0 otherwise
+ * @param sbi super block info
+ * @param mode TOTAL, CLEAN and FULL
+ */
+/* Report an AU statistic selected by @mode (total / clean / full AU
+ * count).  Returns 0 when AMAP is disabled or @mode is unknown.
+ */
+u32 amap_get_au_stat(struct super_block *sb, s32 mode)
+{
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+
+	if (!amap)
+		return 0;
+
+	switch (mode) {
+	case VOL_AU_STAT_TOTAL:
+		return amap->n_au;
+	case VOL_AU_STAT_CLEAN:
+		return amap->n_clean_au;
+	case VOL_AU_STAT_FULL:
+		return amap->n_full_au;
+	default:
+		return 0;
+	}
+}
+
diff --git a/fs/sdfat/amap_smart.h b/fs/sdfat/amap_smart.h
new file mode 100644
index 000000000000..caee6f6b3681
--- /dev/null
+++ b/fs/sdfat/amap_smart.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_AMAP_H
+#define _SDFAT_AMAP_H
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+/* AMAP Configuration Variable */
+#define SMART_ALLOC_N_HOT_AU (5)
+
+/* Allocating Destination (for smart allocator):
+ * moved to sdfat.h
+ */
+/*
+ * #define ALLOC_COLD_ALIGNED (1)
+ * #define ALLOC_COLD_PACKING (2)
+ * #define ALLOC_COLD_SEQ (4)
+ */
+
+/* Minimum sectors for support AMAP create */
+#define AMAP_MIN_SUPPORT_SECTORS (1048576)
+
+#define amap_add_hot_au(amap, au) amap_insert_to_list((au), &(amap)->slist_hot)
+
+/* singly linked list */
+struct slist_head {
+ struct slist_head *next;
+ struct slist_head *head;
+};
+
+/* AU entry type */
+typedef struct __AU_INFO_T {
+ uint16_t idx; /* the index of the AU (0, 1, 2, ... ) */
+ uint16_t free_clusters; /* # of available cluster */
+ union {
+ struct list_head head;
+ struct slist_head shead;/* singly linked list head for hot list */
+ };
+} AU_INFO_T;
+
+
+/* Allocation Target AU */
+typedef struct __TARGET_AU_T {
+ AU_INFO_T *au; /* Working AU */
+ uint16_t idx; /* Intra-AU cluster index */
+ uint16_t clu_to_skip; /* Clusters to skip */
+} TARGET_AU_T;
+
+
+/* AMAP free-clusters-based node */
+typedef struct {
+ struct list_head head; /* the list of AUs */
+} FCLU_NODE_T;
+
+
+/* AMAP options */
+typedef struct {
+ unsigned int packing_ratio; /* Tunable packing ratio */
+ unsigned int au_size; /* AU size in sectors */
+ unsigned int au_align_factor; /* Hidden sectors % au_size */
+} AMAP_OPT_T;
+
+typedef struct __AMAP_T {
+ spinlock_t amap_lock; /* obsolete */
+ struct super_block *sb;
+
+ int n_au;
+ int n_clean_au, n_full_au;
+ int clu_align_bias;
+ uint16_t clusters_per_au;
+ AU_INFO_T **au_table; /* An array of AU_INFO entries */
+ AMAP_OPT_T option;
+
+ /* Size-based AU management pool (cold) */
+ FCLU_NODE_T *fclu_nodes; /* An array of listheads */
+ int fclu_order; /* Page order that fclu_nodes needs */
+ int fclu_hint; /* maximum # of free clusters in an AU */
+
+ /* Hot AU list */
+ unsigned int total_fclu_hot; /* Free clusters in hot list */
+ struct slist_head slist_hot; /* Hot AU list */
+
+ /* Ignored AU list */
+ struct slist_head slist_ignored;
+
+ /* Allocator variables (keep 2 AUs at maximum) */
+ TARGET_AU_T cur_cold;
+ TARGET_AU_T cur_hot;
+ int n_need_packing;
+} AMAP_T;
+
+
+/* AU table */
+/* NOTE: all macro parameters below are fully parenthesized to avoid
+ * operator-precedence surprises when callers pass expressions.
+ */
+#define N_AU_PER_TABLE (int)(PAGE_SIZE / sizeof(AU_INFO_T))
+#define GET_AU(amap, i_AU) ((amap)->au_table[(i_AU) / N_AU_PER_TABLE] + ((i_AU) % N_AU_PER_TABLE))
+//#define MAX_CLU_PER_AU (int)(PAGE_SIZE / sizeof(FCLU_NODE_T))
+#define MAX_CLU_PER_AU (1024)
+
+/* Cold AU bucket <-> # of freeclusters */
+#define NODE_CLEAN(amap) (&(amap)->fclu_nodes[(amap)->clusters_per_au - 1])
+#define NODE(fclu, amap) (&(amap)->fclu_nodes[(fclu) - 1])
+#define FREE_CLUSTERS(node, amap) ((int)((node) - (amap)->fclu_nodes) + 1)
+
+/* AU status */
+#define MAGIC_WORKING ((struct slist_head *)0xFFFF5091)
+#define IS_AU_HOT(au, amap) ((au)->shead.head == &(amap)->slist_hot)
+#define IS_AU_IGNORED(au, amap) ((au)->shead.head == &(amap)->slist_ignored)
+#define IS_AU_WORKING(au, amap) ((au)->shead.head == MAGIC_WORKING)
+#define SET_AU_WORKING(au) ((au)->shead.head = MAGIC_WORKING)
+
+/* AU <-> cluster */
+#define i_AU_of_CLU(amap, clu) (((amap)->clu_align_bias + (clu)) / (amap)->clusters_per_au)
+#define CLU_of_i_AU(amap, i_au, idx) \
+ ((uint32_t)(i_au) * (uint32_t)(amap)->clusters_per_au + (idx) - (amap)->clu_align_bias)
+
+/*
+ * NOTE : AMAP internal functions are moved to core.h
+ */
+
+#endif /* _SDFAT_AMAP_H */
diff --git a/fs/sdfat/api.c b/fs/sdfat/api.c
new file mode 100644
index 000000000000..45b0c4106bda
--- /dev/null
+++ b/fs/sdfat/api.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : sdfat_api.c */
+/* PURPOSE : sdFAT volume lock layer */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+
+#include "version.h"
+#include "config.h"
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Internal structures */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+static DEFINE_MUTEX(_lock_core);
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* Global Function Definitions */
+/* - All functions for global use have same return value format, */
+/* that is, 0 on success and minus error number on */
+/* various error condition. */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* sdFAT Filesystem Init & Exit Functions */
+/*----------------------------------------------------------------------*/
+
+/* initialize the sdFAT core layer (thin pass-through to fscore_init) */
+s32 fsapi_init(void)
+{
+ return fscore_init();
+}
+
+/* shut down the sdFAT core layer (thin pass-through to fscore_shutdown) */
+s32 fsapi_shutdown(void)
+{
+ return fscore_shutdown();
+}
+
+/*----------------------------------------------------------------------*/
+/* Volume Management Functions */
+/*----------------------------------------------------------------------*/
+
+/* mount the file system volume */
+s32 fsapi_mount(struct super_block *sb)
+{
+ s32 err;
+
+ /* acquire the core lock for file system critical section */
+ mutex_lock(&_lock_core);
+
+ err = meta_cache_init(sb);
+ if (err)
+ goto out;
+
+ err = fscore_mount(sb);
+out:
+ /* on any failure, tear down the meta cache set up above */
+ if (err)
+ meta_cache_shutdown(sb);
+
+ /* release the core lock for file system critical section */
+ mutex_unlock(&_lock_core);
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_mount);
+
+/* unmount the file system volume */
+s32 fsapi_umount(struct super_block *sb)
+{
+ s32 err;
+
+ /* acquire the core lock for file system critical section */
+ mutex_lock(&_lock_core);
+
+ /* the per-volume lock serializes against all other fsapi operations */
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_umount(sb);
+ meta_cache_shutdown(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+
+ /* release the core lock for file system critical section */
+ mutex_unlock(&_lock_core);
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_umount);
+
+/* get the information of a file system volume */
+s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* check the validity of pointer parameters */
+ ASSERT(info);
+
+ /* (u32)~0 marks used_clusters as "unknown": take the slow path
+ * and let the core recount under the volume lock.
+ */
+ if (fsi->used_clusters == (u32) ~0) {
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_statfs(sb, info);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+ }
+
+ /* fast path: report the cached counters without taking the lock */
+ info->FatType = fsi->vol_type;
+ info->ClusterSize = fsi->cluster_size;
+ info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 are reserved */
+ info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters;
+ info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_statfs);
+
+/* synchronize a file system volume (under the volume lock) */
+s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_sync_fs(sb, do_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_sync_fs);
+
+/* update the volume dirty/error flags (under the volume lock) */
+s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_set_vol_flags(sb, new_flag, always_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_set_vol_flags);
+
+/*----------------------------------------------------------------------*/
+/* File Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* lookup: resolve path (relative to inode's dir) into fid */
+s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ /* an empty path is never a valid lookup target */
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_lookup(inode, path, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_lookup);
+
+/* create a file */
+s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_create(inode, path, mode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_create);
+
+/* read the target string of symlink */
+s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && buffer);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_read_link(inode, fid, buffer, count, rcount);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_read_link);
+
+/* write the target string of symlink */
+s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && buffer);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_write_link(inode, fid, buffer, count, wcount);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_write_link);
+
+/* resize the file length */
+s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode %p size %llu)\n", __func__, inode, new_size);
+ err = fscore_truncate(inode, old_size, new_size);
+ /* NOTE(review): "exitted" is a typo in the trace string; kept as-is
+ * since changing a runtime string is a behavior change.
+ */
+ TMSG("%s exitted (%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_truncate);
+
+/* rename or move a old file into a new file */
+s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+ s32 err;
+ struct super_block *sb = old_parent_inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_rename(old_parent_inode, fid, new_parent_inode, new_dentry);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_rename);
+
+/* remove a file (unlink and truncate) */
+s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_remove(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_remove);
+
+/* get the information of a given file */
+s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ /* NOTE(review): trace format string is missing a closing ')' */
+ TMSG("%s entered (inode %p info %p\n", __func__, inode, info);
+ err = fscore_read_inode(inode, info);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_read_inode);
+
+/* set the information of a given file */
+s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ /* NOTE(review): trace format string is missing a closing ')' */
+ TMSG("%s entered (inode %p info %p sync:%d\n",
+ __func__, inode, info, sync);
+ err = fscore_write_inode(inode, info, sync);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_write_inode);
+
+/* return the cluster number in the given cluster offset */
+s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(clu);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ /* NOTE(review): trace format string is missing a closing ')' */
+ TMSG("%s entered (inode:%p clus:%08x dest:%d\n",
+ __func__, inode, *clu, dest);
+ err = fscore_map_clus(inode, clu_offset, clu, dest);
+ TMSG("%s exited (clu:%08x err:%d)\n", __func__, *clu, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_map_clus);
+
+/* reserve a cluster */
+s32 fsapi_reserve_clus(struct inode *inode)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ TMSG("%s entered (inode:%p)\n", __func__, inode);
+ err = fscore_reserve_clus(inode);
+ TMSG("%s exited (err:%d)\n", __func__, err);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_reserve_clus);
+
+/*----------------------------------------------------------------------*/
+/* Directory Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* create(make) a directory */
+s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid && path);
+
+ /* an empty path is never a valid directory name */
+ if (unlikely(!strlen(path)))
+ return -EINVAL;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_mkdir(inode, path, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_mkdir);
+
+/* read a directory entry from the opened directory */
+s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(dir_entry);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_readdir(inode, dir_entry);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_readdir);
+
+/* remove a directory */
+s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_rmdir(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_rmdir);
+
+/* unlink a file: remove its entry from the directory,
+ * but do NOT truncate the cluster chain.
+ */
+s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(fid);
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = fscore_unlink(inode, fid);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_unlink);
+
+/* reflect the internal dirty flags to VFS bh dirty flags */
+s32 fsapi_cache_flush(struct super_block *sb, int do_sync)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ fcache_flush(sb, do_sync);
+ dcache_flush(sb, do_sync);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_cache_flush);
+
+/* release FAT & buf cache
+ * NOTE(review): the body is compiled only under CONFIG_SDFAT_DEBUG;
+ * on non-debug builds this is intentionally a no-op returning 0.
+ */
+s32 fsapi_cache_release(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DEBUG
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+
+ fcache_release_all(sb);
+ dcache_release_all(sb);
+
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+#endif /* CONFIG_SDFAT_DEBUG */
+ return 0;
+}
+EXPORT_SYMBOL(fsapi_cache_release);
+
+/* report AU statistics (total/clean/full) for the volume */
+u32 fsapi_get_au_stat(struct super_block *sb, s32 mode)
+{
+ /* volume lock is not required */
+ return fscore_get_au_stat(sb, mode);
+}
+EXPORT_SYMBOL(fsapi_get_au_stat);
+
+/* clear extent cache */
+void fsapi_invalidate_extent(struct inode *inode)
+{
+ /* Volume lock is not required,
+ * because it is only called by evict_inode.
+ * If any other function can call it,
+ * you should check whether volume lock is needed or not.
+ */
+ extent_cache_inval_inode(inode);
+}
+EXPORT_SYMBOL(fsapi_invalidate_extent);
+
+/* check device is ejected */
+s32 fsapi_check_bdi_valid(struct super_block *sb)
+{
+ return fscore_check_bdi_valid(sb);
+}
+EXPORT_SYMBOL(fsapi_check_bdi_valid);
+
+
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+/* fetch defragmentation status/parameters into arg */
+s32 fsapi_dfr_get_info(struct super_block *sb, void *arg)
+{
+ /* volume lock is not required */
+ return defrag_get_info(sb, (struct defrag_info_arg *)arg);
+}
+EXPORT_SYMBOL(fsapi_dfr_get_info);
+
+/* traverse a directory for defrag candidates (under the volume lock) */
+s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args)
+{
+ s32 err;
+
+ /* check the validity of pointer parameters */
+ ASSERT(args);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_scan_dir(sb, (struct defrag_trav_arg *)args);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_scan_dir);
+
+/* validate a defrag chunk's clusters against the inode's chain */
+s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_validate_cluster(inode,
+ (struct defrag_chunk_info *)chunk, skip_prev);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_validate_clus);
+
+/* reserve nr_clus clusters for the defragmenter */
+s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus)
+{
+ s32 err;
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_reserve_clusters(sb, nr_clus);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_reserve_clus);
+
+/* exclude the AU containing clus from normal allocation */
+s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus)
+{
+ /* volume lock is not required */
+ return defrag_mark_ignore(sb, clus);
+}
+EXPORT_SYMBOL(fsapi_dfr_mark_ignore);
+
+/* return all ignored AUs to normal allocation */
+void fsapi_dfr_unmark_ignore_all(struct super_block *sb)
+{
+ /* volume lock is not required */
+ defrag_unmark_ignore_all(sb);
+}
+EXPORT_SYMBOL(fsapi_dfr_unmark_ignore_all);
+
+/* map a cluster offset to a destination cluster for defrag I/O */
+s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu)
+{
+ s32 err;
+ struct super_block *sb = inode->i_sb;
+
+ /* check the validity of pointer parameters */
+ ASSERT(clu);
+
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ err = defrag_map_cluster(inode, clu_offset, clu);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+
+ return err;
+}
+EXPORT_SYMBOL(fsapi_dfr_map_clus);
+
+/* writepage end-io hook for pages written by the defragmenter */
+void fsapi_dfr_writepage_endio(struct page *page)
+{
+ /* volume lock is not required */
+ defrag_writepage_end_io(page);
+}
+EXPORT_SYMBOL(fsapi_dfr_writepage_endio);
+
+void fsapi_dfr_update_fat_prev(struct super_block *sb, int force)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_update_fat_prev(sb, force);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_update_fat_prev);
+
+void fsapi_dfr_update_fat_next(struct super_block *sb)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_update_fat_next(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_update_fat_next);
+
+void fsapi_dfr_check_discard(struct super_block *sb)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_check_discard(sb);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_check_discard);
+
+void fsapi_dfr_free_clus(struct super_block *sb, u32 clus)
+{
+ mutex_lock(&(SDFAT_SB(sb)->s_vlock));
+ defrag_free_cluster(sb, clus);
+ mutex_unlock(&(SDFAT_SB(sb)->s_vlock));
+}
+EXPORT_SYMBOL(fsapi_dfr_free_clus);
+
+/* decide whether defrag is worthwhile; reports AU counts via out-params */
+s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau)
+{
+ /* volume lock is not required */
+ return defrag_check_defrag_required(sb, totalau, cleanau, fullau);
+}
+EXPORT_SYMBOL(fsapi_dfr_check_dfr_required);
+
+s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller)
+{
+ /* volume lock is not required */
+ return defrag_check_defrag_on(inode, start, end, cancel, caller);
+}
+EXPORT_SYMBOL(fsapi_dfr_check_dfr_on);
+
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+/* sudden-power-off fault-injection hook (debug builds only) */
+void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller)
+{
+ /* volume lock is not required */
+ defrag_spo_test(sb, flag, caller);
+}
+EXPORT_SYMBOL(fsapi_dfr_spo_test);
+#endif
+
+
+#endif /* CONFIG_SDFAT_DFR */
+
+/* end of sdfat_api.c */
diff --git a/fs/sdfat/api.h b/fs/sdfat/api.h
new file mode 100644
index 000000000000..344297ab58ae
--- /dev/null
+++ b/fs/sdfat/api.h
@@ -0,0 +1,409 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_API_H
+#define _SDFAT_API_H
+
+#include "config.h"
+#include "sdfat_fs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/*----------------------------------------------------------------------*/
+/* Configure Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+/* cache size (in number of sectors) */
+/* (should be an exponential value of 2) */
+#define FAT_CACHE_SIZE 128
+#define FAT_CACHE_HASH_SIZE 64
+#define BUF_CACHE_SIZE 256
+#define BUF_CACHE_HASH_SIZE 64
+
+/* Read-ahead related */
+/* First config vars. should be pow of 2 */
+#define FCACHE_MAX_RA_SIZE (PAGE_SIZE)
+#define DCACHE_MAX_RA_SIZE (128*1024)
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+/* directory-entry type values (bit-coded: PRI/SEC, critical/benign) */
+#define TYPE_UNUSED 0x0000
+#define TYPE_DELETED 0x0001
+#define TYPE_INVALID 0x0002
+#define TYPE_CRITICAL_PRI 0x0100
+#define TYPE_BITMAP 0x0101
+#define TYPE_UPCASE 0x0102
+#define TYPE_VOLUME 0x0103
+#define TYPE_DIR 0x0104
+#define TYPE_FILE 0x011F
+#define TYPE_SYMLINK 0x015F
+#define TYPE_CRITICAL_SEC 0x0200
+#define TYPE_STREAM 0x0201
+#define TYPE_EXTEND 0x0202
+#define TYPE_ACL 0x0203
+#define TYPE_BENIGN_PRI 0x0400
+#define TYPE_GUID 0x0401
+#define TYPE_PADDING 0x0402
+#define TYPE_ACLTAB 0x0403
+#define TYPE_BENIGN_SEC 0x0800
+#define TYPE_ALL 0x0FFF
+
+/* eio values (block-device error flags, OR-able) */
+#define SDFAT_EIO_NONE (0x00000000)
+#define SDFAT_EIO_READ (0x00000001)
+#define SDFAT_EIO_WRITE (0x00000002)
+#define SDFAT_EIO_BDI (0x00000004)
+
+/* modes for volume allocation unit status */
+#define VOL_AU_STAT_TOTAL (0)
+#define VOL_AU_STAT_CLEAN (1)
+#define VOL_AU_STAT_FULL (2)
+
+/*----------------------------------------------------------------------*/
+/* NLS Type Definitions */
+/*----------------------------------------------------------------------*/
+
+/* DOS name structure */
+typedef struct {
+ u8 name[DOS_NAME_LENGTH];
+ u8 name_case;
+} DOS_NAME_T;
+
+/* unicode name structure */
+typedef struct {
+ u16 name[MAX_NAME_LENGTH+3]; /* +3 for null and for converting */
+ u16 name_hash;
+ u8 name_len;
+} UNI_NAME_T;
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions */
+/*----------------------------------------------------------------------*/
+/* should be merged it to DATE_TIME_T */
+typedef union {
+ struct {
+ u8 off : 7;
+ u8 valid : 1;
+ };
+ u8 value;
+} TIMEZONE_T;
+
+typedef struct {
+ u16 sec; /* 0 ~ 59 */
+ u16 min; /* 0 ~ 59 */
+ u16 hour; /* 0 ~ 23 */
+ u16 day; /* 1 ~ 31 */
+ u16 mon; /* 1 ~ 12 */
+ u16 year; /* 0 ~ 127 (since 1980) */
+ TIMEZONE_T tz;
+} TIMESTAMP_T;
+
+typedef struct {
+ u16 Year;
+ u16 Month;
+ u16 Day;
+ u16 Hour;
+ u16 Minute;
+ u16 Second;
+ u16 MilliSecond;
+ TIMEZONE_T Timezone;
+} DATE_TIME_T;
+
+typedef struct {
+ u64 Offset; // start sector number of the partition
+ u64 Size; // in sectors
+} PART_INFO_T;
+
+typedef struct {
+ u32 SecSize; // sector size in bytes
+ u64 DevSize; // block device size in sectors
+} DEV_INFO_T;
+
+/* volume information reported by fsapi_statfs() */
+typedef struct {
+ u32 FatType;
+ u32 ClusterSize;
+ u32 NumClusters;
+ u32 FreeClusters;
+ u32 UsedClusters;
+} VOL_INFO_T;
+
+/* directory structure */
+typedef struct {
+ u32 dir;
+ u32 size;
+ u8 flags;
+} CHAIN_T;
+
+/* hint structure */
+typedef struct {
+ u32 clu;
+ union {
+ u32 off; // cluster offset
+ s32 eidx; // entry index
+ };
+} HINT_T;
+
+/* per-file extent (cluster-mapping) cache */
+typedef struct {
+ spinlock_t cache_lru_lock;
+ struct list_head cache_lru;
+ s32 nr_caches;
+ u32 cache_valid_id; // for avoiding the race between alloc and free
+} EXTENT_T;
+
+/* first empty entry hint information */
+typedef struct {
+ s32 eidx; // entry index of a directory
+ s32 count; // count of continuous empty entry
+ CHAIN_T cur; // the cluster that first empty slot exists in
+} HINT_FEMP_T;
+
+/* file id structure */
+typedef struct {
+ CHAIN_T dir;
+ s32 entry;
+ u32 type;
+ u32 attr;
+ u32 start_clu;
+ u64 size;
+ u8 flags;
+ u8 reserved[3]; // padding
+ u32 version; // the copy of low 32bit of i_version to check the validation of hint_stat
+ s64 rwoffset; // file offset or dentry index for readdir
+ EXTENT_T extent; // extent cache for a file
+ HINT_T hint_bmap; // hint for cluster last accessed
+ HINT_T hint_stat; // hint for entry index we try to lookup next time
+ HINT_FEMP_T hint_femp; // hint for first empty entry
+} FILE_ID_T;
+
+typedef struct {
+ s8 *lfn;
+ s8 *sfn;
+ s32 lfnbuf_len; //usually MAX_UNINAME_BUF_SIZE
+ s32 sfnbuf_len; //usually MAX_DOSNAME_BUF_SIZE, used only for vfat, not for exfat
+} DENTRY_NAMEBUF_T;
+
+typedef struct {
+ u32 Attr;
+ u64 Size;
+ u32 NumSubdirs;
+ DATE_TIME_T CreateTimestamp;
+ DATE_TIME_T ModifyTimestamp;
+ DATE_TIME_T AccessTimestamp;
+ DENTRY_NAMEBUF_T NameBuf;
+} DIR_ENTRY_T;
+
+/* cache information (doubly linked LRU list + hash chaining) */
+typedef struct __cache_entry {
+ struct __cache_entry *next;
+ struct __cache_entry *prev;
+ struct {
+ struct __cache_entry *next;
+ struct __cache_entry *prev;
+ } hash;
+ u64 sec;
+ u32 flag;
+ struct buffer_head *bh;
+} cache_ent_t;
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions : Wrapper & In-Core */
+/*----------------------------------------------------------------------*/
+/* FAT-entry accessors (vary by FAT12/16/32/exFAT) */
+typedef struct __FATENT_OPS_T {
+ s32 (*ent_get)(struct super_block *sb, u32 loc, u32 *content);
+ s32 (*ent_set)(struct super_block *sb, u32 loc, u32 content);
+} FATENT_OPS_T;
+
+/* per-variant filesystem operations table */
+typedef struct {
+ s32 (*alloc_cluster)(struct super_block *, u32, CHAIN_T *, s32);
+ s32 (*free_cluster)(struct super_block *, CHAIN_T *, s32);
+ s32 (*count_used_clusters)(struct super_block *, u32 *);
+ s32 (*init_dir_entry)(struct super_block *, CHAIN_T *, s32, u32, u32, u64);
+ s32 (*init_ext_entry)(struct super_block *, CHAIN_T *, s32, s32, UNI_NAME_T *, DOS_NAME_T *);
+ s32 (*find_dir_entry)(struct super_block *, FILE_ID_T *, CHAIN_T *, UNI_NAME_T *, s32, DOS_NAME_T *, u32);
+ s32 (*delete_dir_entry)(struct super_block *, CHAIN_T *, s32, s32, s32);
+ void (*get_uniname_from_ext_entry)(struct super_block *, CHAIN_T *, s32, u16 *);
+ s32 (*count_ext_entries)(struct super_block *, CHAIN_T *, s32, DENTRY_T *);
+ s32 (*calc_num_entries)(UNI_NAME_T *);
+ s32 (*check_max_dentries)(FILE_ID_T *);
+ u32 (*get_entry_type)(DENTRY_T *);
+ void (*set_entry_type)(DENTRY_T *, u32);
+ u32 (*get_entry_attr)(DENTRY_T *);
+ void (*set_entry_attr)(DENTRY_T *, u32);
+ u8 (*get_entry_flag)(DENTRY_T *);
+ void (*set_entry_flag)(DENTRY_T *, u8);
+ u32 (*get_entry_clu0)(DENTRY_T *);
+ void (*set_entry_clu0)(DENTRY_T *, u32);
+ u64 (*get_entry_size)(DENTRY_T *);
+ void (*set_entry_size)(DENTRY_T *, u64);
+ void (*get_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+ void (*set_entry_time)(DENTRY_T *, TIMESTAMP_T *, u8);
+ u32 (*get_au_stat)(struct super_block *, s32);
+} FS_FUNC_T;
+
+typedef struct __FS_INFO_T {
+ s32 bd_opened; // opened or not
+ u32 vol_type; // volume FAT type
+ u32 vol_id; // volume serial number
+ u64 num_sectors; // num of sectors in volume
+ u32 num_clusters; // num of clusters in volume
+ u32 cluster_size; // cluster size in bytes
+ u32 cluster_size_bits;
+ u32 sect_per_clus; // cluster size in sectors
+ u32 sect_per_clus_bits;
+ u64 FAT1_start_sector; // FAT1 start sector
+ u64 FAT2_start_sector; // FAT2 start sector
+ u64 root_start_sector; // root dir start sector
+ u64 data_start_sector; // data area start sector
+ u32 num_FAT_sectors; // num of FAT sectors
+ u32 root_dir; // root dir cluster
+ u32 dentries_in_root; // num of dentries in root dir
+ u32 dentries_per_clu; // num of dentries per cluster
+ u32 vol_flag; // volume dirty flag
+ struct buffer_head *pbr_bh; // buffer_head of PBR sector
+
+ u32 map_clu; // allocation bitmap start cluster
+ u32 map_sectors; // num of allocation bitmap sectors
+ struct buffer_head **vol_amap; // allocation bitmap
+
+ u16 **vol_utbl; // upcase table
+
+ u32 clu_srch_ptr; // cluster search pointer
+ u32 used_clusters; // number of used clusters; (u32)~0 means "unknown"
+
+ u32 prev_eio; // block device operation error flag
+
+ FS_FUNC_T *fs_func;
+ FATENT_OPS_T *fatent_ops;
+
+ s32 reserved_clusters; // # of reserved clusters (DA)
+ void *amap; // AU Allocation Map
+
+ /* fat cache */
+ struct {
+ cache_ent_t pool[FAT_CACHE_SIZE];
+ cache_ent_t lru_list;
+ cache_ent_t hash_list[FAT_CACHE_HASH_SIZE];
+ } fcache;
+
+ /* meta cache */
+ struct {
+ cache_ent_t pool[BUF_CACHE_SIZE];
+ cache_ent_t lru_list;
+ cache_ent_t keep_list; // CACHEs in this list will not be kicked by normal lru operations
+ cache_ent_t hash_list[BUF_CACHE_HASH_SIZE];
+ } dcache;
+} FS_INFO_T;
+
+/*======================================================================*/
+/* */
+/* API FUNCTION DECLARATIONS */
+/* (CHANGE THIS PART IF REQUIRED) */
+/* */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+s32 fsapi_init(void);
+s32 fsapi_shutdown(void);
+
+/* volume management functions */
+s32 fsapi_mount(struct super_block *sb);
+s32 fsapi_umount(struct super_block *sb);
+s32 fsapi_statfs(struct super_block *sb, VOL_INFO_T *info);
+s32 fsapi_sync_fs(struct super_block *sb, s32 do_sync);
+s32 fsapi_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync);
+
+/* file management functions */
+s32 fsapi_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fsapi_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid);
+s32 fsapi_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+s32 fsapi_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+s32 fsapi_remove(struct inode *inode, FILE_ID_T *fid); /* unlink and truncate */
+s32 fsapi_truncate(struct inode *inode, u64 old_size, u64 new_size);
+s32 fsapi_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry);
+s32 fsapi_unlink(struct inode *inode, FILE_ID_T *fid); /* unlink only, no truncate */
+s32 fsapi_read_inode(struct inode *inode, DIR_ENTRY_T *info);
+s32 fsapi_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync);
+s32 fsapi_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest);
+s32 fsapi_reserve_clus(struct inode *inode);
+
+/* directory management functions */
+s32 fsapi_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fsapi_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry);
+s32 fsapi_rmdir(struct inode *inode, FILE_ID_T *fid);
+
+/* FAT & buf cache functions */
+s32 fsapi_cache_flush(struct super_block *sb, int do_sync);
+s32 fsapi_cache_release(struct super_block *sb);
+
+/* extra info functions */
+u32 fsapi_get_au_stat(struct super_block *sb, s32 mode);
+
+/* extent cache functions */
+void fsapi_invalidate_extent(struct inode *inode);
+
+/* bdev management */
+s32 fsapi_check_bdi_valid(struct super_block *sb);
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+
+s32 fsapi_dfr_get_info(struct super_block *sb, void *arg);
+
+s32 fsapi_dfr_scan_dir(struct super_block *sb, void *args);
+
+s32 fsapi_dfr_validate_clus(struct inode *inode, void *chunk, int skip_prev);
+s32 fsapi_dfr_reserve_clus(struct super_block *sb, s32 nr_clus);
+s32 fsapi_dfr_mark_ignore(struct super_block *sb, unsigned int clus);
+void fsapi_dfr_unmark_ignore_all(struct super_block *sb);
+
+s32 fsapi_dfr_map_clus(struct inode *inode, u32 clu_offset, u32 *clu);
+void fsapi_dfr_writepage_endio(struct page *page);
+
+void fsapi_dfr_update_fat_prev(struct super_block *sb, int force);
+void fsapi_dfr_update_fat_next(struct super_block *sb);
+void fsapi_dfr_check_discard(struct super_block *sb);
+void fsapi_dfr_free_clus(struct super_block *sb, u32 clus);
+
+s32 fsapi_dfr_check_dfr_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau);
+s32 fsapi_dfr_check_dfr_on(struct inode *inode, loff_t start, loff_t end, s32 cancel, const char *caller);
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+void fsapi_dfr_spo_test(struct super_block *sb, int flag, const char *caller);
+#endif /* CONFIG_SDFAT_DFR_DEBUG */
+
+#endif /* CONFIG_SDFAT_DFR */
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _SDFAT_API_H */
+
+/* end of api.h */
diff --git a/fs/sdfat/blkdev.c b/fs/sdfat/blkdev.c
new file mode 100644
index 000000000000..264c670df0f0
--- /dev/null
+++ b/fs/sdfat/blkdev.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : blkdev.c */
+/* PURPOSE : sdFAT Block Device Driver Glue Layer */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/************************************************************************/
+
+#include <linux/blkdev.h>
+#include <linux/log2.h>
+#include <linux/backing-dev.h>
+
+#include "sdfat.h"
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY */
+/************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+	/* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) */
+/* Compatibility shim: kernels >= 4.0 provide inode_to_bdi(); older
+ * kernels kept the backing_dev_info pointer in the address_space. */
+static struct backing_dev_info *inode_to_bdi(struct inode *bd_inode)
+{
+	return bd_inode->i_mapping->backing_dev_info;
+}
+#endif
+
+/*======================================================================*/
+/* Function Definitions */
+/*======================================================================*/
+/* Mark the volume's block device as opened. No real device open happens
+ * here; the flag simply arms the other bdev_* helpers, which refuse I/O
+ * while it is clear. Always returns 0.
+ */
+s32 bdev_open_dev(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (!fsi->bd_opened)
+		fsi->bd_opened = true;
+
+	return 0;
+}
+
+/* Clear the "opened" flag; subsequent bdev_* helpers return -EIO.
+ * Always returns 0.
+ */
+s32 bdev_close_dev(struct super_block *sb)
+{
+	SDFAT_SB(sb)->fsi.bd_opened = false;
+	return 0;
+}
+
+/* Returns nonzero if the backing device has gone away: its
+ * backing_dev_info no longer references an underlying device
+ * (bdi->dev == NULL, e.g. after surprise removal of the media). */
+static inline s32 block_device_ejected(struct super_block *sb)
+{
+	struct inode *bd_inode = sb->s_bdev->bd_inode;
+	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+	return (bdi->dev == NULL);
+}
+
+/* Check whether the backing_dev_info is still valid.
+ * Returns 0 if the device is present, -ENXIO if it has been ejected.
+ * The warning is logged only once per volume (gated by SDFAT_EIO_BDI
+ * in fsi->prev_eio) to avoid log flooding. */
+s32 bdev_check_bdi_valid(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (block_device_ejected(sb)) {
+		if (!(fsi->prev_eio & SDFAT_EIO_BDI)) {
+			fsi->prev_eio |= SDFAT_EIO_BDI;
+			sdfat_log_msg(sb, KERN_ERR, "%s: block device is "
+				"eliminated.(bdi:%p)", __func__, sb->s_bdi);
+			sdfat_debug_warn_on(1);
+		}
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+
+/* Make a readahead request */
+s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 sects_per_page = (PAGE_SIZE >> sb->s_blocksize_bits);
+ struct blk_plug plug;
+ u64 i;
+
+ if (!fsi->bd_opened)
+ return -EIO;
+
+ blk_start_plug(&plug);
+ for (i = 0; i < num_secs; i++) {
+ if (i && !(i & (sects_per_page - 1)))
+ blk_flush_plug(current);
+ sb_breadahead(sb, (sector_t)(secno + i));
+ }
+ blk_finish_plug(&plug);
+
+ return 0;
+}
+
+/* Get a buffer_head covering num_secs sectors at secno.
+ * If @read is nonzero the data is read from disk (__bread), otherwise
+ * only a (possibly not up-to-date) buffer is obtained (__getblk).
+ * The previous *bh reference, if any, is released first; on success the
+ * new buffer is stored in *bh and 0 is returned, on failure -EIO.
+ * A "no bh" warning is logged only once per volume (SDFAT_EIO_READ). */
+s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u8 blksize_bits = sb->s_blocksize_bits;
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+		return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+	if (!fsi->bd_opened)
+		return -EIO;
+
+	/* drop caller's old reference before replacing it */
+	brelse(*bh);
+
+	/* size argument is the request length in bytes */
+	if (read)
+		*bh = __bread(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits);
+	else
+		*bh = __getblk(sb->s_bdev, (sector_t)secno, num_secs << blksize_bits);
+
+	/* read successfully */
+	if (*bh)
+		return 0;
+
+	/*
+	 * patch 1.2.4 : reset ONCE warning message per volume.
+	 */
+	if (!(fsi->prev_eio & SDFAT_EIO_READ)) {
+		fsi->prev_eio |= SDFAT_EIO_READ;
+		sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__);
+		sdfat_debug_warn_on(1);
+	}
+
+	return -EIO;
+}
+
+/* Write num_secs sectors at secno using the data in @bh.
+ * If @bh already maps secno it is simply marked dirty; otherwise a new
+ * buffer for secno is obtained and the data copied into it. When @sync
+ * is nonzero the buffer is synchronously flushed and errors reported.
+ * Returns 0 on success, -EIO on failure (logged once per volume via
+ * the SDFAT_EIO_WRITE bit in fsi->prev_eio). */
+s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync)
+{
+	u64 count;
+	struct buffer_head *bh2;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+		return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+	if (!fsi->bd_opened)
+		return -EIO;
+
+	if (secno == bh->b_blocknr) {
+		/* in-place: bh already maps the target sector */
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		if (sync && (sync_dirty_buffer(bh) != 0))
+			return -EIO;
+	} else {
+		/* copy path: write bh's data to a different sector */
+		count = num_secs << sb->s_blocksize_bits;
+
+		bh2 = __getblk(sb->s_bdev, (sector_t)secno, count);
+
+		if (!bh2)
+			goto no_bh;
+
+		lock_buffer(bh2);
+		memcpy(bh2->b_data, bh->b_data, count);
+		set_buffer_uptodate(bh2);
+		mark_buffer_dirty(bh2);
+		unlock_buffer(bh2);
+		if (sync && (sync_dirty_buffer(bh2) != 0)) {
+			__brelse(bh2);
+			goto no_bh;
+		}
+		__brelse(bh2);
+	}
+	return 0;
+no_bh:
+	/*
+	 * patch 1.2.4 : reset ONCE warning message per volume.
+	 */
+	if (!(fsi->prev_eio & SDFAT_EIO_WRITE)) {
+		fsi->prev_eio |= SDFAT_EIO_WRITE;
+		sdfat_log_msg(sb, KERN_ERR, "%s: No bh. I/O error.", __func__);
+		sdfat_debug_warn_on(1);
+	}
+
+	return -EIO;
+}
+
+/* Flush all dirty buffers of the volume's block device to disk.
+ * Returns the result of sync_blockdev(), or -EIO if the device is not
+ * opened (or error injection is enabled via debug flags). */
+s32 bdev_sync_all(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & SDFAT_DEBUGFLAGS_ERROR_RW)
+		return -EIO;
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+	if (!fsi->bd_opened)
+		return -EIO;
+
+	return sync_blockdev(sb->s_bdev);
+}
+
+/*
+ * Sector Read/Write Functions
+ */
+/* Read a single sector into *bh (see bdev_mread for @read semantics).
+ * Validates @sec against the volume size when it is known
+ * (fsi->num_sectors > 0). Returns 0 on success, -EIO on range or I/O
+ * error; errors are reported ratelimited via sdfat_fs_error_ratelimit. */
+s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	BUG_ON(!bh);
+	if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) {
+		sdfat_fs_error_ratelimit(sb,
+				"%s: out of range (sect:%llu)", __func__, sec);
+		return -EIO;
+	}
+
+	if (bdev_mread(sb, sec, bh, 1, read)) {
+		sdfat_fs_error_ratelimit(sb,
+				"%s: I/O error (sect:%llu)", __func__, sec);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Write a single sector from @bh (see bdev_mwrite for @sync semantics).
+ * Validates @sec against the volume size when it is known.
+ * Returns 0 on success, -EIO on range or I/O error (ratelimited). */
+s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	BUG_ON(!bh);
+	if ((sec >= fsi->num_sectors) && (fsi->num_sectors > 0)) {
+		sdfat_fs_error_ratelimit(sb,
+				"%s: out of range (sect:%llu)", __func__, sec);
+		return -EIO;
+	}
+
+	if (bdev_mwrite(sb, sec, bh, 1, sync)) {
+		sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu)",
+						__func__, sec);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Multi-sector variant of read_sect: read num_secs contiguous sectors
+ * starting at @sec into a single buffer_head. The whole range must lie
+ * inside the volume when its size is known. Returns 0 or -EIO. */
+s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, u64 num_secs, s32 read)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	BUG_ON(!bh);
+	if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+		sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+						__func__, sec, num_secs);
+		return -EIO;
+	}
+
+	if (bdev_mread(sb, sec, bh, num_secs, read)) {
+		sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)",
+						__func__, sec, num_secs);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Multi-sector variant of write_sect: write num_secs contiguous sectors
+ * starting at @sec from @bh. The whole range must lie inside the volume
+ * when its size is known. Returns 0 or -EIO. */
+s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, u64 num_secs, s32 sync)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	BUG_ON(!bh);
+	if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+		sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+						__func__, sec, num_secs);
+		return -EIO;
+	}
+
+
+	if (bdev_mwrite(sb, sec, bh, num_secs, sync)) {
+		sdfat_fs_error_ratelimit(sb, "%s: I/O error (sect:%llu len:%llu)",
+						__func__, sec, num_secs);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Submit asynchronous writeback for @nr_bhs dirty buffer heads. */
+static inline void __blkdev_write_bhs(struct buffer_head **bhs, s32 nr_bhs)
+{
+	s32 idx = 0;
+
+	while (idx < nr_bhs)
+		write_dirty_buffer(bhs[idx++], WRITE);
+}
+
+/* Wait for @nr_bhs buffer heads to complete I/O.
+ * Every buffer is waited on even after a failure; the first failure
+ * (buffer not up to date) is reported as -EIO, otherwise 0. */
+static inline s32 __blkdev_sync_bhs(struct buffer_head **bhs, s32 nr_bhs)
+{
+	s32 idx;
+	s32 err = 0;
+
+	for (idx = 0; idx < nr_bhs; idx++) {
+		wait_on_buffer(bhs[idx]);
+		if (!buffer_uptodate(bhs[idx]) && !err)
+			err = -EIO;
+	}
+
+	return err;
+}
+
+/* Zero num_secs blocks starting at blknr through the buffer cache.
+ * Buffers are batched MAX_BUF_PER_PAGE at a time: each full batch is
+ * submitted for writeback and released before the next one; only the
+ * final (partial) batch is waited on for completion.
+ * Returns 0 on success, -ENOMEM/-EIO on failure; failed buffers are
+ * discarded with bforget() so stale zeroes are not written later. */
+static inline s32 __buffer_zeroed(struct super_block *sb, u64 blknr, u64 num_secs)
+{
+	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
+	s32 nr_bhs = MAX_BUF_PER_PAGE;
+	u64 last_blknr = blknr + num_secs;
+	s32 err, i, n;
+	struct blk_plug plug;
+
+	/* Zeroing the unused blocks on this cluster */
+	n = 0;
+	blk_start_plug(&plug);
+	while (blknr < last_blknr) {
+		bhs[n] = sb_getblk(sb, (sector_t)blknr);
+		if (!bhs[n]) {
+			err = -ENOMEM;
+			blk_finish_plug(&plug);
+			goto error;
+		}
+		memset(bhs[n]->b_data, 0, sb->s_blocksize);
+		set_buffer_uptodate(bhs[n]);
+		mark_buffer_dirty(bhs[n]);
+
+		n++;
+		blknr++;
+
+		/* keep the last batch held so it can be synced below */
+		if (blknr == last_blknr)
+			break;
+
+		if (n == nr_bhs) {
+			/* batch full: submit and release, then refill */
+			__blkdev_write_bhs(bhs, n);
+
+			for (i = 0; i < n; i++)
+				brelse(bhs[i]);
+			n = 0;
+		}
+	}
+	__blkdev_write_bhs(bhs, n);
+	blk_finish_plug(&plug);
+
+	err = __blkdev_sync_bhs(bhs, n);
+	if (err)
+		goto error;
+
+	for (i = 0; i < n; i++)
+		brelse(bhs[i]);
+
+	return 0;
+
+error:
+	EMSG("%s: failed zeroed sect %llu\n", __func__, blknr);
+	/* drop the still-held batch without writing it back */
+	for (i = 0; i < n; i++)
+		bforget(bhs[i]);
+
+	return err;
+}
+
+/* Zero-fill num_secs sectors starting at @sec.
+ * Returns 0 on success, -EIO if the range exceeds the known volume
+ * size, or -EAGAIN if the underlying zeroing failed (caller may retry). */
+s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (((sec+num_secs) > fsi->num_sectors) && (fsi->num_sectors > 0)) {
+		sdfat_fs_error_ratelimit(sb, "%s: out of range(sect:%llu len:%llu)",
+						__func__, sec, num_secs);
+		return -EIO;
+	}
+
+	/* Just return -EAGAIN if it is failed */
+	if (__buffer_zeroed(sb, sec, num_secs))
+		return -EAGAIN;
+
+	return 0;
+} /* end of write_msect_zero */
+
+/* end of blkdev.c */
diff --git a/fs/sdfat/cache.c b/fs/sdfat/cache.c
new file mode 100644
index 000000000000..8318898646be
--- /dev/null
+++ b/fs/sdfat/cache.c
@@ -0,0 +1,846 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : cache.c */
+/* PURPOSE : sdFAT Cache Manager */
+/* (FAT Cache & Buffer Cache) */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <asm/unaligned.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+#define DEBUG_HASH_LIST
+#define DEBUG_HASH_PREV (0xAAAA5555)
+#define DEBUG_HASH_NEXT (0x5555AAAA)
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+/* All buffer structures are protected w/ fsi->v_sem */
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+#define LOCKBIT (0x01)
+#define DIRTYBIT (0x02)
+#define KEEPBIT (0x04)
+
+/*----------------------------------------------------------------------*/
+/* Cache handling function declarations */
+/*----------------------------------------------------------------------*/
+static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec);
+static cache_ent_t *__fcache_get(struct super_block *sb);
+static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp);
+static void __fcache_remove_hash(cache_ent_t *bp);
+
+static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec);
+static cache_ent_t *__dcache_get(struct super_block *sb);
+static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp);
+static void __dcache_remove_hash(cache_ent_t *bp);
+
+/*----------------------------------------------------------------------*/
+/* Static functions */
+/*----------------------------------------------------------------------*/
+/* Insert @bp just after the list head, i.e. at the MRU position. */
+static void push_to_mru(cache_ent_t *bp, cache_ent_t *list)
+{
+	cache_ent_t *first = list->next;
+
+	bp->prev = list;
+	bp->next = first;
+	first->prev = bp;
+	list->next = bp;
+}
+
+/* Insert @bp just before the list head, i.e. at the LRU position. */
+static void push_to_lru(cache_ent_t *bp, cache_ent_t *list)
+{
+	cache_ent_t *last = list->prev;
+
+	bp->next = list;
+	bp->prev = last;
+	last->next = bp;
+	list->prev = bp;
+}
+
+/* Unlink @bp from its current list and re-insert it at the MRU side
+ * of @list. @bp must currently be linked (prev/next valid). */
+static void move_to_mru(cache_ent_t *bp, cache_ent_t *list)
+{
+	bp->prev->next = bp->next;
+	bp->next->prev = bp->prev;
+	push_to_mru(bp, list);
+}
+
+/* Unlink @bp from its current list and re-insert it at the LRU side
+ * of @list. @bp must currently be linked (prev/next valid). */
+static void move_to_lru(cache_ent_t *bp, cache_ent_t *list)
+{
+	bp->prev->next = bp->next;
+	bp->next->prev = bp->prev;
+	push_to_lru(bp, list);
+}
+
+/* Returns 0 if @bp is currently linked into a hash chain, -EINVAL if it
+ * is unhashed (debug poison values or self-linked sentinel state). */
+static inline s32 __check_hash_valid(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+	/* poison values written by __remove_from_hash() */
+	if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+		(bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+		return -EINVAL;
+	}
+#endif
+	if ((bp->hash.next == bp) || (bp->hash.prev == bp))
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Unlink @bp from its hash chain, then self-link it (or, with
+ * DEBUG_HASH_LIST, poison the pointers so a double remove is caught
+ * by __check_hash_valid()/__*_remove_hash()). */
+static inline void __remove_from_hash(cache_ent_t *bp)
+{
+	(bp->hash.prev)->hash.next = bp->hash.next;
+	(bp->hash.next)->hash.prev = bp->hash.prev;
+	bp->hash.next = bp;
+	bp->hash.prev = bp;
+#ifdef DEBUG_HASH_LIST
+	bp->hash.next = (cache_ent_t *)DEBUG_HASH_NEXT;
+	bp->hash.prev = (cache_ent_t *)DEBUG_HASH_PREV;
+#endif
+}
+
+/* Do FAT mirroring (don't sync)
+ * sec: sector No. in FAT1
+ * bh: bh of sec.
+ */
+/* Do FAT mirroring (don't sync)
+ * sec: sector No. in FAT1
+ * bh: bh of sec.
+ * Mirrors the FAT1 sector to the corresponding FAT2 sector when a
+ * second FAT exists; compiled out unless CONFIG_SDFAT_FAT_MIRRORING.
+ * Returns 0 on success (or when mirroring is disabled), -EIO on error.
+ */
+static inline s32 __fat_copy(struct super_block *sb, u64 sec, struct buffer_head *bh, int sync)
+{
+#ifdef CONFIG_SDFAT_FAT_MIRRORING
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u64 sec2;
+
+	if (fsi->FAT2_start_sector != fsi->FAT1_start_sector) {
+		sec2 = sec - fsi->FAT1_start_sector + fsi->FAT2_start_sector;
+		/* FAT2 must immediately follow FAT1 on disk */
+		BUG_ON(sec2 != (sec + (u64)fsi->num_FAT_sectors));
+
+		MMSG("BD: fat mirroring (%llu in FAT1, %llu in FAT2)\n", sec, sec2);
+		if (write_sect(sb, sec2, bh, sync))
+			return -EIO;
+	}
+#else
+	/* DO NOTHING */
+#endif
+	return 0;
+} /* end of __fat_copy */
+
+/*
+ * returns 1, if bp is flushed
+ * returns 0, if bp is not dirty
+ * returns -1, if error occurs
+ */
+/*
+ * Flush one FAT-cache entry (and its FAT2 mirror) if it is dirty.
+ * returns 1, if bp is flushed
+ * returns 0, if bp is not dirty
+ * returns -1, if error occurs
+ */
+static s32 __fcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync)
+{
+	if (!(bp->flag & DIRTYBIT))
+		return 0;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	// Make buffer dirty (XXX: Naive impl.)
+	if (write_sect(sb, bp->sec, bp->bh, 0))
+		return -EIO;
+
+	if (__fat_copy(sb, bp->sec, bp->bh, 0))
+		return -EIO;
+#endif
+	bp->flag &= ~(DIRTYBIT);
+
+	if (sync)
+		sync_dirty_buffer(bp->bh);
+
+	return 1;
+}
+
+/* Invalidate a FAT-cache entry: unhash it, drop its buffer_head
+ * reference and park it at the LRU end so it is reused first.
+ * Always returns 0. */
+static s32 __fcache_ent_discard(struct super_block *sb, cache_ent_t *bp)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	__fcache_remove_hash(bp);
+	bp->sec = ~0;
+	bp->flag = 0;
+
+	if (bp->bh) {
+		__brelse(bp->bh);
+		bp->bh = NULL;
+	}
+	move_to_lru(bp, &fsi->fcache.lru_list);
+	return 0;
+}
+
+/* Return a pointer to the cached data of FAT sector @sec, reading it
+ * from disk on a cache miss (with naive read-ahead on aligned misses).
+ * On a hit the entry is moved to MRU; if the backing device is gone or
+ * the read fails, the entry is discarded and NULL is returned. */
+u8 *fcache_getblk(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u32 page_ra_count = FCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits;
+
+	bp = __fcache_find(sb, sec);
+	if (bp) {
+		if (bdev_check_bdi_valid(sb)) {
+			/* device ejected: drop the stale entry */
+			__fcache_ent_flush(sb, bp, 0);
+			__fcache_ent_discard(sb, bp);
+			return NULL;
+		}
+		move_to_mru(bp, &fsi->fcache.lru_list);
+		return bp->bh->b_data;
+	}
+
+	/* miss: recycle the coldest entry */
+	bp = __fcache_get(sb);
+	if (!__check_hash_valid(bp))
+		__fcache_remove_hash(bp);
+
+	bp->sec = sec;
+	bp->flag = 0;
+	__fcache_insert_hash(sb, bp);
+
+	/* Naive FAT read-ahead (increase I/O unit to page_ra_count) */
+	if ((sec & (page_ra_count - 1)) == 0)
+		bdev_readahead(sb, sec, (u64)page_ra_count);
+
+	/*
+	 * patch 1.2.4 : buffer_head null pointer exception problem.
+	 *
+	 * When read_sect is failed, fcache should be moved to
+	 * EMPTY hash_list and the first of lru_list.
+	 */
+	if (read_sect(sb, sec, &(bp->bh), 1)) {
+		__fcache_ent_discard(sb, bp);
+		return NULL;
+	}
+
+	return bp->bh->b_data;
+}
+
+/* Mark @bp dirty for delayed writeback instead of writing immediately.
+ * Returns 0 when the delayed-dirty path applies; -ENOTSUPP when the
+ * feature is compiled out or the volume is exFAT (caller must then
+ * write through synchronously). */
+static inline int __mark_delayed_dirty(struct super_block *sb, cache_ent_t *bp)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (fsi->vol_type == EXFAT)
+		return -ENOTSUPP;
+
+	bp->flag |= DIRTYBIT;
+	return 0;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
+
+
+/* Commit a modification to cached FAT sector @sec: either mark the
+ * entry delayed-dirty, or (when that is unsupported) write the sector
+ * and its FAT2 mirror immediately. The sector must already be cached.
+ * Returns 0 on success, -EIO otherwise. */
+s32 fcache_modify(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *bp;
+
+	bp = __fcache_find(sb, sec);
+	if (!bp) {
+		sdfat_fs_error(sb, "Can`t find fcache (sec 0x%016llx)", sec);
+		return -EIO;
+	}
+
+	/* delayed-dirty succeeded: writeback happens at flush time */
+	if (!__mark_delayed_dirty(sb, bp))
+		return 0;
+
+	if (write_sect(sb, sec, bp->bh, 0))
+		return -EIO;
+
+	if (__fat_copy(sb, sec, bp->bh, 0))
+		return -EIO;
+
+	return 0;
+}
+
+/*======================================================================*/
+/* Cache Initialization Functions */
+/*======================================================================*/
+/* Initialise the per-volume metadata caches: put every fcache (FAT) and
+ * dcache (buffer) pool entry on its LRU list, set up the hash bucket
+ * sentinels (self-linked), then hash every pool entry.
+ * Always returns 0.
+ */
+s32 meta_cache_init(struct super_block *sb)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 i;
+
+	/* LRU list */
+	fsi->fcache.lru_list.next = &fsi->fcache.lru_list;
+	fsi->fcache.lru_list.prev = fsi->fcache.lru_list.next;
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++) {
+		fsi->fcache.pool[i].sec = ~0;
+		fsi->fcache.pool[i].flag = 0;
+		fsi->fcache.pool[i].bh = NULL;
+		fsi->fcache.pool[i].prev = NULL;
+		fsi->fcache.pool[i].next = NULL;
+		push_to_mru(&(fsi->fcache.pool[i]), &fsi->fcache.lru_list);
+	}
+
+	fsi->dcache.lru_list.next = &fsi->dcache.lru_list;
+	fsi->dcache.lru_list.prev = fsi->dcache.lru_list.next;
+	fsi->dcache.keep_list.next = &fsi->dcache.keep_list;
+	fsi->dcache.keep_list.prev = fsi->dcache.keep_list.next;
+
+	// Initially, all the BUF_CACHEs are in the LRU list
+	for (i = 0; i < BUF_CACHE_SIZE; i++) {
+		fsi->dcache.pool[i].sec = ~0;
+		fsi->dcache.pool[i].flag = 0;
+		fsi->dcache.pool[i].bh = NULL;
+		fsi->dcache.pool[i].prev = NULL;
+		fsi->dcache.pool[i].next = NULL;
+		push_to_mru(&(fsi->dcache.pool[i]), &fsi->dcache.lru_list);
+	}
+
+	/* HASH list: buckets start as empty self-linked sentinels */
+	for (i = 0; i < FAT_CACHE_HASH_SIZE; i++) {
+		fsi->fcache.hash_list[i].sec = ~0;
+		fsi->fcache.hash_list[i].hash.next = &(fsi->fcache.hash_list[i]);
+		fsi->fcache.hash_list[i].hash.prev = fsi->fcache.hash_list[i].hash.next;
+	}
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++)
+		__fcache_insert_hash(sb, &(fsi->fcache.pool[i]));
+
+	for (i = 0; i < BUF_CACHE_HASH_SIZE; i++) {
+		fsi->dcache.hash_list[i].sec = ~0;
+		fsi->dcache.hash_list[i].hash.next = &(fsi->dcache.hash_list[i]);
+
+		fsi->dcache.hash_list[i].hash.prev = fsi->dcache.hash_list[i].hash.next;
+	}
+
+	for (i = 0; i < BUF_CACHE_SIZE; i++)
+		__dcache_insert_hash(sb, &(fsi->dcache.pool[i]));
+
+	return 0;
+}
+
+/* No per-volume cache state needs explicit teardown; kept for
+ * symmetry with meta_cache_init(). Always returns 0. */
+s32 meta_cache_shutdown(struct super_block *sb)
+{
+	return 0;
+}
+
+/*======================================================================*/
+/* FAT Read/Write Functions */
+/*======================================================================*/
+/* Flush every dirty FAT-cache entry, then invalidate all entries and
+ * drop their buffer_head references. Returns 0, or the last flush
+ * error encountered (all entries are still released). */
+s32 fcache_release_all(struct super_block *sb)
+{
+	s32 ret = 0;
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 dirtycnt = 0;
+
+	bp = fsi->fcache.lru_list.next;
+	while (bp != &fsi->fcache.lru_list) {
+		s32 ret_tmp = __fcache_ent_flush(sb, bp, 0);
+
+		if (ret_tmp < 0)
+			ret = ret_tmp;
+		else
+			dirtycnt += ret_tmp;
+
+		bp->sec = ~0;
+		bp->flag = 0;
+
+		if (bp->bh) {
+			__brelse(bp->bh);
+			bp->bh = NULL;
+		}
+		bp = bp->next;
+	}
+
+	DMSG("BD:Release / dirty fat cache: %d (err:%d)\n", dirtycnt, ret);
+	return ret;
+}
+
+
+/* internal DIRTYBIT marked => bh dirty */
+s32 fcache_flush(struct super_block *sb, u32 sync)
+{
+ s32 ret = 0;
+ cache_ent_t *bp;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 dirtycnt = 0;
+
+ bp = fsi->fcache.lru_list.next;
+ while (bp != &fsi->fcache.lru_list) {
+ ret = __fcache_ent_flush(sb, bp, sync);
+ if (ret < 0)
+ break;
+
+ dirtycnt += ret;
+ bp = bp->next;
+ }
+
+ MMSG("BD: flush / dirty fat cache: %d (err:%d)\n", dirtycnt, ret);
+ return ret;
+}
+
+/* Hash lookup of FAT sector @sec. On a hit the buffer is touched
+ * (page-cache LRU hint) and the entry returned; NULL on a miss. */
+static cache_ent_t *__fcache_find(struct super_block *sb, u64 sec)
+{
+	s32 off;
+	cache_ent_t *bp, *hp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* same hash as __fcache_insert_hash() */
+	off = (sec + (sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE - 1);
+	hp = &(fsi->fcache.hash_list[off]);
+	for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) {
+		if (bp->sec == sec) {
+			/*
+			 * patch 1.2.4 : for debugging
+			 */
+			WARN(!bp->bh, "[SDFAT] fcache has no bh. "
+				      "It will make system panic.\n");
+
+			touch_buffer(bp->bh);
+			return bp;
+		}
+	}
+	return NULL;
+}
+
+/* Pick a FAT-cache entry to recycle: the coldest (LRU-tail) entry that
+ * is not dirty. If the whole list turns out dirty, flush everything
+ * and retry from the tail. The chosen entry is moved to MRU. */
+static cache_ent_t *__fcache_get(struct super_block *sb)
+{
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	bp = fsi->fcache.lru_list.prev;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	while (bp->flag & DIRTYBIT) {
+		cache_ent_t *bp_prev = bp->prev;
+
+		bp = bp_prev;
+		if (bp == &fsi->fcache.lru_list) {
+			DMSG("BD: fat cache flooding\n");
+			fcache_flush(sb, 0);	// flush all dirty FAT caches
+			bp = fsi->fcache.lru_list.prev;
+		}
+	}
+#endif
+//	if (bp->flag & DIRTYBIT)
+//		sync_dirty_buffer(bp->bh);
+
+	move_to_mru(bp, &fsi->fcache.lru_list);
+	return bp;
+}
+
+/* Insert @bp at the head of the fcache hash chain for bp->sec.
+ * The hash mixes the sector with its cluster index to spread
+ * consecutive sectors across buckets. */
+static void __fcache_insert_hash(struct super_block *sb, cache_ent_t *bp)
+{
+	s32 off;
+	cache_ent_t *hp;
+	FS_INFO_T *fsi;
+
+	fsi = &(SDFAT_SB(sb)->fsi);
+	off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (FAT_CACHE_HASH_SIZE-1);
+
+	hp = &(fsi->fcache.hash_list[off]);
+	bp->hash.next = hp->hash.next;
+	bp->hash.prev = hp;
+	hp->hash.next->hash.prev = bp;
+	hp->hash.next = bp;
+}
+
+
+/* Remove @bp from its fcache hash chain. With DEBUG_HASH_LIST, a
+ * double-remove (poisoned pointers) is logged and ignored instead of
+ * corrupting the list. Removing a still-dirty entry triggers WARN_ON. */
+static void __fcache_remove_hash(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+	if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+		(bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+		EMSG("%s: FATAL: tried to remove already-removed-cache-entry"
+			"(bp:%p)\n", __func__, bp);
+		return;
+	}
+#endif
+	WARN_ON(bp->flag & DIRTYBIT);
+	__remove_from_hash(bp);
+}
+
+/*======================================================================*/
+/* Buffer Read/Write Functions */
+/*======================================================================*/
+/* Read-ahead a cluster */
+s32 dcache_readahead(struct super_block *sb, u64 sec)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *bh;
+ u32 max_ra_count = DCACHE_MAX_RA_SIZE >> sb->s_blocksize_bits;
+ u32 page_ra_count = PAGE_SIZE >> sb->s_blocksize_bits;
+ u32 adj_ra_count = max(fsi->sect_per_clus, page_ra_count);
+ u32 ra_count = min(adj_ra_count, max_ra_count);
+
+ /* Read-ahead is not required */
+ if (fsi->sect_per_clus == 1)
+ return 0;
+
+ if (sec < fsi->data_start_sector) {
+ EMSG("BD: %s: requested sector is invalid(sect:%llu, root:%llu)\n",
+ __func__, sec, fsi->data_start_sector);
+ return -EIO;
+ }
+
+ /* Not sector aligned with ra_count, resize ra_count to page size */
+ if ((sec - fsi->data_start_sector) & (ra_count - 1))
+ ra_count = page_ra_count;
+
+ bh = sb_find_get_block(sb, sec);
+ if (!bh || !buffer_uptodate(bh))
+ bdev_readahead(sb, sec, (u64)ra_count);
+
+ brelse(bh);
+
+ return 0;
+}
+
+/*
+ * returns 1, if bp is flushed
+ * returns 0, if bp is not dirty
+ * returns -1, if error occurs
+ */
+/*
+ * Flush one buffer-cache entry if it is dirty (no FAT mirroring here,
+ * unlike __fcache_ent_flush).
+ * returns 1, if bp is flushed
+ * returns 0, if bp is not dirty
+ * returns -1, if error occurs
+ */
+static s32 __dcache_ent_flush(struct super_block *sb, cache_ent_t *bp, u32 sync)
+{
+	if (!(bp->flag & DIRTYBIT))
+		return 0;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	// Make buffer dirty (XXX: Naive impl.)
+	if (write_sect(sb, bp->sec, bp->bh, 0))
+		return -EIO;
+#endif
+	bp->flag &= ~(DIRTYBIT);
+
+	if (sync)
+		sync_dirty_buffer(bp->bh);
+
+	return 1;
+}
+
+/* Invalidate a buffer-cache entry: unhash it, drop its buffer_head
+ * reference and park it at the LRU end for early reuse.
+ * Always returns 0. */
+static s32 __dcache_ent_discard(struct super_block *sb, cache_ent_t *bp)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	MMSG("%s : bp[%p] (sec:%016llx flag:%08x bh:%p) list(prev:%p next:%p) "
+		"hash(prev:%p next:%p)\n", __func__,
+		bp, bp->sec, bp->flag, bp->bh, bp->prev, bp->next,
+		bp->hash.prev, bp->hash.next);
+
+	__dcache_remove_hash(bp);
+	bp->sec = ~0;
+	bp->flag = 0;
+
+	if (bp->bh) {
+		__brelse(bp->bh);
+		bp->bh = NULL;
+	}
+
+	move_to_lru(bp, &fsi->dcache.lru_list);
+	return 0;
+}
+
+/* Return a pointer to the cached data of sector @sec, reading it from
+ * disk on a cache miss. Hits move the entry to MRU unless it sits on
+ * the keep list (KEEPBIT). Returns NULL if the device is gone or the
+ * read fails (the entry is discarded in either case). */
+u8 *dcache_getblk(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	bp = __dcache_find(sb, sec);
+	if (bp) {
+		if (bdev_check_bdi_valid(sb)) {
+			MMSG("%s: found cache(%p, sect:%llu). But invalid BDI\n"
+				, __func__, bp, sec);
+			__dcache_ent_flush(sb, bp, 0);
+			__dcache_ent_discard(sb, bp);
+			return NULL;
+		}
+
+		if (!(bp->flag & KEEPBIT))	// already in keep list
+			move_to_mru(bp, &fsi->dcache.lru_list);
+
+		return bp->bh->b_data;
+	}
+
+	/* miss: recycle the coldest unlocked, non-dirty entry */
+	bp = __dcache_get(sb);
+
+	if (!__check_hash_valid(bp))
+		__dcache_remove_hash(bp);
+
+	bp->sec = sec;
+	bp->flag = 0;
+	__dcache_insert_hash(sb, bp);
+
+	if (read_sect(sb, sec, &(bp->bh), 1)) {
+		__dcache_ent_discard(sb, bp);
+		return NULL;
+	}
+
+	return bp->bh->b_data;
+
+}
+
+/* Commit a modification to cached sector @sec: delayed-dirty on
+ * FAT12/16/32 volumes (when compiled in), immediate write otherwise.
+ * Marks the superblock dirty in all cases. The sector must already be
+ * cached. Returns 0 on success, -EIO otherwise. */
+s32 dcache_modify(struct super_block *sb, u64 sec)
+{
+	s32 ret = -EIO;
+	cache_ent_t *bp;
+
+	set_sb_dirty(sb);
+
+	bp = __dcache_find(sb, sec);
+	if (unlikely(!bp)) {
+		sdfat_fs_error(sb, "Can`t find dcache (sec 0x%016llx)", sec);
+		return -EIO;
+	}
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	/* non-exFAT: defer the write, just flag the entry dirty */
+	if (SDFAT_SB(sb)->fsi.vol_type != EXFAT) {
+		bp->flag |= DIRTYBIT;
+		return 0;
+	}
+#endif
+	ret = write_sect(sb, sec, bp->bh, 0);
+
+	if (ret) {
+		DMSG("%s : failed to modify buffer(err:%d, sec:%llu, bp:0x%p)\n",
+			__func__, ret, sec, bp);
+	}
+
+	return ret;
+}
+
+/* Pin the cache entry for @sec by setting LOCKBIT, which makes
+ * __dcache_get() skip it during recycling.
+ * Returns 0 on success, -EIO if @sec is not cached. */
+s32 dcache_lock(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *ent = __dcache_find(sb, sec);
+
+	if (unlikely(!ent)) {
+		EMSG("%s : failed to lock buffer(sec:%llu, bp:0x%p)\n", __func__, sec, ent);
+		return -EIO;
+	}
+
+	ent->flag |= LOCKBIT;
+	return 0;
+}
+
+/* Release the pin taken by dcache_lock() on the entry for @sec.
+ * Returns 0 on success, -EIO if @sec is not cached. */
+s32 dcache_unlock(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *ent = __dcache_find(sb, sec);
+
+	if (unlikely(!ent)) {
+		EMSG("%s : failed to unlock buffer (sec:%llu, bp:0x%p)\n", __func__, sec, ent);
+		return -EIO;
+	}
+
+	ent->flag &= ~(LOCKBIT);
+	return 0;
+}
+
+/* Release the cache entry for @sec: write it back first if it is
+ * delayed-dirty, drop the buffer_head reference, and park the entry at
+ * the LRU end. Note: the entry is NOT removed from the hash here.
+ * Returns 0 on success, -ENOENT if not cached, -EIO on write failure. */
+s32 dcache_release(struct super_block *sb, u64 sec)
+{
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	bp = __dcache_find(sb, sec);
+	if (unlikely(!bp))
+		return -ENOENT;
+
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	if (bp->flag & DIRTYBIT) {
+		if (write_sect(sb, bp->sec, bp->bh, 0))
+			return -EIO;
+	}
+#endif
+	bp->sec = ~0;
+	bp->flag = 0;
+
+	if (bp->bh) {
+		__brelse(bp->bh);
+		bp->bh = NULL;
+	}
+
+	move_to_lru(bp, &fsi->dcache.lru_list);
+	return 0;
+}
+
+/* Release every buffer-cache entry: merge the keep list back into the
+ * LRU list, write back any delayed-dirty entries, and drop all
+ * buffer_head references. Returns 0, or -EIO if any write failed
+ * (all entries are still released). */
+s32 dcache_release_all(struct super_block *sb)
+{
+	s32 ret = 0;
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	s32 dirtycnt = 0;
+#endif
+
+	/* Connect list elements:
+	 * LRU list : (A - B - ... - bp_front) + (bp_first + ... + bp_last)
+	 */
+	while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) {
+		cache_ent_t *bp_keep = fsi->dcache.keep_list.prev;
+		// bp_keep->flag &= ~(KEEPBIT);		// Will be 0-ed later
+		move_to_mru(bp_keep, &fsi->dcache.lru_list);
+	}
+
+	bp = fsi->dcache.lru_list.next;
+	while (bp != &fsi->dcache.lru_list) {
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+		if (bp->flag & DIRTYBIT) {
+			dirtycnt++;
+			if (write_sect(sb, bp->sec, bp->bh, 0))
+				ret = -EIO;
+		}
+#endif
+		bp->sec = ~0;
+		bp->flag = 0;
+
+		if (bp->bh) {
+			__brelse(bp->bh);
+			bp->bh = NULL;
+		}
+		bp = bp->next;
+	}
+
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	DMSG("BD:Release / dirty buf cache: %d (err:%d)", dirtycnt, ret);
+#endif
+	return ret;
+}
+
+
+/* Flush every dirty buffer-cache entry. Keep-list entries are first
+ * moved back to the LRU list (clearing KEEPBIT). Stops at the first
+ * write failure. When @sync is nonzero each flushed buffer is waited
+ * on. Returns 0 on success, -EIO on error. */
+s32 dcache_flush(struct super_block *sb, u32 sync)
+{
+	s32 ret = 0;
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 dirtycnt = 0;
+	s32 keepcnt = 0;
+
+	/* Connect list elements:
+	 * LRU list : (A - B - ... - bp_front) + (bp_first + ... + bp_last)
+	 */
+	while (fsi->dcache.keep_list.prev != &fsi->dcache.keep_list) {
+		cache_ent_t *bp_keep = fsi->dcache.keep_list.prev;
+
+		bp_keep->flag &= ~(KEEPBIT);		// Will be 0-ed later
+		move_to_mru(bp_keep, &fsi->dcache.lru_list);
+		keepcnt++;
+	}
+
+	bp = fsi->dcache.lru_list.next;
+	while (bp != &fsi->dcache.lru_list) {
+		if (bp->flag & DIRTYBIT) {
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+			// Make buffer dirty (XXX: Naive impl.)
+			if (write_sect(sb, bp->sec, bp->bh, 0)) {
+				ret = -EIO;
+				break;
+			}
+
+#endif
+			bp->flag &= ~(DIRTYBIT);
+			dirtycnt++;
+
+			if (sync != 0)
+				sync_dirty_buffer(bp->bh);
+		}
+		bp = bp->next;
+	}
+
+	MMSG("BD: flush / dirty dentry cache: %d (%d from keeplist, err:%d)\n",
+		dirtycnt, keepcnt, ret);
+	return ret;
+}
+
+/* Hash lookup of sector @sec in the buffer cache. On a hit the buffer
+ * is touched (page-cache LRU hint) and the entry returned; NULL on a
+ * miss. */
+static cache_ent_t *__dcache_find(struct super_block *sb, u64 sec)
+{
+	s32 off;
+	cache_ent_t *bp, *hp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* same hash as __dcache_insert_hash() */
+	off = (sec + (sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE - 1);
+
+	hp = &(fsi->dcache.hash_list[off]);
+	for (bp = hp->hash.next; bp != hp; bp = bp->hash.next) {
+		if (bp->sec == sec) {
+			touch_buffer(bp->bh);
+			return bp;
+		}
+	}
+	return NULL;
+}
+
+/* Pick a buffer-cache entry to recycle, scanning from the LRU tail.
+ * Locked entries are skipped; dirty entries are moved to the keep list
+ * (KEEPBIT) on the way. If everything is dirty/locked, flush all and
+ * restart from the tail. The chosen entry is moved to MRU. */
+static cache_ent_t *__dcache_get(struct super_block *sb)
+{
+	cache_ent_t *bp;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	bp = fsi->dcache.lru_list.prev;
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	while (bp->flag & (DIRTYBIT | LOCKBIT)) {
+		cache_ent_t *bp_prev = bp->prev;	// hold prev
+
+		if (bp->flag & DIRTYBIT) {
+			MMSG("BD: Buf cache => Keep list\n");
+			bp->flag |= KEEPBIT;
+			move_to_mru(bp, &fsi->dcache.keep_list);
+		}
+		bp = bp_prev;
+
+		/* If all dcaches are dirty */
+		if (bp == &fsi->dcache.lru_list) {
+			DMSG("BD: buf cache flooding\n");
+			dcache_flush(sb, 0);
+			bp = fsi->dcache.lru_list.prev;
+		}
+	}
+#else
+	while (bp->flag & LOCKBIT)
+		bp = bp->prev;
+#endif
+//	if (bp->flag & DIRTYBIT)
+//		sync_dirty_buffer(bp->bh);
+
+	move_to_mru(bp, &fsi->dcache.lru_list);
+	return bp;
+}
+
+/* Insert @bp at the head of the dcache hash chain for bp->sec.
+ * Same sector+cluster-index hash as the fcache variant, but over
+ * BUF_CACHE_HASH_SIZE buckets. */
+static void __dcache_insert_hash(struct super_block *sb, cache_ent_t *bp)
+{
+	s32 off;
+	cache_ent_t *hp;
+	FS_INFO_T *fsi;
+
+	fsi = &(SDFAT_SB(sb)->fsi);
+	off = (bp->sec + (bp->sec >> fsi->sect_per_clus_bits)) & (BUF_CACHE_HASH_SIZE-1);
+
+	hp = &(fsi->dcache.hash_list[off]);
+	bp->hash.next = hp->hash.next;
+	bp->hash.prev = hp;
+	hp->hash.next->hash.prev = bp;
+	hp->hash.next = bp;
+}
+
+/* Remove @bp from its dcache hash chain. With DEBUG_HASH_LIST, a
+ * double-remove (poisoned pointers) is logged and ignored. Removing a
+ * still-dirty entry triggers WARN_ON. */
+static void __dcache_remove_hash(cache_ent_t *bp)
+{
+#ifdef DEBUG_HASH_LIST
+	if ((bp->hash.next == (cache_ent_t *)DEBUG_HASH_NEXT) ||
+		(bp->hash.prev == (cache_ent_t *)DEBUG_HASH_PREV)) {
+		EMSG("%s: FATAL: tried to remove already-removed-cache-entry"
+			"(bp:%p)\n", __func__, bp);
+		return;
+	}
+#endif
+	WARN_ON(bp->flag & DIRTYBIT);
+	__remove_from_hash(bp);
+}
+
+
+/* end of cache.c */
diff --git a/fs/sdfat/config.h b/fs/sdfat/config.h
new file mode 100644
index 000000000000..6e2a4e80932c
--- /dev/null
+++ b/fs/sdfat/config.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_CONFIG_H
+#define _SDFAT_CONFIG_H
+/*======================================================================*/
+/* */
+/* FFS CONFIGURATIONS */
+/* (CHANGE THIS PART IF REQUIRED) */
+/* */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* Feature Config */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Debug/Experimental Config */
+/*----------------------------------------------------------------------*/
+//#define CONFIG_SDFAT_TRACE_IO
+//#define CONFIG_SDFAT_TRACE_LOCK /* Trace elapsed time in lock_super(sb) */
+
+/*----------------------------------------------------------------------*/
+/* Defragmentation Config */
+/*----------------------------------------------------------------------*/
+//#define CONFIG_SDFAT_DFR
+//#define CONFIG_SDFAT_DFR_PACKING
+//#define CONFIG_SDFAT_DFR_DEBUG
+
+/*----------------------------------------------------------------------*/
+/* Config for Kernel equal or newer than 3.7 */
+/*----------------------------------------------------------------------*/
+#ifndef CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS
+#define CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS (dirty_writeback_interval)
+#endif
+
+/*----------------------------------------------------------------------*/
+/* Default Kconfig */
+/*----------------------------------------------------------------------*/
+/* default mount options */
+#ifndef CONFIG_SDFAT_DEFAULT_CODEPAGE /* if Kconfig lacked codepage */
+#define CONFIG_SDFAT_DEFAULT_CODEPAGE 437
+#endif
+
+#ifndef CONFIG_SDFAT_DEFAULT_IOCHARSET /* if Kconfig lacked iocharset */
+#define CONFIG_SDFAT_DEFAULT_IOCHARSET "utf8"
+#endif
+
+#ifndef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ /* Shortname ~1, ... ~9 have higher
+ * priority (WIN32/VFAT-like)
+ */
+//#define CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+#endif
+
+#ifndef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+//#define CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+#endif
+
+#ifndef CONFIG_SDFAT_FAT_MIRRORING /* if Kconfig lacked fat-mirroring option */
+#define CONFIG_SDFAT_FAT_MIRRORING /* Write FAT 1, FAT 2 simultaneously */
+#endif
+
+#ifndef CONFIG_SDFAT_DELAYED_META_DIRTY
+//#define CONFIG_SDFAT_DELAYED_META_DIRTY /* delayed DIR/FAT dirty support */
+#endif
+
+#ifndef CONFIG_SDFAT_SUPPORT_DIR_SYNC
+//#define CONFIG_SDFAT_SUPPORT_DIR_SYNC /* support DIR_SYNC */
+#endif
+
+#ifndef CONFIG_SDFAT_CHECK_RO_ATTR
+//#define CONFIG_SDFAT_CHECK_RO_ATTR
+#endif
+
+#ifndef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+#define CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+#endif
+
+#ifndef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+//#define CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_SHOW_PID
+//#define CONFIG_SDFAT_DBG_SHOW_PID
+#endif
+
+#ifndef CONFIG_SDFAT_VIRTUAL_XATTR
+//#define CONFIG_SDFAT_VIRTUAL_XATTR
+#endif
+
+#ifndef CONFIG_SDFAT_SUPPORT_STLOG
+//#define CONFIG_SDFAT_SUPPORT_STLOG
+#endif
+
+#ifndef CONFIG_SDFAT_DEBUG
+//{
+//#define CONFIG_SDFAT_DEBUG
+
+#ifndef CONFIG_SDFAT_DBG_IOCTL
+//#define CONFIG_SDFAT_DBG_IOCTL
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_MSG
+//#define CONFIG_SDFAT_DBG_MSG
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_CAREFUL
+//#define CONFIG_SDFAT_DBG_CAREFUL
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_BUGON
+//#define CONFIG_SDFAT_DBG_BUGON
+#endif
+
+#ifndef CONFIG_SDFAT_DBG_WARNON
+//#define CONFIG_SDFAT_DBG_WARNON
+#endif
+//}
+#endif /* CONFIG_SDFAT_DEBUG */
+
+
+#ifndef CONFIG_SDFAT_TRACE_SB_LOCK
+//#define CONFIG_SDFAT_TRACE_SB_LOCK
+#endif
+
+#ifndef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+//#define CONFIG_SDFAT_TRACE_ELAPSED_TIME
+#endif
+
+#endif /* _SDFAT_CONFIG_H */
+
+/* end of config.h */
diff --git a/fs/sdfat/core.c b/fs/sdfat/core.c
new file mode 100644
index 000000000000..3a5af0b83d59
--- /dev/null
+++ b/fs/sdfat/core.c
@@ -0,0 +1,3694 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core.c */
+/* PURPOSE : FAT & exFAT common core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#include <linux/iversion.h>
+#endif
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
+/*
+ * Compatibility shim for kernels that predate <linux/iversion.h>:
+ * read the inode version counter directly from i_version.
+ */
+static inline u64 inode_peek_iversion(struct inode *inode)
+{
+ return inode->i_version;
+}
+#endif
+
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+/*
+ * Mark the superblock dirty.  On kernels < 3.7 this simply sets
+ * sb->s_dirt.  On newer kernels the flag lives in sdfat_sb_info and a
+ * delayed write_super work item is queued (once; work_lock guards the
+ * queued flag) to write the superblock back after
+ * CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS centiseconds.
+ */
+static inline void __set_sb_dirty(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ sb->s_dirt = 1;
+#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ sbi->s_dirt = 1;
+ /* Insert work */
+ spin_lock(&sbi->work_lock);
+ if (!sbi->write_super_queued) {
+ unsigned long delay;
+
+ /* interval is in centiseconds, hence the *10 to get msecs */
+ delay = msecs_to_jiffies(CONFIG_SDFAT_WRITE_SB_INTERVAL_CSECS * 10);
+ queue_delayed_work(system_long_wq, &sbi->write_super_work, delay);
+ sbi->write_super_queued = 1;
+ }
+ spin_unlock(&sbi->work_lock);
+#endif
+}
+
+/* Public wrapper around __set_sb_dirty(); the trace call is disabled
+ * because it produced too much output.
+ */
+void set_sb_dirty(struct super_block *sb)
+{
+ __set_sb_dirty(sb);
+ // XXX: to be removed later, prints too much output
+ //TMSG("%s finished.\n", __func__);
+}
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/* MS-DOS reserved device names, space-padded to the 8-char SFN base;
+ * creation of files whose short name matches one of these is rejected
+ * (see get_num_entries_and_dos_name()).
+ */
+static s8 *reserved_names[] = {
+ "AUX ", "CON ", "NUL ", "PRN ",
+ "COM1 ", "COM2 ", "COM3 ", "COM4 ",
+ "COM5 ", "COM6 ", "COM7 ", "COM8 ", "COM9 ",
+ "LPT1 ", "LPT2 ", "LPT3 ", "LPT4 ",
+ "LPT5 ", "LPT6 ", "LPT7 ", "LPT8 ", "LPT9 ",
+ NULL
+};
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+
+/*
+ * File System Management Functions
+ */
+
+/*
+ * Verify that every on-disk directory-entry structure compiles to
+ * exactly DENTRY_SIZE bytes.  The on-disk layout depends on these
+ * sizes, so a mismatch (e.g. from unexpected padding) makes the
+ * driver unusable; returns -EINVAL in that case, 0 on success.
+ */
+static s32 check_type_size(void)
+{
+ /* critical check for system requirement on size of DENTRY_T structure */
+ if (sizeof(DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(DOS_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(EXT_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(FILE_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(STRM_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(NAME_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(BMAP_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(CASE_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ if (sizeof(VOLM_DENTRY_T) != DENTRY_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Update the volume dirty/clean flag both in memory and in the boot
+ * sector on disk.  The on-disk location differs per FAT variant:
+ * exFAT keeps a 16-bit vol_flags field, FAT32 and FAT16/12 keep a
+ * one-byte state field.  No-op if the flag is unchanged or the volume
+ * is mounted read-only.  Returns 0 on success or a negative errno.
+ */
+static s32 __fs_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 err;
+ s32 sync = 0;
+
+ /* flags are not changed */
+ if (fsi->vol_flag == new_flag)
+ return 0;
+
+ fsi->vol_flag = new_flag;
+
+ /* skip updating volume dirty flag,
+ * if this volume has been mounted with read-only
+ */
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
+
+ /* lazily read and cache the boot sector on first use */
+ if (!fsi->pbr_bh) {
+ err = read_sect(sb, 0, &(fsi->pbr_bh), 1);
+ if (err) {
+ EMSG("%s : failed to read boot sector\n", __func__);
+ return err;
+ }
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ pbr64_t *bpb = (pbr64_t *)fsi->pbr_bh->b_data;
+ bpb->bsx.vol_flags = cpu_to_le16(new_flag);
+ } else if (fsi->vol_type == FAT32) {
+ pbr32_t *bpb = (pbr32_t *)fsi->pbr_bh->b_data;
+ bpb->bsx.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00;
+ } else { /* FAT16/12 */
+ pbr16_t *bpb = (pbr16_t *) fsi->pbr_bh->b_data;
+ bpb->bpb.state = new_flag & VOL_DIRTY ? FAT_VOL_DIRTY : 0x00;
+ }
+
+ /* write synchronously when the caller demands it, or when marking
+ * the volume dirty for the first time (buffer not yet dirty)
+ */
+ if (always_sync)
+ sync = 1;
+ else if ((new_flag == VOL_DIRTY) && (!buffer_dirty(fsi->pbr_bh)))
+ sync = 1;
+ else
+ sync = 0;
+
+ err = write_sect(sb, 0, fsi->pbr_bh, sync);
+ if (err)
+ EMSG("%s : failed to modify volume flag\n", __func__);
+
+ return err;
+}
+
+/* Set volume flags with the default (lazy) sync policy. */
+static s32 fs_set_vol_flags(struct super_block *sb, u16 new_flag)
+{
+ return __fs_set_vol_flags(sb, new_flag, 0);
+}
+
+/* Exported variant that lets the caller force a synchronous write. */
+s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync)
+{
+ return __fs_set_vol_flags(sb, new_flag, always_sync);
+}
+
+/*
+ * Flush delayed-dirty FAT/DIR caches for non-exFAT volumes.
+ * Only meaningful with CONFIG_SDFAT_DELAYED_META_DIRTY; otherwise
+ * metadata is written through immediately and nothing is needed here.
+ * Always returns 0.
+ */
+static inline s32 __fs_meta_sync(struct super_block *sb, s32 do_sync)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->vol_type != EXFAT) {
+ MMSG("meta flush in fs_sync(sync=%d)\n", do_sync);
+ fcache_flush(sb, 0);
+ dcache_flush(sb, 0);
+ }
+#else
+ /* DO NOTHING */
+#endif
+ return 0;
+}
+
+/*
+ * Synchronize the filesystem to disk: flush cached metadata, then
+ * sync the whole block device.  A no-op unless @do_sync is set.
+ * Returns 0 on success or the first error encountered.
+ */
+static s32 fs_sync(struct super_block *sb, s32 do_sync)
+{
+ s32 err;
+
+ if (!do_sync)
+ return 0;
+
+ err = __fs_meta_sync(sb, do_sync);
+
+ if (!err)
+ err = bdev_sync_all(sb);
+
+ if (err)
+ EMSG("%s : failed to sync. (err:%d)\n", __func__, err);
+
+ return err;
+}
+
+/*
+ * Cluster Management Functions
+ */
+
+/*
+ * Zero-fill every sector of cluster @clu (or, for the free-cluster
+ * sentinel, the FAT12/16 fixed root directory area).  For DIRSYNC
+ * inodes a multi-sector synchronous zero write is attempted first;
+ * if that path reports -EAGAIN, or DIRSYNC is not set, the sectors
+ * are zeroed one by one through the buffer cache.
+ * Returns 0 on success or a negative errno.
+ */
+static s32 __clear_cluster(struct inode *inode, u32 clu)
+{
+ u64 s, n;
+ struct super_block *sb = inode->i_sb;
+ u32 sect_size = (u32)sb->s_blocksize;
+ s32 ret = 0;
+ struct buffer_head *tmp_bh = NULL;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(clu)) { /* FAT16 root_dir */
+ s = fsi->root_start_sector;
+ n = fsi->data_start_sector;
+ } else {
+ s = CLUS_TO_SECT(fsi, clu);
+ n = s + fsi->sect_per_clus;
+ }
+
+ if (IS_DIRSYNC(inode)) {
+ ret = write_msect_zero(sb, s, (u64)fsi->sect_per_clus);
+ if (ret != -EAGAIN)
+ return ret;
+ }
+
+ /* Trying buffered zero writes
+ * if it doesn't have DIRSYNC or write_msect_zero() returned -EAGAIN
+ */
+ for ( ; s < n; s++) {
+#if 0
+ dcache_release(sb, s);
+#endif
+ ret = read_sect(sb, s, &tmp_bh, 0);
+ if (ret)
+ goto out;
+
+ memset((u8 *)tmp_bh->b_data, 0x0, sect_size);
+ ret = write_sect(sb, s, tmp_bh, 0);
+ if (ret)
+ goto out;
+ }
+out:
+ brelse(tmp_bh);
+ return ret;
+} /* end of __clear_cluster */
+
+/*
+ * Find the last cluster of chain @p_chain and store it in @ret_clu.
+ * For a contiguous chain (flags == 0x03, exFAT no-fat-chain) it is
+ * computed arithmetically; otherwise the FAT chain is walked to EOF
+ * and the counted length is cross-checked against p_chain->size so a
+ * corrupted on-disk chain is reported instead of silently used.
+ * Returns 0 on success, -EIO on FAT read error or size mismatch.
+ */
+static s32 __find_last_cluster(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_clu)
+{
+ u32 clu, next;
+ u32 count = 0;
+
+ next = p_chain->dir;
+ if (p_chain->flags == 0x03) {
+ *ret_clu = next + p_chain->size - 1;
+ return 0;
+ }
+
+ do {
+ count++;
+ clu = next;
+ if (fat_ent_get_safe(sb, clu, &next))
+ return -EIO;
+ } while (!IS_CLUS_EOF(next));
+
+ if (p_chain->size != count) {
+ sdfat_fs_error(sb, "bogus directory size "
+ "(clus : ondisk(%d) != counted(%d))",
+ p_chain->size, count);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ *ret_clu = clu;
+ return 0;
+}
+
+
+/*
+ * Count the number of clusters in chain @p_chain into @ret_count.
+ * Contiguous chains (flags == 0x03) are answered from p_chain->size
+ * without touching the FAT.  The walk is bounded by num_clusters so
+ * a looping (corrupted) FAT chain cannot hang the kernel.
+ * Returns 0 on success, -EIO on FAT read error.
+ */
+static s32 __count_num_clusters(struct super_block *sb, CHAIN_T *p_chain, u32 *ret_count)
+{
+ u32 i, count;
+ u32 clu;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (!p_chain->dir || IS_CLUS_EOF(p_chain->dir)) {
+ *ret_count = 0;
+ return 0;
+ }
+
+ if (p_chain->flags == 0x03) {
+ *ret_count = p_chain->size;
+ return 0;
+ }
+
+ clu = p_chain->dir;
+ count = 0;
+ for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+ count++;
+ if (fat_ent_get_safe(sb, clu, &clu))
+ return -EIO;
+ if (IS_CLUS_EOF(clu))
+ break;
+ }
+
+ *ret_count = count;
+ return 0;
+}
+
+/*
+ * Upcase table Management Functions
+ */
+/*
+ * Free the two-level up-case table: each allocated column, then the
+ * column-pointer array itself.  Safe to call on a partially built
+ * table (unallocated columns are NULL and kfree(NULL) is a no-op).
+ * Leaves fsi->vol_utbl NULL.
+ */
+static void free_upcase_table(struct super_block *sb)
+{
+ u32 i;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u16 **upcase_table;
+
+ upcase_table = fsi->vol_utbl;
+ for (i = 0 ; i < UTBL_COL_COUNT ; i++) {
+ /* kfree(NULL) is safe */
+ kfree(upcase_table[i]);
+ upcase_table[i] = NULL;
+ }
+
+ /* kfree(NULL) is safe */
+ kfree(fsi->vol_utbl);
+ fsi->vol_utbl = NULL;
+}
+
+/*
+ * Load the exFAT up-case table stored on disk at @sector for
+ * @num_sectors sectors, decompressing it into a sparse two-level
+ * array (columns allocated on demand).  The table is run-length
+ * compressed: a value equal to the current index is an identity
+ * mapping, 0xFFFF marks the next word as a skip count, anything else
+ * is an explicit mapping.  A rotate-right-by-one checksum is computed
+ * over every byte and compared against @utbl_checksum.
+ * Returns 0 on success; on any failure the partial table is freed and
+ * a negative errno is returned.
+ */
+static s32 __load_upcase_table(struct super_block *sb, u64 sector, u64 num_sectors, u32 utbl_checksum)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *tmp_bh = NULL;
+ u32 sect_size = (u32)sb->s_blocksize;
+ s32 ret = -EIO;
+ u32 i, j;
+
+ u8 skip = false;
+ u32 index = 0;
+ u32 checksum = 0;
+ u16 **upcase_table = kzalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL);
+
+ if (!upcase_table)
+ return -ENOMEM;
+ /* kzalloc() already zeroed the column pointers;
+ * no explicit memset is needed.
+ */
+
+ fsi->vol_utbl = upcase_table;
+ num_sectors += sector;
+
+ while (sector < num_sectors) {
+ ret = read_sect(sb, sector, &tmp_bh, 1);
+ if (ret) {
+ EMSG("%s: failed to read sector(0x%llx)\n",
+ __func__, sector);
+ goto error;
+ }
+ sector++;
+
+ for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
+ /* FIXME : is __le16 ok? */
+ //u16 uni = le16_to_cpu(((__le16*)(tmp_bh->b_data))[i]);
+ u16 uni = get_unaligned_le16((u8 *)tmp_bh->b_data+i);
+
+ /* rotate-right-by-1 checksum over each raw byte */
+ checksum = ((checksum & 1) ? 0x80000000 : 0) +
+ (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+i);
+ checksum = ((checksum & 1) ? 0x80000000 : 0) +
+ (checksum >> 1) + *(((u8 *)tmp_bh->b_data)+(i+1));
+
+ if (skip) {
+ MMSG("skip from 0x%X to 0x%X(amount of 0x%X)\n",
+ index, index+uni, uni);
+ index += uni;
+ skip = false;
+ } else if (uni == index) {
+ index++;
+ } else if (uni == 0xFFFF) {
+ skip = true;
+ } else { /* uni != index , uni != 0xFFFF */
+ u16 col_index = get_col_index(index);
+
+ if (!upcase_table[col_index]) {
+ upcase_table[col_index] =
+ kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL);
+ if (!upcase_table[col_index]) {
+ EMSG("failed to allocate memory"
+ " for column 0x%X\n",
+ col_index);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /* initialize new column to identity mapping */
+ for (j = 0; j < UTBL_ROW_COUNT; j++)
+ upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+ }
+
+ upcase_table[col_index][get_row_index(index)] = uni;
+ index++;
+ }
+ }
+ }
+ /* success only if the whole 16-bit range was covered and the
+ * computed checksum matches the one from the dentry
+ */
+ if (index >= 0xFFFF && utbl_checksum == checksum) {
+ DMSG("%s: load upcase table successfully"
+ "(idx:0x%08x, utbl_chksum:0x%08x)\n",
+ __func__, index, utbl_checksum);
+ if (tmp_bh)
+ brelse(tmp_bh);
+ return 0;
+ }
+
+ EMSG("%s: failed to load upcase table"
+ "(idx:0x%08x, chksum:0x%08x, utbl_chksum:0x%08x)\n",
+ __func__, index, checksum, utbl_checksum);
+
+ ret = -EINVAL;
+error:
+ if (tmp_bh)
+ brelse(tmp_bh);
+ free_upcase_table(sb);
+ return ret;
+}
+
+/*
+ * Build the up-case table from the built-in compressed default table
+ * (uni_def_upcase) instead of reading it from disk.  Uses the same
+ * run-length encoding as __load_upcase_table(): value == index means
+ * identity, 0xFFFF marks a skip count, anything else is an explicit
+ * mapping.  Returns 0 on success; frees the partial table and returns
+ * a negative errno on failure.
+ */
+static s32 __load_default_upcase_table(struct super_block *sb)
+{
+ s32 i, ret = -EIO;
+ u32 j;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ u8 skip = false;
+ u32 index = 0;
+ u16 uni = 0;
+ u16 **upcase_table;
+
+ upcase_table = kmalloc((UTBL_COL_COUNT * sizeof(u16 *)), GFP_KERNEL);
+ if (!upcase_table)
+ return -ENOMEM;
+
+ fsi->vol_utbl = upcase_table;
+ memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *));
+
+ for (i = 0; index <= 0xFFFF && i < SDFAT_NUM_UPCASE*2; i += 2) {
+ /* FIXME : is __le16 ok? */
+ //uni = le16_to_cpu(((__le16*)uni_def_upcase)[i>>1]);
+ uni = get_unaligned_le16((u8 *)uni_def_upcase+i);
+ if (skip) {
+ MMSG("skip from 0x%x ", index);
+ index += uni;
+ MMSG("to 0x%x (amount of 0x%x)\n", index, uni);
+ skip = false;
+ } else if (uni == index) {
+ index++;
+ } else if (uni == 0xFFFF) {
+ skip = true;
+ } else { /* uni != index , uni != 0xFFFF */
+ u16 col_index = get_col_index(index);
+
+ if (!upcase_table[col_index]) {
+ upcase_table[col_index] = kmalloc((UTBL_ROW_COUNT * sizeof(u16)), GFP_KERNEL);
+ if (!upcase_table[col_index]) {
+ EMSG("failed to allocate memory for "
+ "new column 0x%x\n", col_index);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /* initialize new column to identity mapping */
+ for (j = 0; j < UTBL_ROW_COUNT; j++)
+ upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+ }
+
+ upcase_table[col_index][get_row_index(index)] = uni;
+ index++;
+ }
+ }
+
+ /* success only if the whole 16-bit range was covered */
+ if (index >= 0xFFFF)
+ return 0;
+
+error:
+ /* FATAL error: default upcase table has error */
+ free_upcase_table(sb);
+ return ret;
+}
+
+/*
+ * Locate and load the volume's up-case table.
+ * On exFAT, the root directory is scanned for a TYPE_UPCASE dentry
+ * whose start cluster/size/checksum describe the on-disk table.  On
+ * non-exFAT volumes, or when the on-disk table is invalid (any error
+ * other than -EIO), the built-in default table is used instead.
+ * Returns 0 on success or a negative errno.
+ */
+static s32 load_upcase_table(struct super_block *sb)
+{
+ s32 i, ret;
+ u32 tbl_clu, type;
+ u64 sector, tbl_size, num_sectors;
+ u8 blksize_bits = sb->s_blocksize_bits;
+ CHAIN_T clu;
+ CASE_DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ clu.dir = fsi->root_dir;
+ clu.flags = 0x01;
+
+ if (fsi->vol_type != EXFAT)
+ goto load_default;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < fsi->dentries_per_clu; i++) {
+ ep = (CASE_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type((DENTRY_T *) ep);
+
+ /* TYPE_UNUSED marks the end of the directory */
+ if (type == TYPE_UNUSED)
+ break;
+ if (type != TYPE_UPCASE)
+ continue;
+
+ tbl_clu = le32_to_cpu(ep->start_clu);
+ tbl_size = le64_to_cpu(ep->size);
+
+ sector = CLUS_TO_SECT(fsi, tbl_clu);
+ num_sectors = ((tbl_size - 1) >> blksize_bits) + 1;
+ ret = __load_upcase_table(sb, sector, num_sectors,
+ le32_to_cpu(ep->checksum));
+
+ /* fall back to the default table on non-I/O errors */
+ if (ret && (ret != -EIO))
+ goto load_default;
+
+ /* load successfully */
+ return ret;
+ }
+
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+
+load_default:
+ sdfat_log_msg(sb, KERN_INFO, "trying to load default upcase table");
+ /* load default upcase table */
+ return __load_default_upcase_table(sb);
+} /* end of load_upcase_table */
+
+
+/*
+ * Directory Entry Management Functions
+ */
+/*
+ * Translate @byte_offset within directory @p_dir into the cluster
+ * number that holds it, stored in *@clu (if @clu is non-NULL).
+ * Contiguous chains (flags == 0x03) are resolved arithmetically;
+ * otherwise the FAT chain is walked cluster by cluster.  Walking off
+ * the end of the chain is reported as a filesystem error.
+ * Returns 0 on success, -EIO on FAT error or out-of-chain access.
+ */
+s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 clu_offset;
+ u32 cur_clu;
+
+ clu_offset = byte_offset >> fsi->cluster_size_bits;
+ cur_clu = p_dir->dir;
+
+ if (p_dir->flags == 0x03) {
+ cur_clu += clu_offset;
+ } else {
+ while (clu_offset > 0) {
+ if (get_next_clus_safe(sb, &cur_clu))
+ return -EIO;
+ if (IS_CLUS_EOF(cur_clu)) {
+ sdfat_fs_error(sb, "invalid dentry access "
+ "beyond EOF (clu : %u, eidx : %d)",
+ p_dir->dir,
+ byte_offset >> DENTRY_SIZE_BITS);
+ return -EIO;
+ }
+ clu_offset--;
+ }
+ }
+
+ if (clu)
+ *clu = cur_clu;
+ return 0;
+}
+
+static s32 find_location(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector, s32 *offset)
+{
+ s32 ret;
+ u32 off, clu = 0;
+ u32 blksize_mask = (u32)(sb->s_blocksize-1);
+ u8 blksize_bits = sb->s_blocksize_bits;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ off = entry << DENTRY_SIZE_BITS;
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir)) {
+ *offset = off & blksize_mask;
+ *sector = off >> blksize_bits;
+ *sector += fsi->root_start_sector;
+ return 0;
+ }
+
+ ret = walk_fat_chain(sb, p_dir, off, &clu);
+ if (ret)
+ return ret;
+
+ /* byte offset in cluster */
+ off &= (fsi->cluster_size - 1);
+
+ /* byte offset in sector */
+ *offset = off & blksize_mask;
+
+ /* sector offset in cluster */
+ *sector = off >> blksize_bits;
+ *sector += CLUS_TO_SECT(fsi, clu);
+ return 0;
+} /* end of find_location */
+
+/*
+ * Return a pointer (into the dcache buffer) to directory entry index
+ * @entry of @p_dir, optionally storing its sector in *@sector.
+ * Issues a page-sized readahead when crossing a page boundary of
+ * dentries, except for the FAT12/16 fixed root directory.
+ * Returns NULL on deleted-directory access or I/O failure.
+ */
+DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 dentries_per_page = PAGE_SIZE >> DENTRY_SIZE_BITS;
+ s32 off;
+ u64 sec;
+ u8 *buf;
+
+ if (p_dir->dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ BUG_ON(!fsi->prev_eio);
+ return NULL;
+ }
+
+ if (find_location(sb, p_dir, entry, &sec, &off))
+ return NULL;
+
+ /* DIRECTORY READAHEAD :
+ * Try to read ahead per a page except root directory of fat12/16
+ */
+ if ((!IS_CLUS_FREE(p_dir->dir)) &&
+ !(entry & (dentries_per_page - 1)))
+ dcache_readahead(sb, sec);
+
+ buf = dcache_getblk(sb, sec);
+ if (!buf)
+ return NULL;
+
+ if (sector)
+ *sector = sec;
+ return (DENTRY_T *)(buf + off);
+} /* end of get_dentry_in_dir */
+
+/* used only in search_empty_slot() */
+#define CNT_UNUSED_NOHIT (-1)
+#define CNT_UNUSED_HIT (-2)
+/* Search for @num_entries EMPTY CONTINUOUS directory entries in @p_dir.
+ * An empty-slot hint (@hint_femp) from a previous search is consumed
+ * first; while scanning, the hint is refreshed with the start of the
+ * current empty run so the next search can resume there.
+ * Return value convention differs by volume type: for exFAT the index
+ * of the FIRST entry of the found run is returned, for FAT the index
+ * of the LAST entry.  Returns -EIO on error, -ENOSPC if no run fits.
+ */
+static s32 search_empty_slot(struct super_block *sb, HINT_FEMP_T *hint_femp, CHAIN_T *p_dir, s32 num_entries)
+{
+ s32 i, dentry, num_empty = 0;
+ s32 dentries_per_clu;
+ u32 type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ ASSERT(-1 <= hint_femp->eidx);
+
+ if (hint_femp->eidx != -1) {
+ /* resume from the hinted empty run */
+ clu.dir = hint_femp->cur.dir;
+ clu.size = hint_femp->cur.size;
+ clu.flags = hint_femp->cur.flags;
+
+ dentry = hint_femp->eidx;
+
+ if (num_entries <= hint_femp->count) {
+ MMSG("%s: empty slot(HIT) - found "
+ "(clu : 0x%08x eidx : %d)\n",
+ __func__, hint_femp->cur.dir, hint_femp->eidx);
+ hint_femp->eidx = -1;
+
+ if (fsi->vol_type == EXFAT)
+ return dentry;
+
+ return dentry + (num_entries - 1);
+ }
+ MMSG("%s: empty slot(HIT) - search from "
+ "(clu : 0x%08x eidx : %d)\n",
+ __func__, hint_femp->cur.dir, hint_femp->eidx);
+ } else {
+ MMSG("%s: empty slot(MISS) - search from "
+ "(clu:0x%08x eidx : 0)\n",
+ __func__, p_dir->dir);
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ dentry = 0;
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ i = dentry % dentries_per_clu;
+ else
+ i = dentry & (dentries_per_clu-1);
+
+ for ( ; i < dentries_per_clu; i++, dentry++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ if ((type == TYPE_UNUSED) || (type == TYPE_DELETED)) {
+ num_empty++;
+ if (hint_femp->eidx == -1) {
+ /* remember where this empty run starts */
+ hint_femp->eidx = dentry;
+ hint_femp->count = CNT_UNUSED_NOHIT;
+
+ hint_femp->cur.dir = clu.dir;
+ hint_femp->cur.size = clu.size;
+ hint_femp->cur.flags = clu.flags;
+ }
+
+ if ((type == TYPE_UNUSED) &&
+ (hint_femp->count != CNT_UNUSED_HIT)) {
+ hint_femp->count = CNT_UNUSED_HIT;
+ }
+ } else {
+ if ((hint_femp->eidx != -1) &&
+ (hint_femp->count == CNT_UNUSED_HIT)) {
+ /* unused empty group means
+ * an empty group which includes
+ * unused dentry
+ */
+ sdfat_fs_error(sb,
+ "found bogus dentry(%d) "
+ "beyond unused empty group(%d) "
+ "(start_clu : %u, cur_clu : %u)",
+ dentry, hint_femp->eidx, p_dir->dir,
+ clu.dir);
+ return -EIO;
+ }
+
+ num_empty = 0;
+ hint_femp->eidx = -1;
+ }
+
+ if (num_empty >= num_entries) {
+ /* found and invalidate hint_femp */
+ hint_femp->eidx = -1;
+
+ if (fsi->vol_type == EXFAT)
+ return (dentry - (num_entries-1));
+
+ return dentry;
+ }
+ }
+
+ if (IS_CLUS_FREE(p_dir->dir))
+ break; /* FAT16 root_dir */
+
+ /* advance to the next cluster of the directory */
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ return -ENOSPC;
+} /* end of search_empty_slot */
+
+/* Find @num_entries contiguous empty directory entries in @p_dir.
+ * If none exist, grow the directory by one cluster at a time:
+ * allocate and zero a new cluster, append it to the chain (converting
+ * a no-fat-chain directory to a regular chain if needed), update the
+ * directory's own dentry (size/flags, exFAT non-root only) and the
+ * in-memory inode sizes, then retry the search.
+ * Returns the dentry index (same exFAT/FAT convention as
+ * search_empty_slot()) or a negative errno.
+ */
+static s32 find_empty_entry(struct inode *inode, CHAIN_T *p_dir, s32 num_entries)
+{
+ s32 dentry;
+ u32 ret, last_clu;
+ u64 sector;
+ u64 size = 0;
+ CHAIN_T clu;
+ DENTRY_T *ep = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ HINT_FEMP_T hint_femp;
+
+ hint_femp.eidx = -1;
+
+ ASSERT(-1 <= fid->hint_femp.eidx);
+
+ /* consume the cached empty-slot hint, if any */
+ if (fid->hint_femp.eidx != -1) {
+ memcpy(&hint_femp, &fid->hint_femp, sizeof(HINT_FEMP_T));
+ fid->hint_femp.eidx = -1;
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return search_empty_slot(sb, &hint_femp, p_dir, num_entries);
+
+ while ((dentry = search_empty_slot(sb, &hint_femp, p_dir, num_entries)) < 0) {
+ if (dentry == -EIO)
+ break;
+
+ if (fsi->fs_func->check_max_dentries(fid))
+ return -ENOSPC;
+
+ /* we trust p_dir->size regardless of FAT type */
+ if (__find_last_cluster(sb, p_dir, &last_clu))
+ return -EIO;
+
+ /*
+ * Allocate new cluster to this directory
+ */
+ clu.dir = last_clu + 1;
+ clu.size = 0; /* UNUSED */
+ clu.flags = p_dir->flags;
+
+ /* (0) check if there are reserved clusters
+ * (see the comment in create_dir)
+ */
+ if (!IS_CLUS_EOF(fsi->used_clusters) &&
+ ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2)))
+ return -ENOSPC;
+
+ /* (1) allocate a cluster */
+ ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT);
+ if (ret)
+ return ret;
+
+ if (__clear_cluster(inode, clu.dir))
+ return -EIO;
+
+ /* (2) append to the FAT chain */
+ if (clu.flags != p_dir->flags) {
+ /* no-fat-chain bit is disabled,
+ * so fat-chain should be synced with alloc-bmp
+ */
+ chain_cont_cluster(sb, p_dir->dir, p_dir->size);
+ p_dir->flags = 0x01;
+ hint_femp.cur.flags = 0x01;
+ }
+
+ if (clu.flags == 0x01)
+ if (fat_ent_set(sb, last_clu, clu.dir))
+ return -EIO;
+
+ if (hint_femp.eidx == -1) {
+ /* the special case that new dentry
+ * should be allocated from the start of new cluster
+ */
+ hint_femp.eidx = (s32)(p_dir->size <<
+ (fsi->cluster_size_bits - DENTRY_SIZE_BITS));
+ hint_femp.count = fsi->dentries_per_clu;
+
+ hint_femp.cur.dir = clu.dir;
+ hint_femp.cur.size = 0;
+ hint_femp.cur.flags = clu.flags;
+ }
+ hint_femp.cur.size++;
+ p_dir->size++;
+ size = (p_dir->size << fsi->cluster_size_bits);
+
+ /* (3) update the directory entry */
+ if ((fsi->vol_type == EXFAT) && (p_dir->dir != fsi->root_dir)) {
+ ep = get_dentry_in_dir(sb,
+ &(fid->dir), fid->entry+1, &sector);
+ if (!ep)
+ return -EIO;
+ fsi->fs_func->set_entry_size(ep, size);
+ fsi->fs_func->set_entry_flag(ep, p_dir->flags);
+ if (dcache_modify(sb, sector))
+ return -EIO;
+
+ if (update_dir_chksum(sb, &(fid->dir), fid->entry))
+ return -EIO;
+ }
+
+ /* directory inode should be updated in here */
+ i_size_write(inode, (loff_t)size);
+ SDFAT_I(inode)->i_size_ondisk += fsi->cluster_size;
+ SDFAT_I(inode)->i_size_aligned += fsi->cluster_size;
+ SDFAT_I(inode)->fid.size = size;
+ SDFAT_I(inode)->fid.flags = p_dir->flags;
+ inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits);
+ }
+
+ return dentry;
+} /* end of find_empty_entry */
+
+#define SDFAT_MIN_SUBDIR (2)
+/* the "." and ".." short names occupying the first two slots of a
+ * non-root FAT directory
+ */
+static const char *dot_name[SDFAT_MIN_SUBDIR] = { DOS_CUR_DIR_NAME, DOS_PAR_DIR_NAME };
+
+/*
+ * Count the entries in @p_dir matching @type (or all entries for
+ * TYPE_ALL), stopping at the first TYPE_UNUSED entry.  When @dotcnt
+ * is non-NULL and the volume is not exFAT, the number of "." / ".."
+ * entries found in the first two slots of the first cluster is also
+ * reported there.  Returns the count or -EIO.
+ */
+static s32 __count_dos_name_entries(struct super_block *sb, CHAIN_T *p_dir, u32 type, u32 *dotcnt)
+{
+ s32 i, count = 0, check_dot = 0;
+ s32 dentries_per_clu;
+ u32 entry_type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ if (dotcnt) {
+ *dotcnt = 0;
+ if (fsi->vol_type != EXFAT)
+ check_dot = 1;
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < dentries_per_clu; i++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ entry_type = fsi->fs_func->get_entry_type(ep);
+
+ if (entry_type == TYPE_UNUSED)
+ return count;
+ if (!(type & TYPE_CRITICAL_PRI) && !(type & TYPE_BENIGN_PRI))
+ continue;
+
+ if ((type != TYPE_ALL) && (type != entry_type))
+ continue;
+
+ count++;
+ if (check_dot && (i < SDFAT_MIN_SUBDIR)) {
+ BUG_ON(fsi->vol_type == EXFAT);
+ /* 11 is DOS_NAME_LENGTH */
+ if (!strncmp(ep->dummy, dot_name[i], 11))
+ (*dotcnt)++;
+ }
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ break;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+
+ /* dot entries only live in the first cluster */
+ check_dot = 0;
+ }
+
+ return count;
+}
+
+/*
+ * Check whether directory @p_dir is empty (e.g. before rmdir).
+ * Returns 0 if empty, -ENOTEMPTY if any real file/dir entry exists,
+ * -EIO on read error.  On FAT (non-exFAT) volumes, up to two entries
+ * are tolerated in a non-root directory to account for the "." and
+ * ".." dentries; any entry in the root, or a third entry elsewhere,
+ * means not empty.
+ */
+s32 check_dir_empty(struct super_block *sb, CHAIN_T *p_dir)
+{
+ s32 i, count = 0;
+ s32 dentries_per_clu;
+ u32 type;
+ CHAIN_T clu;
+ DENTRY_T *ep;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (IS_CLUS_FREE(p_dir->dir)) /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+ else
+ dentries_per_clu = fsi->dentries_per_clu;
+
+ clu.dir = p_dir->dir;
+ clu.size = p_dir->size;
+ clu.flags = p_dir->flags;
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ for (i = 0; i < dentries_per_clu; i++) {
+ ep = get_dentry_in_dir(sb, &clu, i, NULL);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ /* TYPE_UNUSED marks the end of the directory */
+ if (type == TYPE_UNUSED)
+ return 0;
+
+ if ((type != TYPE_FILE) && (type != TYPE_DIR))
+ continue;
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return -ENOTEMPTY;
+
+ if (fsi->vol_type == EXFAT)
+ return -ENOTEMPTY;
+
+ if ((p_dir->dir == fsi->root_dir) || (++count > 2))
+ return -ENOTEMPTY;
+ }
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(p_dir->dir))
+ return -ENOTEMPTY;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Name Conversion Functions
+ */
+#ifdef CONFIG_SDFAT_ALLOW_LOOKUP_LOSSY_SFN
+ /* over name length only */
+#define NEED_INVALIDATE_SFN(x) ((x) & NLS_NAME_OVERLEN)
+#else
+ /* all lossy case */
+#define NEED_INVALIDATE_SFN(x) (x)
+#endif
+
+/* NOTE :
+ * We should keep shortname code compatible with v1.0.15 or lower
+ * So, we try to check ext-only-name at create-mode only.
+ *
+ * i.e. '.mtp' ->
+ * v1.0.15 : ' MTP' with name_case, 0x10
+ * v1.1.0 : 'MT????~?' with name_case, 0x00 and longname.
+ */
+/*
+ * Rewrite an extension-only short name (leading '.', empty base) at
+ * create time: the extension chars become the base name, remaining
+ * base chars are filled with '_', the extension is blanked, and the
+ * result is marked lossy so a long-name entry will be generated.
+ * Lookups and already-lossy names are left untouched.
+ */
+static inline void preprocess_ext_only_sfn(s32 lookup, u16 first_char, DOS_NAME_T *p_dosname, s32 *lossy)
+{
+#ifdef CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN
+ int i;
+ /* check ext-only-name at create-mode */
+ if (*lossy || lookup || (first_char != (u16)'.'))
+ return;
+
+ p_dosname->name_case = 0xFF;
+
+ /* move ext-name to base-name */
+ for (i = 0; i < 3; i++) {
+ p_dosname->name[i] = p_dosname->name[8+i];
+ if (p_dosname->name[i] == ' ')
+ p_dosname->name[i] = '_';
+ }
+
+ /* fill remained space with '_' */
+ for (i = 3; i < 8; i++)
+ p_dosname->name[i] = '_';
+
+ /* eliminate ext-name */
+ for (i = 8; i < 11; i++)
+ p_dosname->name[i] = ' ';
+
+ *lossy = NLS_NAME_LOSSY;
+#endif /* CONFIG_SDFAT_RESTRICT_EXT_ONLY_SFN */
+}
+
+/* input : dir, uni_name
+ * output : num_of_entry, dos_name(format : aaaaaa~1.bbb)
+ *
+ * Compute how many directory entries the Unicode name needs and, for
+ * non-exFAT volumes, derive its 8.3 short name.  Lossless short names
+ * are checked against the MS-DOS reserved device names (-EINVAL if
+ * matched) and can collapse to a single entry; lossy names get a
+ * generated numeric-tail name at create time, or are invalidated at
+ * lookup time per NEED_INVALIDATE_SFN().  Returns 0 or -EINVAL.
+ */
+static s32 get_num_entries_and_dos_name(struct super_block *sb, CHAIN_T *p_dir,
+ UNI_NAME_T *p_uniname, s32 *entries,
+ DOS_NAME_T *p_dosname, s32 lookup)
+{
+ s32 ret, num_entries, lossy = NLS_NAME_NO_LOSSY;
+ s8 **r;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* Init null char. */
+ p_dosname->name[0] = '\0';
+
+ num_entries = fsi->fs_func->calc_num_entries(p_uniname);
+ if (num_entries == 0)
+ return -EINVAL;
+
+ /* exFAT has no short names; nothing more to derive */
+ if (fsi->vol_type == EXFAT)
+ goto out;
+
+ nls_uni16s_to_sfn(sb, p_uniname, p_dosname, &lossy);
+
+ preprocess_ext_only_sfn(lookup, p_uniname->name[0], p_dosname, &lossy);
+
+ if (!lossy) {
+ /* reject MS-DOS reserved device names */
+ for (r = reserved_names; *r; r++) {
+ if (!strncmp((void *) p_dosname->name, *r, 8))
+ return -EINVAL;
+ }
+
+ if (p_dosname->name_case != 0xFF)
+ num_entries = 1;
+ } else if (!lookup) {
+ /* create new dos name */
+ ret = fat_generate_dos_name_new(sb, p_dir, p_dosname,
+ num_entries);
+ if (ret)
+ return ret;
+
+ } else if (NEED_INVALIDATE_SFN(lossy)) {
+ /* FIXME : We should check num_entries */
+ p_dosname->name[0] = '\0';
+ }
+
+ if (num_entries > 1)
+ p_dosname->name_case = 0x0;
+out:
+ *entries = num_entries;
+ return 0;
+} /* end of get_num_entries_and_dos_name */
+
+void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode)
+{
+ DOS_NAME_T dos_name;
+
+ if (mode == 0x0)
+ dos_name.name_case = 0x0;
+ else
+ dos_name.name_case = ep->lcase;
+
+ memcpy(dos_name.name, ep->name, DOS_NAME_LENGTH);
+ nls_sfn_to_uni16s(sb, &dos_name, p_uniname);
+} /* end of get_uniname_from_dos_entry */
+
/* Return the length of a name of @len bytes with all trailing '.'
 * characters discarded (VFAT-style trailing-dot stripping).
 */
static inline unsigned int __striptail_len(unsigned int len, const char *name)
{
	unsigned int n;

	for (n = len; n != 0; n--) {
		if (name[n - 1] != '.')
			break;
	}
	return n;
}
+
/*
 * Name Resolution Functions :
 * Zero if it was successful; otherwise nonzero.
 *
 * Validate @path, convert it to a unicode name in @p_uniname, and fill
 * @p_dir with the cluster chain of the directory @inode.  @lookup
 * relaxes the rules (lossy names are tolerated) for the lookup path.
 */
static s32 __resolve_path(struct inode *inode, const u8 *path, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, int lookup)
{
	s32 namelen;
	s32 lossy = NLS_NAME_NO_LOSSY;
	struct super_block *sb = inode->i_sb;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
	FILE_ID_T *fid = &(SDFAT_I(inode)->fid);

	/* DOT and DOTDOT are handled by VFS layer */

	/* strip all trailing spaces */
	/* DO NOTHING : Is needed? */

	/* strip all trailing periods */
	namelen = __striptail_len(strlen(path), path);
	if (!namelen)
		return -ENOENT;

	/* the limitation of linux? (name length cap in multibyte form) */
	if (strlen(path) > (MAX_NAME_LENGTH * MAX_CHARSET_SIZE))
		return -ENAMETOOLONG;

	/*
	 * strip all leading spaces :
	 * "MS windows 7" supports leading spaces.
	 * So we should skip this preprocessing for compatibility.
	 */

	/* file name conversion :
	 * If lookup case, we allow bad-name for compatibility.
	 */
	namelen = nls_vfsname_to_uni16s(sb, path, namelen, p_uniname, &lossy);
	if (namelen < 0)
		return namelen; /* return error value */

	/* lossy conversion is only acceptable on the lookup path */
	if ((lossy && !lookup) || !namelen)
		return -EINVAL;

	/* the cached size must agree with the VFS inode size */
	sdfat_debug_bug_on(fid->size != i_size_read(inode));
//	fid->size = i_size_read(inode);

	/* describe the directory's cluster chain for the caller */
	p_dir->dir = fid->start_clu;
	p_dir->size = (u32)(fid->size >> fsi->cluster_size_bits);
	p_dir->flags = fid->flags;

	return 0;
}
+
/* Resolve @path for create/rename: lossy names are rejected. */
static inline s32 resolve_path(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni)
{
	return __resolve_path(inode, path, dir, uni, 0);
}
+
/* Resolve @path for lookup: lossy names are tolerated for compatibility. */
static inline s32 resolve_path_for_lookup(struct inode *inode, const u8 *path, CHAIN_T *dir, UNI_NAME_T *uni)
{
	return __resolve_path(inode, path, dir, uni, 1);
}
+
+static s32 create_dir(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+ s32 dentry, num_entries;
+ u64 ret;
+ u64 size;
+ CHAIN_T clu;
+ DOS_NAME_T dos_name, dot_name;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0);
+ if (ret)
+ return ret;
+
+ /* find_empty_entry must be called before alloc_cluster */
+ dentry = find_empty_entry(inode, p_dir, num_entries);
+ if (dentry < 0)
+ return dentry; /* -EIO or -ENOSPC */
+
+ clu.dir = CLUS_EOF;
+ clu.size = 0;
+ clu.flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+
+ /* (0) Check if there are reserved clusters up to max. */
+ if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - CLUS_BASE))
+ return -ENOSPC;
+
+ /* (1) allocate a cluster */
+ ret = fsi->fs_func->alloc_cluster(sb, 1, &clu, ALLOC_HOT);
+ if (ret)
+ return ret;
+
+ ret = __clear_cluster(inode, clu.dir);
+ if (ret)
+ return ret;
+
+ size = fsi->cluster_size;
+ if (fsi->vol_type != EXFAT) {
+ /* initialize the . and .. entry
+ * Information for . points to itself
+ * Information for .. points to parent dir
+ */
+
+ dot_name.name_case = 0x0;
+ memcpy(dot_name.name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH);
+
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 0, TYPE_DIR, clu.dir, 0);
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, &clu, 0, 1, NULL, &dot_name);
+ if (ret)
+ return ret;
+
+ memcpy(dot_name.name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH);
+
+ if (p_dir->dir == fsi->root_dir)
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, CLUS_FREE, 0);
+ else
+ ret = fsi->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, p_dir->dir, 0);
+
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, &clu, 1, 1, NULL, &dot_name);
+ if (ret)
+ return ret;
+ }
+
+ /* (2) update the directory entry */
+ /* make sub-dir entry in parent directory */
+ ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_DIR, clu.dir, size);
+ if (ret)
+ return ret;
+
+ ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
+ if (ret)
+ return ret;
+
+ fid->dir.dir = p_dir->dir;
+ fid->dir.size = p_dir->size;
+ fid->dir.flags = p_dir->flags;
+ fid->entry = dentry;
+
+ fid->attr = ATTR_SUBDIR;
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->size = size;
+ fid->start_clu = clu.dir;
+
+ fid->type = TYPE_DIR;
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+
+ /* hint_stat will be used if this is directory. */
+ fid->version = 0;
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ return 0;
+} /* end of create_dir */
+
/*
 * create_file - create a new regular-file entry under @p_dir.
 * @mode: attribute bits to merge into the new entry (e.g. read-only)
 * @fid:  out: file ID describing the created file
 *
 * Only directory entries are written; no cluster is allocated yet
 * (start cluster stays 0 until first write).  Returns 0 or -errno.
 */
static s32 create_file(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, u8 mode, FILE_ID_T *fid)
{
	s32 ret, dentry, num_entries;
	DOS_NAME_T dos_name;
	struct super_block *sb = inode->i_sb;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name, 0);
	if (ret)
		return ret;

	/* find_empty_entry must be called before alloc_cluster() */
	dentry = find_empty_entry(inode, p_dir, num_entries);
	if (dentry < 0)
		return dentry; /* -EIO or -ENOSPC */

	/* (1) update the directory entry */
	/* fill the dos name directory entry information of the created file.
	 * the first cluster is not determined yet. (0)
	 */
	ret = fsi->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_FILE | mode, CLUS_FREE, 0);
	if (ret)
		return ret;

	ret = fsi->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
	if (ret)
		return ret;

	/* record where the new entry lives in the parent */
	fid->dir.dir = p_dir->dir;
	fid->dir.size = p_dir->size;
	fid->dir.flags = p_dir->flags;
	fid->entry = dentry;

	fid->attr = ATTR_ARCHIVE | mode;
	fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
	fid->size = 0;
	fid->start_clu = CLUS_EOF;

	fid->type = TYPE_FILE;
	fid->rwoffset = 0;
	fid->hint_bmap.off = CLUS_EOF;

	/* hint_stat will be used if this is directory. */
	fid->version = 0;
	fid->hint_stat.eidx = 0;
	fid->hint_stat.clu = fid->start_clu;
	fid->hint_femp.eidx = -1;

	return 0;
} /* end of create_file */
+
/*
 * remove_file - delete the directory entry set at index @entry of @p_dir.
 *
 * Counts the extension entries belonging to the main entry (under the
 * dcache lock, as count_ext_entries requires) and deletes the whole
 * set.  Does not free the file's clusters.  Returns 0 or -EIO.
 */
static s32 remove_file(struct inode *inode, CHAIN_T *p_dir, s32 entry)
{
	s32 num_entries;
	u64 sector;
	DENTRY_T *ep;
	struct super_block *sb = inode->i_sb;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	ep = get_dentry_in_dir(sb, p_dir, entry, &sector);
	if (!ep)
		return -EIO;

	dcache_lock(sb, sector);

	/* dcache_lock() before call count_ext_entries() */
	num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, entry, ep);
	if (num_entries < 0) {
		dcache_unlock(sb, sector);
		return -EIO;
	}
	/* +1 for the main entry itself */
	num_entries++;

	dcache_unlock(sb, sector);

	/* (1) update the directory entry */
	return fsi->fs_func->delete_dir_entry(sb, p_dir, entry, 0, num_entries);
} /* end of remove_file */
+
/*
 * rename_file - rename the entry at @oldentry within the same directory.
 *
 * If the new name needs more entries than the old one, a fresh slot is
 * found, the main (and on exFAT the stream) entry is copied there, the
 * extension entries are rebuilt and the old set deleted.  Otherwise the
 * entry is updated in place and surplus extension entries are removed.
 * dcache_lock/unlock bracket every count/copy on a cached sector.
 * Returns 0 or a negative error code.
 */
static s32 rename_file(struct inode *inode, CHAIN_T *p_dir, s32 oldentry, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
{
	s32 ret, newentry = -1, num_old_entries, num_new_entries;
	u64 sector_old, sector_new;
	DOS_NAME_T dos_name;
	DENTRY_T *epold, *epnew;
	struct super_block *sb = inode->i_sb;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	epold = get_dentry_in_dir(sb, p_dir, oldentry, &sector_old);
	if (!epold)
		return -EIO;

	dcache_lock(sb, sector_old);

	/* dcache_lock() before call count_ext_entries() */
	num_old_entries = fsi->fs_func->count_ext_entries(sb, p_dir, oldentry, epold);
	if (num_old_entries < 0) {
		dcache_unlock(sb, sector_old);
		return -EIO;
	}
	num_old_entries++;

	ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_new_entries, &dos_name, 0);
	if (ret) {
		dcache_unlock(sb, sector_old);
		return ret;
	}

	if (num_old_entries < num_new_entries) {
		/* the new name does not fit in the old slot: relocate */
		newentry = find_empty_entry(inode, p_dir, num_new_entries);
		if (newentry < 0) {
			dcache_unlock(sb, sector_old);
			return newentry; /* -EIO or -ENOSPC */
		}

		epnew = get_dentry_in_dir(sb, p_dir, newentry, &sector_new);
		if (!epnew) {
			dcache_unlock(sb, sector_old);
			return -EIO;
		}

		memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
		/* renaming a file sets the archive attribute */
		if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) {
			fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
			fid->attr |= ATTR_ARCHIVE;
		}
		dcache_modify(sb, sector_new);
		dcache_unlock(sb, sector_old);

		if (fsi->vol_type == EXFAT) {
			/* exFAT: also copy the stream-extension entry (+1) */
			epold = get_dentry_in_dir(sb, p_dir, oldentry+1, &sector_old);
			dcache_lock(sb, sector_old);
			epnew = get_dentry_in_dir(sb, p_dir, newentry+1, &sector_new);

			if (!epold || !epnew) {
				dcache_unlock(sb, sector_old);
				return -EIO;
			}

			memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
			dcache_modify(sb, sector_new);
			dcache_unlock(sb, sector_old);
		}

		ret = fsi->fs_func->init_ext_entry(sb, p_dir, newentry, num_new_entries, p_uniname, &dos_name);
		if (ret)
			return ret;

		fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, 0, num_old_entries);
		fid->entry = newentry;
	} else {
		/* the new name fits: rewrite in place */
		if (fsi->fs_func->get_entry_type(epold) == TYPE_FILE) {
			fsi->fs_func->set_entry_attr(epold, fsi->fs_func->get_entry_attr(epold) | ATTR_ARCHIVE);
			fid->attr |= ATTR_ARCHIVE;
		}
		dcache_modify(sb, sector_old);
		dcache_unlock(sb, sector_old);

		ret = fsi->fs_func->init_ext_entry(sb, p_dir, oldentry, num_new_entries, p_uniname, &dos_name);
		if (ret)
			return ret;

		/* delete only the now-surplus trailing entries */
		fsi->fs_func->delete_dir_entry(sb, p_dir, oldentry, num_new_entries, num_old_entries);
	}

	return 0;
} /* end of rename_file */
+
/*
 * move_file - move the entry at @oldentry of @p_olddir into @p_newdir
 * under the (possibly new) name @p_uniname.
 *
 * Copies the main entry (and on exFAT the stream-extension entry) to a
 * fresh slot in the target directory, rebuilds the extension entries,
 * fixes up the '..' entry when a FAT directory changes parent, then
 * deletes the old entry set.  Returns 0 or a negative error code.
 */
static s32 move_file(struct inode *inode, CHAIN_T *p_olddir, s32 oldentry,
		CHAIN_T *p_newdir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
{
	s32 ret, newentry, num_new_entries, num_old_entries;
	u64 sector_mov, sector_new;
	CHAIN_T clu;
	DOS_NAME_T dos_name;
	DENTRY_T *epmov, *epnew;
	struct super_block *sb = inode->i_sb;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	epmov = get_dentry_in_dir(sb, p_olddir, oldentry, &sector_mov);
	if (!epmov)
		return -EIO;

	/* check if the source and target directory is the same */
	if (fsi->fs_func->get_entry_type(epmov) == TYPE_DIR &&
		fsi->fs_func->get_entry_clu0(epmov) == p_newdir->dir)
		return -EINVAL;

	dcache_lock(sb, sector_mov);

	/* dcache_lock() before call count_ext_entries() */
	num_old_entries = fsi->fs_func->count_ext_entries(sb, p_olddir, oldentry, epmov);
	if (num_old_entries < 0) {
		dcache_unlock(sb, sector_mov);
		return -EIO;
	}
	num_old_entries++;

	ret = get_num_entries_and_dos_name(sb, p_newdir, p_uniname, &num_new_entries, &dos_name, 0);
	if (ret) {
		dcache_unlock(sb, sector_mov);
		return ret;
	}

	newentry = find_empty_entry(inode, p_newdir, num_new_entries);
	if (newentry < 0) {
		dcache_unlock(sb, sector_mov);
		return newentry; /* -EIO or -ENOSPC */
	}

	epnew = get_dentry_in_dir(sb, p_newdir, newentry, &sector_new);
	if (!epnew) {
		dcache_unlock(sb, sector_mov);
		return -EIO;
	}

	memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
	/* moving a file sets the archive attribute */
	if (fsi->fs_func->get_entry_type(epnew) == TYPE_FILE) {
		fsi->fs_func->set_entry_attr(epnew, fsi->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
		fid->attr |= ATTR_ARCHIVE;
	}
	dcache_modify(sb, sector_new);
	dcache_unlock(sb, sector_mov);

	if (fsi->vol_type == EXFAT) {
		/* exFAT: also copy the stream-extension entry (+1) */
		epmov = get_dentry_in_dir(sb, p_olddir, oldentry+1, &sector_mov);
		dcache_lock(sb, sector_mov);
		epnew = get_dentry_in_dir(sb, p_newdir, newentry+1, &sector_new);
		if (!epmov || !epnew) {
			dcache_unlock(sb, sector_mov);
			return -EIO;
		}

		memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
		dcache_modify(sb, sector_new);
		dcache_unlock(sb, sector_mov);
	} else if (fsi->fs_func->get_entry_type(epnew) == TYPE_DIR) {
		/* change ".." pointer to new parent dir */
		clu.dir = fsi->fs_func->get_entry_clu0(epnew);
		clu.flags = 0x01;

		/* '..' is the second entry (index 1) of the moved directory */
		epnew = get_dentry_in_dir(sb, &clu, 1, &sector_new);
		if (!epnew)
			return -EIO;

		/* a parent at the root is encoded as cluster 0 */
		if (p_newdir->dir == fsi->root_dir)
			fsi->fs_func->set_entry_clu0(epnew, CLUS_FREE);
		else
			fsi->fs_func->set_entry_clu0(epnew, p_newdir->dir);
		dcache_modify(sb, sector_new);
	}

	ret = fsi->fs_func->init_ext_entry(sb, p_newdir, newentry, num_new_entries, p_uniname, &dos_name);
	if (ret)
		return ret;

	fsi->fs_func->delete_dir_entry(sb, p_olddir, oldentry, 0, num_old_entries);

	/* point the file ID at its new home */
	fid->dir.dir = p_newdir->dir;
	fid->dir.size = p_newdir->size;
	fid->dir.flags = p_newdir->flags;

	fid->entry = newentry;

	return 0;
} /* end of move_file */
+
+
+/*======================================================================*/
+/* Global Function Definitions */
+/*======================================================================*/
+/* roll back to the initial state of the file system */
+s32 fscore_init(void)
+{
+ s32 ret;
+
+ ret = check_type_size();
+ if (ret)
+ return ret;
+
+ return extent_cache_init();
+}
+
/* make free all memory-alloced global buffers (extent cache); always 0 */
s32 fscore_shutdown(void)
{
	extent_cache_shutdown();
	return 0;
}
+
/* check device is ejected: thin wrapper over the bdev-layer check */
s32 fscore_check_bdi_valid(struct super_block *sb)
{
	return bdev_check_bdi_valid(sb);
}
+
+static bool is_exfat(pbr_t *pbr)
+{
+ int i = 53;
+
+ do {
+ if (pbr->bpb.f64.res_zero[i-1])
+ break;
+ } while (--i);
+ return i ? false : true;
+}
+
+static bool is_fat32(pbr_t *pbr)
+{
+ if (le16_to_cpu(pbr->bpb.f16.num_fat_sectors))
+ return false;
+ return true;
+}
+
/*
 * Re-read the boot sector with the volume's own logical sector size.
 *
 * Reads the sector size from the PBR (exFAT or FAT layout), validates
 * it (power of two, 512..4096, not smaller than the device block size),
 * and if it is larger than the current sb->s_blocksize, re-sets the
 * block size and re-reads sector 0.  On success *prev_bh points at the
 * (possibly new) buffer_head and the PBR pointer is returned; on
 * failure NULL is returned and *prev_bh may have been released and
 * NULLed — callers must not brelse() a stale pointer.
 *
 * NOTE(review): plain `inline` (without `static`) at file scope has
 * C99 inline-linkage pitfalls (no external definition is guaranteed);
 * presumably this was meant to be `static` — confirm no other
 * translation unit references it.
 */
inline pbr_t *read_pbr_with_logical_sector(struct super_block *sb, struct buffer_head **prev_bh)
{
	pbr_t *p_pbr = (pbr_t *) (*prev_bh)->b_data;
	u16 logical_sect = 0;

	if (is_exfat(p_pbr))
		logical_sect = 1 << p_pbr->bsx.f64.sect_size_bits;
	else
		logical_sect = get_unaligned_le16(&p_pbr->bpb.f16.sect_size);

	/* is x a power of 2?
	 * (x) != 0 && (((x) & ((x) - 1)) == 0)
	 */
	if (!is_power_of_2(logical_sect)
			|| (logical_sect < 512)
			|| (logical_sect > 4096)) {
		sdfat_log_msg(sb, KERN_ERR, "bogus logical sector size %u",
				logical_sect);
		return NULL;
	}

	/* the volume's sector must not be smaller than the device block */
	if (logical_sect < sb->s_blocksize) {
		sdfat_log_msg(sb, KERN_ERR,
			"logical sector size too small for device"
			" (logical sector size = %u)", logical_sect);
		return NULL;
	}

	if (logical_sect > sb->s_blocksize) {
		struct buffer_head *bh = NULL;

		/* drop the old buffer before changing the block size */
		__brelse(*prev_bh);
		*prev_bh = NULL;

		if (!sb_set_blocksize(sb, logical_sect)) {
			sdfat_log_msg(sb, KERN_ERR,
				"unable to set blocksize %u", logical_sect);
			return NULL;
		}
		/* re-read the boot sector at the new block size */
		bh = sb_bread(sb, 0);
		if (!bh) {
			sdfat_log_msg(sb, KERN_ERR,
				"unable to read boot sector "
				"(logical sector size = %lu)", sb->s_blocksize);
			return NULL;
		}

		*prev_bh = bh;
		p_pbr = (pbr_t *) bh->b_data;
	}

	sdfat_log_msg(sb, KERN_INFO,
		"set logical sector size : %lu", sb->s_blocksize);

	return p_pbr;
}
+
/* mount the file system volume :
 * open the block device, read and validate the PBR, dispatch to the
 * exFAT / FAT32 / FAT16 mount routine, then load the upcase table and
 * (exFAT only) the allocation bitmap, and finally establish the
 * used-cluster count.  Returns 0 or a negative error code; on failure
 * everything acquired so far is torn down in reverse order.
 */
s32 fscore_mount(struct super_block *sb)
{
	s32 ret;
	pbr_t *p_pbr;
	struct buffer_head *tmp_bh = NULL;
	struct gendisk *disk = sb->s_bdev->bd_disk;
	struct hd_struct *part = sb->s_bdev->bd_part;
	struct sdfat_mount_options *opts = &(SDFAT_SB(sb)->options);
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	/* initialize previous I/O error */
	fsi->prev_eio = 0;

	/* open the block device */
	if (bdev_open_dev(sb))
		return -EIO;

	/* set block size to read super block */
	sb_min_blocksize(sb, 512);

	/* read boot sector */
	ret = read_sect(sb, 0, &tmp_bh, 1);
	if (ret) {
		sdfat_log_msg(sb, KERN_ERR, "unable to read boot sector");
		ret = -EIO;
		goto bd_close;
	}

	/* PRB is read */
	p_pbr = (pbr_t *) tmp_bh->b_data;

	/* check the validity of PBR (0xAA55 trailer) */
	if (le16_to_cpu((p_pbr->signature)) != PBR_SIGNATURE) {
		sdfat_log_msg(sb, KERN_ERR, "invalid boot record signature");
		brelse(tmp_bh);
		ret = -EINVAL;
		goto bd_close;
	}

	/* check logical sector size; may replace tmp_bh with a re-read */
	p_pbr = read_pbr_with_logical_sector(sb, &tmp_bh);
	if (!p_pbr) {
		brelse(tmp_bh);
		ret = -EIO;
		goto bd_close;
	}

	/* fill fs_struct : dispatch on the detected on-disk format,
	 * honouring an explicit fs_type mount option if one was given
	 */
	if (is_exfat(p_pbr)) {
		if (opts->fs_type && opts->fs_type != FS_TYPE_EXFAT) {
			sdfat_log_msg(sb, KERN_ERR,
				"not specified filesystem type "
				"(media:exfat, opts:%s)",
				FS_TYPE_STR[opts->fs_type]);
			ret = -EINVAL;
			goto free_bh;
		}
		/* set maximum file size for exFAT */
		sb->s_maxbytes = 0x7fffffffffffffffLL;
		opts->improved_allocation = 0;
		opts->defrag = 0;
		ret = mount_exfat(sb, p_pbr);
	} else if (is_fat32(p_pbr)) {
		if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) {
			sdfat_log_msg(sb, KERN_ERR,
				"not specified filesystem type "
				"(media:vfat, opts:%s)",
				FS_TYPE_STR[opts->fs_type]);
			ret = -EINVAL;
			goto free_bh;
		}
		/* set maximum file size for FAT */
		sb->s_maxbytes = 0xffffffff;
		ret = mount_fat32(sb, p_pbr);
	} else {
		if (opts->fs_type && opts->fs_type != FS_TYPE_VFAT) {
			sdfat_log_msg(sb, KERN_ERR,
				"not specified filesystem type "
				"(media:vfat, opts:%s)",
				FS_TYPE_STR[opts->fs_type]);
			ret = -EINVAL;
			goto free_bh;
		}
		/* set maximum file size for FAT */
		sb->s_maxbytes = 0xffffffff;
		opts->improved_allocation = 0;
		opts->defrag = 0;
		ret = mount_fat16(sb, p_pbr);
	}
free_bh:
	brelse(tmp_bh);
	if (ret) {
		sdfat_log_msg(sb, KERN_ERR, "failed to mount fs-core");
		goto bd_close;
	}

	/* warn misaligned data data start sector must be a multiple of clu_size */
	sdfat_log_msg(sb, KERN_INFO,
		"detected volume info : %s "
		"(bps : %lu, spc : %u, data start : %llu, %s)",
		sdfat_get_vol_type_str(fsi->vol_type),
		sb->s_blocksize, fsi->sect_per_clus, fsi->data_start_sector,
		(fsi->data_start_sector & (fsi->sect_per_clus - 1)) ?
		"misaligned" : "aligned");

	sdfat_log_msg(sb, KERN_INFO,
		"detected volume size : %llu KB (disk : %llu KB, "
		"part : %llu KB)",
		(fsi->num_sectors * (sb->s_blocksize >> SECTOR_SIZE_BITS)) >> 1,
		disk ? (u64)((disk->part0.nr_sects) >> 1) : 0,
		part ? (u64)((part->nr_sects) >> 1) : 0);

	ret = load_upcase_table(sb);
	if (ret) {
		sdfat_log_msg(sb, KERN_ERR, "failed to load upcase table");
		goto bd_close;
	}

	if (fsi->vol_type != EXFAT)
		goto update_used_clus;

	/* allocate-bitmap is only for exFAT */
	ret = load_alloc_bmp(sb);
	if (ret) {
		sdfat_log_msg(sb, KERN_ERR, "failed to load alloc-bitmap");
		goto free_upcase;
	}

update_used_clus:
	/* ~0 marks "unknown"; count used clusters now if required */
	if (fsi->used_clusters == (u32) ~0) {
		ret = fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters);
		if (ret) {
			sdfat_log_msg(sb, KERN_ERR, "failed to scan clusters");
			goto free_alloc_bmp;
		}
	}

	return 0;
free_alloc_bmp:
	if (fsi->vol_type == EXFAT)
		free_alloc_bmp(sb);
free_upcase:
	free_upcase_table(sb);
bd_close:
	bdev_close_dev(sb);
	return ret;
} /* end of fscore_mount */
+
/* umount the file system volume :
 * flush and mark the volume clean, release the upcase table, the exFAT
 * allocation bitmap, all caches and the AU map, then close the device.
 * Every step is attempted even after a failure; the first error (or a
 * recorded previous I/O error) is reported as -EIO.
 */
s32 fscore_umount(struct super_block *sb)
{
	s32 ret = 0;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	if (fs_sync(sb, 0))
		ret = -EIO;

	if (fs_set_vol_flags(sb, VOL_CLEAN))
		ret = -EIO;

	free_upcase_table(sb);

	if (fsi->vol_type == EXFAT)
		free_alloc_bmp(sb);

	if (fcache_release_all(sb))
		ret = -EIO;

	if (dcache_release_all(sb))
		ret = -EIO;

	amap_destroy(sb);

	if (fsi->prev_eio)
		ret = -EIO;
	/* close the block device */
	bdev_close_dev(sb);
	return ret;
}
+
+/* get the information of a file system volume */
+s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->used_clusters == (u32) ~0) {
+ if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters))
+ return -EIO;
+ }
+
+ info->FatType = fsi->vol_type;
+ info->ClusterSize = fsi->cluster_size;
+ info->NumClusters = fsi->num_clusters - 2; /* clu 0 & 1 */
+ info->UsedClusters = fsi->used_clusters + fsi->reserved_clusters;
+ info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+ return 0;
+}
+
+/* synchronize all file system volumes */
+s32 fscore_sync_fs(struct super_block *sb, s32 do_sync)
+{
+ /* synchronize the file system */
+ if (fs_sync(sb, do_sync))
+ return -EIO;
+
+ if (fs_set_vol_flags(sb, VOL_CLEAN))
+ return -EIO;
+
+ return 0;
+}
+
+/* stat allocation unit of a file system volume */
+u32 fscore_get_au_stat(struct super_block *sb, s32 mode)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if (fsi->fs_func->get_au_stat)
+ return fsi->fs_func->get_au_stat(sb, mode);
+
+ /* No error, just returns 0 */
+ return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/* File Operation Functions */
+/*----------------------------------------------------------------------*/
+/* lookup a file */
+s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 ret, dentry, num_entries;
+ CHAIN_T dir;
+ UNI_NAME_T uni_name;
+ DOS_NAME_T dos_name;
+ DENTRY_T *ep, *ep2;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *dir_fid = &(SDFAT_I(inode)->fid);
+
+ TMSG("%s entered\n", __func__);
+
+ /* check the validity of directory name in the given pathname */
+ ret = resolve_path_for_lookup(inode, path, &dir, &uni_name);
+ if (ret)
+ return ret;
+
+ ret = get_num_entries_and_dos_name(sb, &dir, &uni_name, &num_entries, &dos_name, 1);
+ if (ret)
+ return ret;
+
+ /* check the validation of hint_stat and initialize it if required */
+ if (dir_fid->version != (u32)inode_peek_iversion(inode)) {
+ dir_fid->hint_stat.clu = dir.dir;
+ dir_fid->hint_stat.eidx = 0;
+ dir_fid->version = (u32)inode_peek_iversion(inode);
+ dir_fid->hint_femp.eidx = -1;
+ }
+
+ /* search the file name for directories */
+ dentry = fsi->fs_func->find_dir_entry(sb, dir_fid, &dir, &uni_name,
+ num_entries, &dos_name, TYPE_ALL);
+
+ if ((dentry < 0) && (dentry != -EEXIST))
+ return dentry; /* -error value */
+
+ fid->dir.dir = dir.dir;
+ fid->dir.size = dir.size;
+ fid->dir.flags = dir.flags;
+ fid->entry = dentry;
+
+ /* root directory itself */
+ if (unlikely(dentry == -EEXIST)) {
+ fid->type = TYPE_DIR;
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+
+ fid->attr = ATTR_SUBDIR;
+ fid->flags = 0x01;
+ fid->size = 0;
+ fid->start_clu = fsi->root_dir;
+ } else {
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &dir, dentry, ES_2_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+ fid->type = fsi->fs_func->get_entry_type(ep);
+ fid->rwoffset = 0;
+ fid->hint_bmap.off = CLUS_EOF;
+ fid->attr = fsi->fs_func->get_entry_attr(ep);
+
+ fid->size = fsi->fs_func->get_entry_size(ep2);
+ if ((fid->type == TYPE_FILE) && (fid->size == 0)) {
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->start_clu = CLUS_EOF;
+ } else {
+ fid->flags = fsi->fs_func->get_entry_flag(ep2);
+ fid->start_clu = fsi->fs_func->get_entry_clu0(ep2);
+ }
+
+ if ((fid->type == TYPE_DIR) && (fsi->vol_type != EXFAT)) {
+ u32 num_clu = 0;
+ CHAIN_T tmp_dir;
+
+ tmp_dir.dir = fid->start_clu;
+ tmp_dir.flags = fid->flags;
+ tmp_dir.size = 0; /* UNUSED */
+
+ if (__count_num_clusters(sb, &tmp_dir, &num_clu))
+ return -EIO;
+ fid->size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (IS_CLUS_FREE(fid->start_clu)) {
+ sdfat_fs_error(sb,
+ "non-zero size file starts with zero cluster "
+ "(size : %llu, p_dir : %u, entry : 0x%08x)",
+ fid->size, fid->dir.dir, fid->entry);
+ sdfat_debug_bug_on(1);
+ return -EIO;
+ }
+
+ if (fsi->vol_type == EXFAT)
+ release_dentry_set(es);
+ }
+
+ /* hint_stat will be used if this is directory. */
+ fid->version = 0;
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ TMSG("%s exited successfully\n", __func__);
+ return 0;
+} /* end of fscore_lookup */
+
/* create a file :
 * resolve @path under @inode, mark the volume dirty, create the file's
 * directory entries, then sync and mark the volume clean again.
 * Returns 0 or a negative error code from path resolution / creation.
 */
s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid)
{
	s32 ret/*, dentry*/;
	CHAIN_T dir;
	UNI_NAME_T uni_name;
	struct super_block *sb = inode->i_sb;

	/* check the validity of directory name in the given pathname */
	ret = resolve_path(inode, path, &dir, &uni_name);
	if (ret)
		return ret;

	/* mark dirty while on-disk metadata is being changed */
	fs_set_vol_flags(sb, VOL_DIRTY);

	/* create a new file */
	ret = create_file(inode, &dir, &uni_name, mode, fid);

	fs_sync(sb, 0);
	fs_set_vol_flags(sb, VOL_CLEAN);

	return ret;
}
+
+/* read data from a opened file */
+s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+ s32 ret = 0;
+ s32 offset, sec_offset;
+ u32 clu_offset;
+ u32 clu;
+ u64 logsector, oneblkread, read_bytes;
+ struct buffer_head *tmp_bh = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* check if the given file ID is opened */
+ if (fid->type != TYPE_FILE)
+ return -EPERM;
+
+ if (fid->rwoffset > fid->size)
+ fid->rwoffset = fid->size;
+
+ if (count > (fid->size - fid->rwoffset))
+ count = fid->size - fid->rwoffset;
+
+ if (count == 0) {
+ if (rcount)
+ *rcount = 0;
+ return 0;
+ }
+
+ read_bytes = 0;
+
+ while (count > 0) {
+ clu_offset = fid->rwoffset >> fsi->cluster_size_bits;
+ clu = fid->start_clu;
+
+ if (fid->flags == 0x03) {
+ clu += clu_offset;
+ } else {
+ /* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ clu = fid->hint_bmap.clu;
+ }
+
+ while (clu_offset > 0) {
+ ret = get_next_clus_safe(sb, &clu);
+ if (ret)
+ goto err_out;
+
+ clu_offset--;
+ }
+ }
+
+ /* hint information */
+ fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits;
+ fid->hint_bmap.clu = clu;
+
+ offset = (s32)(fid->rwoffset & (fsi->cluster_size - 1)); /* byte offset in cluster */
+ sec_offset = offset >> sb->s_blocksize_bits; /* sector offset in cluster */
+ offset &= (sb->s_blocksize - 1); /* byte offset in sector */
+
+ logsector = CLUS_TO_SECT(fsi, clu) + sec_offset;
+
+ oneblkread = (u64)(sb->s_blocksize - offset);
+ if (oneblkread > count)
+ oneblkread = count;
+
+ if ((offset == 0) && (oneblkread == sb->s_blocksize)) {
+ ret = read_sect(sb, logsector, &tmp_bh, 1);
+ if (ret)
+ goto err_out;
+ memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data), (s32) oneblkread);
+ } else {
+ ret = read_sect(sb, logsector, &tmp_bh, 1);
+ if (ret)
+ goto err_out;
+ memcpy(((s8 *) buffer)+read_bytes, ((s8 *) tmp_bh->b_data)+offset, (s32) oneblkread);
+ }
+ count -= oneblkread;
+ read_bytes += oneblkread;
+ fid->rwoffset += oneblkread;
+ }
+
+err_out:
+ brelse(tmp_bh);
+
+ /* set the size of read bytes */
+ if (rcount != NULL)
+ *rcount = read_bytes;
+
+ return ret;
+} /* end of fscore_read_link */
+
/* write data into a opened file :
 * copy @count bytes from @buffer to the file at fid->rwoffset,
 * allocating and chaining new clusters as the file grows, then update
 * the directory entry (time, attr, flags, size, first cluster) and
 * sync.  Bytes actually written are reported via @wcount.
 * Returns 0 or a negative error code.
 */
s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
{
	s32 ret = 0;
	s32 modified = false, offset, sec_offset;
	u32 clu_offset, num_clusters, num_alloc;
	u32 clu, last_clu;
	u64 logsector, sector, oneblkwrite, write_bytes;
	CHAIN_T new_clu;
	TIMESTAMP_T tm;
	DENTRY_T *ep, *ep2;
	ENTRY_SET_CACHE_T *es = NULL;
	struct buffer_head *tmp_bh = NULL;
	struct super_block *sb = inode->i_sb;
	u32 blksize = (u32)sb->s_blocksize;
	u32 blksize_mask = (u32)(sb->s_blocksize-1);
	u8 blksize_bits = sb->s_blocksize_bits;
	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);

	/* check if the given file ID is opened */
	if (fid->type != TYPE_FILE)
		return -EPERM;

	/* writes never start beyond EOF; clamp to the current size */
	if (fid->rwoffset > fid->size)
		fid->rwoffset = fid->size;

	if (count == 0) {
		if (wcount)
			*wcount = 0;
		return 0;
	}

	/* volume is dirty until the final sync below */
	fs_set_vol_flags(sb, VOL_DIRTY);

	if (fid->size == 0)
		num_clusters = 0;
	else
		num_clusters = ((fid->size-1) >> fsi->cluster_size_bits) + 1;

	write_bytes = 0;

	while (count > 0) {
		clu_offset = (fid->rwoffset >> fsi->cluster_size_bits);
		clu = last_clu = fid->start_clu;

		if (fid->flags == 0x03) {
			/* contiguous file: locate target cluster by arithmetic */
			if ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) {
				last_clu += clu_offset - 1;

				if (clu_offset == num_clusters)
					clu = CLUS_EOF;
				else
					clu += clu_offset;
			}
		} else {
			/* hint information : resume chain walk from the cache */
			if ((clu_offset > 0) &&
				((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
				(clu_offset >= fid->hint_bmap.off)) {
				clu_offset -= fid->hint_bmap.off;
				clu = fid->hint_bmap.clu;
			}

			while ((clu_offset > 0) && (!IS_CLUS_EOF(clu))) {
				last_clu = clu;
				ret = get_next_clus_safe(sb, &clu);
				if (ret)
					goto err_out;

				clu_offset--;
			}
		}

		if (IS_CLUS_EOF(clu)) {
			/* ran off the end of the chain: grow the file */
			num_alloc = ((count-1) >> fsi->cluster_size_bits) + 1;
			new_clu.dir = IS_CLUS_EOF(last_clu) ? CLUS_EOF : last_clu+1;
			new_clu.size = 0;
			new_clu.flags = fid->flags;

			/* (1) allocate a chain of clusters */
			ret = fsi->fs_func->alloc_cluster(sb, num_alloc, &new_clu, ALLOC_COLD);
			if (ret)
				goto err_out;

			/* (2) append to the FAT chain */
			if (IS_CLUS_EOF(last_clu)) {
				/* first cluster of a previously empty file */
				if (new_clu.flags == 0x01)
					fid->flags = 0x01;
				fid->start_clu = new_clu.dir;
				modified = true;
			} else {
				if (new_clu.flags != fid->flags) {
					/* no-fat-chain bit is disabled,
					 * so fat-chain should be synced with
					 * alloc-bmp
					 */
					chain_cont_cluster(sb, fid->start_clu, num_clusters);
					fid->flags = 0x01;
					modified = true;
				}
				if (new_clu.flags == 0x01) {
					ret = fat_ent_set(sb, last_clu, new_clu.dir);
					if (ret)
						goto err_out;
				}
			}

			num_clusters += num_alloc;
			clu = new_clu.dir;
		}

		/* hint information */
		fid->hint_bmap.off = fid->rwoffset >> fsi->cluster_size_bits;
		fid->hint_bmap.clu = clu;

		/* byte offset in cluster */
		offset = (s32)(fid->rwoffset & (fsi->cluster_size-1));
		/* sector offset in cluster */
		sec_offset = offset >> blksize_bits;
		/* byte offset in sector */
		offset &= blksize_mask;
		logsector = CLUS_TO_SECT(fsi, clu) + sec_offset;

		oneblkwrite = (u64)(blksize - offset);
		if (oneblkwrite > count)
			oneblkwrite = count;

		if ((offset == 0) && (oneblkwrite == blksize)) {
			/* whole-block write: no need to read old contents */
			ret = read_sect(sb, logsector, &tmp_bh, 0);
			if (ret)
				goto err_out;

			memcpy(((s8 *)tmp_bh->b_data),
				((s8 *)buffer)+write_bytes,
				(s32)oneblkwrite);

			ret = write_sect(sb, logsector, tmp_bh, 0);
			if (ret) {
				brelse(tmp_bh);
				goto err_out;
			}
		} else {
			/* partial-block write: read-modify-write, but skip the
			 * read when the tail being written is past old EOF
			 */
			if ((offset > 0) || ((fid->rwoffset+oneblkwrite) < fid->size)) {
				ret = read_sect(sb, logsector, &tmp_bh, 1);
				if (ret)
					goto err_out;
			} else {
				ret = read_sect(sb, logsector, &tmp_bh, 0);
				if (ret)
					goto err_out;
			}

			memcpy(((s8 *) tmp_bh->b_data)+offset, ((s8 *) buffer)+write_bytes, (s32) oneblkwrite);
			ret = write_sect(sb, logsector, tmp_bh, 0);
			if (ret) {
				brelse(tmp_bh);
				goto err_out;
			}
		}

		count -= oneblkwrite;
		write_bytes += oneblkwrite;
		fid->rwoffset += oneblkwrite;

		fid->attr |= ATTR_ARCHIVE;

		if (fid->size < fid->rwoffset) {
			fid->size = fid->rwoffset;
			modified = true;
		}
	}

	brelse(tmp_bh);

	/* (3) update the direcoty entry */
	/* get_entry_(set_)in_dir shoulb be check DIR_DELETED flag. */
	if (fsi->vol_type == EXFAT) {
		es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
		if (!es) {
			ret = -EIO;
			goto err_out;
		}
		ep2 = ep+1;
	} else {
		ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
		if (!ep) {
			ret = -EIO;
			goto err_out;
		}
		ep2 = ep;
	}

	fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY);
	fsi->fs_func->set_entry_attr(ep, fid->attr);

	/* only touch on-disk fields that actually changed */
	if (modified) {
		if (fsi->fs_func->get_entry_flag(ep2) != fid->flags)
			fsi->fs_func->set_entry_flag(ep2, fid->flags);

		if (fsi->fs_func->get_entry_size(ep2) != fid->size)
			fsi->fs_func->set_entry_size(ep2, fid->size);

		if (fsi->fs_func->get_entry_clu0(ep2) != fid->start_clu)
			fsi->fs_func->set_entry_clu0(ep2, fid->start_clu);
	}

	if (fsi->vol_type == EXFAT) {
		/* NOTE(review): if the checksum update fails, 'es' is not
		 * released on this path — looks like an entry-set cache
		 * leak; confirm against release_dentry_set() semantics.
		 */
		if (update_dir_chksum_with_entry_set(sb, es)) {
			ret = -EIO;
			goto err_out;
		}
		release_dentry_set(es);
	} else {
		if (dcache_modify(sb, sector)) {
			ret = -EIO;
			goto err_out;
		}
	}

	fs_sync(sb, 0);
	fs_set_vol_flags(sb, VOL_CLEAN);

err_out:
	/* set the size of written bytes */
	if (wcount)
		*wcount = write_bytes;

	return ret;
} /* end of fscore_write_link */
+
+/* resize the file length */
+s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size)
+{
+ u32 num_clusters_new, num_clusters_da, num_clusters_phys;
+ u32 last_clu = CLUS_FREE;
+ u64 sector;
+ CHAIN_T clu;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ ENTRY_SET_CACHE_T *es = NULL;
+ s32 evict = (fid->dir.dir == DIR_DELETED) ? 1 : 0;
+
+ /* check if the given file ID is opened */
+ if ((fid->type != TYPE_FILE) && (fid->type != TYPE_DIR))
+ return -EPERM;
+
+ /* TO CHECK inode type and size */
+ MMSG("%s: inode(%p) type(%s) size:%lld->%lld\n", __func__, inode,
+ (fid->type == TYPE_FILE) ? "file" : "dir", old_size, new_size);
+
+ /* XXX : This is for debugging. */
+
+ /* It can be when write failed */
+#if 0
+ if (fid->size != old_size) {
+ DMSG("%s: inode(%p) size-mismatch(old:%lld != fid:%lld)\n",
+ __func__, inode, old_size, fid->size);
+ WARN_ON(1);
+ }
+#endif
+ /*
+ * There is no lock to protect fid->size.
+ * So, we should get old_size and use it.
+ */
+ if (old_size <= new_size)
+ return 0;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* Reserved count update */
+ #define num_clusters(v) ((v) ? (u32)(((v) - 1) >> fsi->cluster_size_bits) + 1 : 0)
+ num_clusters_da = num_clusters(SDFAT_I(inode)->i_size_aligned);
+ num_clusters_new = num_clusters(i_size_read(inode));
+ num_clusters_phys = num_clusters(SDFAT_I(inode)->i_size_ondisk);
+
+ /* num_clusters(i_size_old) should be equal to num_clusters_da */
+ BUG_ON((num_clusters(old_size)) != (num_clusters(SDFAT_I(inode)->i_size_aligned)));
+
+ /* for debugging (FIXME: is okay on no-da case?) */
+ BUG_ON(num_clusters_da < num_clusters_phys);
+
+ if ((num_clusters_da != num_clusters_phys) &&
+ (num_clusters_new < num_clusters_da)) {
+ /* Decrement reserved clusters
+ * n_reserved = num_clusters_da - max(new,phys)
+ */
+ int n_reserved = (num_clusters_new > num_clusters_phys) ?
+ (num_clusters_da - num_clusters_new) :
+ (num_clusters_da - num_clusters_phys);
+
+ fsi->reserved_clusters -= n_reserved;
+ BUG_ON(fsi->reserved_clusters < 0);
+ }
+
+ clu.dir = fid->start_clu;
+ /* In no-da case, num_clusters_phys is equal to below value
+ * clu.size = (u32)((old_size-1) >> fsi->cluster_size_bits) + 1;
+ */
+ clu.size = num_clusters_phys;
+ clu.flags = fid->flags;
+
+ /* For bigdata */
+ sdfat_statistics_set_trunc(clu.flags, &clu);
+
+ if (new_size > 0) {
+ /* Truncate FAT chain num_clusters after the first cluster
+ * num_clusters = min(new, phys);
+ */
+ u32 num_clusters = (num_clusters_new < num_clusters_phys) ?
+ num_clusters_new : num_clusters_phys;
+
+ /* Follow FAT chain
+ * (defensive coding - works fine even with corrupted FAT table
+ */
+ if (clu.flags == 0x03) {
+ clu.dir += num_clusters;
+ clu.size -= num_clusters;
+#if 0
+ /* extent_get_clus can`t know last_cluster
+ * when find target cluster in cache.
+ */
+ } else if (fid->type == TYPE_FILE) {
+ u32 fclus = 0;
+ s32 err = extent_get_clus(inode, num_clusters,
+ &fclus, &(clu.dir), &last_clu, 0);
+ if (err)
+ return -EIO;
+ ASSERT(fclus == num_clusters);
+
+ if ((num_clusters > 1) && (last_clu == fid->start_clu)) {
+ u32 fclus_tmp = 0;
+ u32 temp = 0;
+
+ err = extent_get_clus(inode, num_clusters - 1,
+ &fclus_tmp, &last_clu, &temp, 0);
+ if (err)
+ return -EIO;
+ ASSERT(fclus_tmp == (num_clusters - 1));
+ }
+
+ num_clusters -= fclus;
+ clu.size -= fclus;
+#endif
+ } else {
+ while (num_clusters > 0) {
+ last_clu = clu.dir;
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+
+ num_clusters--;
+ clu.size--;
+ }
+ }
+
+ /* Optimization avialable: */
+#if 0
+ if (num_clusters_new < num_clusters) {
+ < loop >
+ } else {
+ // num_clusters_new >= num_clusters_phys
+ // FAT truncation is not necessary
+
+ clu.dir = CLUS_EOF;
+ clu.size = 0;
+ }
+#endif
+ } else if (new_size == 0) {
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->start_clu = CLUS_EOF;
+ }
+ fid->size = new_size;
+
+ if (fid->type == TYPE_FILE)
+ fid->attr |= ATTR_ARCHIVE;
+
+ /*
+ * clu.dir: free from
+ * clu.size: # of clusters to free (exFAT, 0x03 only), no fat_free if 0
+ * clu.flags: fid->flags (exFAT only)
+ */
+
+ /* (1) update the directory entry */
+ if (!evict) {
+
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+ fsi->fs_func->set_entry_time(ep, tm_now(SDFAT_SB(sb), &tm), TM_MODIFY);
+ fsi->fs_func->set_entry_attr(ep, fid->attr);
+
+ /*
+ * if (fsi->vol_type != EXFAT)
+ * dcache_modify(sb, sector);
+ */
+
+ /* File size should be zero if there is no cluster allocated */
+ if (IS_CLUS_EOF(fid->start_clu))
+ fsi->fs_func->set_entry_size(ep2, 0);
+ else
+ fsi->fs_func->set_entry_size(ep2, new_size);
+
+ if (new_size == 0) {
+ /* Any directory can not be truncated to zero */
+ BUG_ON(fid->type != TYPE_FILE);
+
+ fsi->fs_func->set_entry_flag(ep2, 0x01);
+ fsi->fs_func->set_entry_clu0(ep2, CLUS_FREE);
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ if (update_dir_chksum_with_entry_set(sb, es))
+ return -EIO;
+ release_dentry_set(es);
+ } else {
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+
+ } /* end of if(fid->dir.dir != DIR_DELETED) */
+
+ /* (2) cut off from the FAT chain */
+ if ((fid->flags == 0x01) &&
+ (!IS_CLUS_FREE(last_clu)) && (!IS_CLUS_EOF(last_clu))) {
+ if (fat_ent_set(sb, last_clu, CLUS_EOF))
+ return -EIO;
+ }
+
+ /* (3) invalidate cache and free the clusters */
+ /* clear extent cache */
+ extent_cache_inval_inode(inode);
+
+ /* hint information */
+ fid->hint_bmap.off = CLUS_EOF;
+ fid->hint_bmap.clu = CLUS_EOF;
+ if (fid->rwoffset > fid->size)
+ fid->rwoffset = fid->size;
+
+ /* hint_stat will be used if this is directory. */
+ fid->hint_stat.eidx = 0;
+ fid->hint_stat.clu = fid->start_clu;
+ fid->hint_femp.eidx = -1;
+
+ /* free the clusters */
+ if (fsi->fs_func->free_cluster(sb, &clu, evict))
+ return -EIO;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return 0;
+} /* end of fscore_truncate */
+
+static void update_parent_info(FILE_ID_T *fid, struct inode *parent_inode)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(parent_inode->i_sb)->fsi);
+ FILE_ID_T *parent_fid = &(SDFAT_I(parent_inode)->fid);
+
+ /*
+ * the problem that FILE_ID_T caches wrong parent info.
+ *
+ * because of flag-mismatch of fid->dir,
+ * there is abnormal traversing cluster chain.
+ */
+ if (unlikely((parent_fid->flags != fid->dir.flags)
+ || (parent_fid->size != (fid->dir.size<<fsi->cluster_size_bits))
+ || (parent_fid->start_clu != fid->dir.dir))) {
+
+ fid->dir.dir = parent_fid->start_clu;
+ fid->dir.flags = parent_fid->flags;
+ fid->dir.size = ((parent_fid->size + (fsi->cluster_size-1))
+ >> fsi->cluster_size_bits);
+ }
+}
+
+/* rename or move a old file into a new file */
+s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+ s32 ret;
+ s32 dentry;
+ CHAIN_T olddir, newdir;
+ CHAIN_T *p_dir = NULL;
+ UNI_NAME_T uni_name;
+ DENTRY_T *ep;
+ struct super_block *sb = old_parent_inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u8 *new_path = (u8 *) new_dentry->d_name.name;
+ struct inode *new_inode = new_dentry->d_inode;
+ int num_entries;
+ FILE_ID_T *new_fid = NULL;
+ u32 new_entry_type = TYPE_UNUSED;
+ s32 new_entry = 0;
+
+ /* check the validity of pointer parameters */
+ if ((new_path == NULL) || (strlen(new_path) == 0))
+ return -EINVAL;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted source dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ /* patch 1.2.4 : the problem that FILE_ID_T caches wrong parent info. */
+ update_parent_info(fid, old_parent_inode);
+
+ olddir.dir = fid->dir.dir;
+ olddir.size = fid->dir.size;
+ olddir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ /* check if the old file is "." or ".." */
+ if (fsi->vol_type != EXFAT) {
+ if ((olddir.dir != fsi->root_dir) && (dentry < 2))
+ return -EPERM;
+ }
+
+ ep = get_dentry_in_dir(sb, &olddir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ /* check whether new dir is existing directory and empty */
+ if (new_inode) {
+ ret = -EIO;
+ new_fid = &SDFAT_I(new_inode)->fid;
+
+ if (new_fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted target dentry\n", __func__);
+ goto out;
+ }
+
+ /* patch 1.2.4 :
+ * the problem that FILE_ID_T caches wrong parent info.
+ *
+ * FIXME : is needed?
+ */
+ update_parent_info(new_fid, new_parent_inode);
+
+ p_dir = &(new_fid->dir);
+ new_entry = new_fid->entry;
+ ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL);
+ if (!ep)
+ goto out;
+
+ new_entry_type = fsi->fs_func->get_entry_type(ep);
+
+ /* if new_inode exists, update fid */
+ new_fid->size = i_size_read(new_inode);
+
+ if (new_entry_type == TYPE_DIR) {
+ CHAIN_T new_clu;
+
+ new_clu.dir = new_fid->start_clu;
+ new_clu.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1;
+ new_clu.flags = new_fid->flags;
+
+ ret = check_dir_empty(sb, &new_clu);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* check the validity of directory name in the given new pathname */
+ ret = resolve_path(new_parent_inode, new_path, &newdir, &uni_name);
+ if (ret)
+ return ret;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ if (olddir.dir == newdir.dir)
+ ret = rename_file(new_parent_inode, &olddir, dentry, &uni_name, fid);
+ else
+ ret = move_file(new_parent_inode, &olddir, dentry, &newdir, &uni_name, fid);
+
+ if ((!ret) && new_inode) {
+ /* delete entries of new_dir */
+ ep = get_dentry_in_dir(sb, p_dir, new_entry, NULL);
+ if (!ep) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+ num_entries = fsi->fs_func->count_ext_entries(sb, p_dir, new_entry, ep);
+ if (num_entries < 0) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+
+ if (fsi->fs_func->delete_dir_entry(sb, p_dir, new_entry, 0, num_entries+1)) {
+ ret = -EIO;
+ goto del_out;
+ }
+
+ /* Free the clusters if new_inode is a dir(as if fscore_rmdir) */
+ if (new_entry_type == TYPE_DIR) {
+ /* new_fid, new_clu_to_free */
+ CHAIN_T new_clu_to_free;
+
+ new_clu_to_free.dir = new_fid->start_clu;
+ new_clu_to_free.size = ((new_fid->size-1) >> fsi->cluster_size_bits) + 1;
+ new_clu_to_free.flags = new_fid->flags;
+
+ if (fsi->fs_func->free_cluster(sb, &new_clu_to_free, 1)) {
+ /* just set I/O error only */
+ ret = -EIO;
+ }
+
+ new_fid->size = 0;
+ new_fid->start_clu = CLUS_EOF;
+ new_fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ }
+del_out:
+ /* Update new_inode fid
+ * Prevent syncing removed new_inode
+ * (new_fid is already initialized above code ("if (new_inode)")
+ */
+ new_fid->dir.dir = DIR_DELETED;
+ }
+out:
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_rename */
+
+/* remove a file */
+s32 fscore_remove(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 ret;
+ s32 dentry;
+ CHAIN_T dir, clu_to_free;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (fsi->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ ret = remove_file(inode, &dir, dentry);
+ if (ret)
+ goto out;
+
+ clu_to_free.dir = fid->start_clu;
+ clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1;
+ clu_to_free.flags = fid->flags;
+
+ /* (2) invalidate extent cache and free the clusters
+ */
+ /* clear extent cache */
+ extent_cache_inval_inode(inode);
+ ret = fsi->fs_func->free_cluster(sb, &clu_to_free, 0);
+ /* WARN : DO NOT RETURN ERROR IN HERE */
+
+ /* (3) update FILE_ID_T */
+ fid->size = 0;
+ fid->start_clu = CLUS_EOF;
+ fid->flags = (fsi->vol_type == EXFAT) ? 0x03 : 0x01;
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+out:
+ return ret;
+} /* end of fscore_remove */
+
+
+/*
+ * Get the information of a given file
+ * REMARK : This function does not need any file name on linux
+ *
+ * info.Size means the value saved on disk.
+ * But root directory doesn`t have real dentry,
+ * so the size of root directory returns calculated one exceptively.
+ */
+s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info)
+{
+ u64 sector;
+ s32 count;
+ CHAIN_T dir;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ ENTRY_SET_CACHE_T *es = NULL;
+ u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+ TMSG("%s entered\n", __func__);
+
+ extent_cache_init_inode(inode);
+
+ /* if root directory */
+ if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1)) {
+ info->Attr = ATTR_SUBDIR;
+ memset((s8 *) &info->CreateTimestamp, 0, sizeof(DATE_TIME_T));
+ memset((s8 *) &info->ModifyTimestamp, 0, sizeof(DATE_TIME_T));
+ memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+ //strcpy(info->NameBuf.sfn, ".");
+ //strcpy(info->NameBuf.lfn, ".");
+
+ dir.dir = fsi->root_dir;
+ dir.flags = 0x01;
+ dir.size = 0; /* UNUSED */
+
+ /* FAT16 root_dir */
+ if (IS_CLUS_FREE(fsi->root_dir)) {
+ info->Size = fsi->dentries_in_root << DENTRY_SIZE_BITS;
+ } else {
+ u32 num_clu;
+
+ if (__count_num_clusters(sb, &dir, &num_clu))
+ return -EIO;
+ info->Size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+
+ count = __count_dos_name_entries(sb, &dir, TYPE_DIR, NULL);
+ if (count < 0)
+ return -EIO;
+ info->NumSubdirs = count;
+
+ return 0;
+ }
+
+ /* get the directory entry of given file or directory */
+ if (fsi->vol_type == EXFAT) {
+ /* es should be released */
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_2_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ /* dcache should be unlocked */
+ dcache_lock(sb, sector);
+ }
+
+ /* set FILE_INFO structure using the acquired DENTRY_T */
+ info->Attr = fsi->fs_func->get_entry_attr(ep);
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+ info->CreateTimestamp.Year = tm.year;
+ info->CreateTimestamp.Month = tm.mon;
+ info->CreateTimestamp.Day = tm.day;
+ info->CreateTimestamp.Hour = tm.hour;
+ info->CreateTimestamp.Minute = tm.min;
+ info->CreateTimestamp.Second = tm.sec;
+ info->CreateTimestamp.MilliSecond = 0;
+ info->CreateTimestamp.Timezone.value = tm.tz.value;
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+ info->ModifyTimestamp.Year = tm.year;
+ info->ModifyTimestamp.Month = tm.mon;
+ info->ModifyTimestamp.Day = tm.day;
+ info->ModifyTimestamp.Hour = tm.hour;
+ info->ModifyTimestamp.Minute = tm.min;
+ info->ModifyTimestamp.Second = tm.sec;
+ info->ModifyTimestamp.MilliSecond = 0;
+ info->ModifyTimestamp.Timezone.value = tm.tz.value;
+
+ memset((s8 *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+ info->NumSubdirs = 0;
+ info->Size = fsi->fs_func->get_entry_size(ep2);
+
+ if (fsi->vol_type == EXFAT)
+ release_dentry_set(es);
+ else
+ dcache_unlock(sb, sector);
+
+ if (is_dir) {
+ u32 dotcnt = 0;
+
+ dir.dir = fid->start_clu;
+ dir.flags = fid->flags;
+ dir.size = fid->size >> fsi->cluster_size_bits;
+ /*
+ * NOTE :
+ * If "dir.flags" has 0x01, "dir.size" is meaningless.
+ */
+#if 0
+ if (info->Size == 0) {
+ s32 num_clu;
+
+ if (__count_num_clusters(sb, &dir, &num_clu))
+ return -EIO;
+ info->Size = (u64)num_clu << fsi->cluster_size_bits;
+ }
+#endif
+ count = __count_dos_name_entries(sb, &dir, TYPE_DIR, &dotcnt);
+ if (count < 0)
+ return -EIO;
+
+ if (fsi->vol_type == EXFAT) {
+ count += SDFAT_MIN_SUBDIR;
+ } else {
+ /*
+ * if directory has been corrupted,
+ * we have to adjust subdir count.
+ */
+ BUG_ON(dotcnt > SDFAT_MIN_SUBDIR);
+ if (dotcnt < SDFAT_MIN_SUBDIR) {
+ EMSG("%s: contents of the directory has been "
+ "corrupted (parent clus : %08x, idx : %d)",
+ __func__, fid->dir.dir, fid->entry);
+ }
+ count += (SDFAT_MIN_SUBDIR - dotcnt);
+ }
+ info->NumSubdirs = count;
+ }
+
+ TMSG("%s exited successfully\n", __func__);
+ return 0;
+} /* end of fscore_read_inode */
+
+/* set the information of a given file
+ * REMARK : This function does not need any file name on linux
+ */
+s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, s32 sync)
+{
+ s32 ret = -EIO;
+ u64 sector;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep, *ep2;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+
+ /* SKIP WRITING INODE :
+ * if the indoe is already unlinked,
+ * there is no need for updating inode
+ */
+ if (fid->dir.dir == DIR_DELETED)
+ return 0;
+
+ if (is_dir && (fid->dir.dir == fsi->root_dir) && (fid->entry == -1))
+ return 0;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* get the directory entry of given file or directory */
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ ep2 = ep+1;
+ } else {
+ /* for other than exfat */
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ ep2 = ep;
+ }
+
+
+ fsi->fs_func->set_entry_attr(ep, info->Attr);
+
+ /* set FILE_INFO structure using the acquired DENTRY_T */
+ tm.tz = info->CreateTimestamp.Timezone;
+ tm.sec = info->CreateTimestamp.Second;
+ tm.min = info->CreateTimestamp.Minute;
+ tm.hour = info->CreateTimestamp.Hour;
+ tm.day = info->CreateTimestamp.Day;
+ tm.mon = info->CreateTimestamp.Month;
+ tm.year = info->CreateTimestamp.Year;
+ fsi->fs_func->set_entry_time(ep, &tm, TM_CREATE);
+
+ tm.tz = info->ModifyTimestamp.Timezone;
+ tm.sec = info->ModifyTimestamp.Second;
+ tm.min = info->ModifyTimestamp.Minute;
+ tm.hour = info->ModifyTimestamp.Hour;
+ tm.day = info->ModifyTimestamp.Day;
+ tm.mon = info->ModifyTimestamp.Month;
+ tm.year = info->ModifyTimestamp.Year;
+ fsi->fs_func->set_entry_time(ep, &tm, TM_MODIFY);
+
+ if (is_dir && fsi->vol_type != EXFAT) {
+ /* overwirte dirsize if FAT32 and dir size != 0 */
+ if (fsi->fs_func->get_entry_size(ep2))
+ fsi->fs_func->set_entry_size(ep2, 0);
+ } else {
+ /* File size should be zero if there is no cluster allocated */
+ u64 on_disk_size = info->Size;
+
+ if (IS_CLUS_EOF(fid->start_clu))
+ on_disk_size = 0;
+
+ fsi->fs_func->set_entry_size(ep2, on_disk_size);
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ ret = update_dir_chksum_with_entry_set(sb, es);
+ release_dentry_set(es);
+ } else {
+ ret = dcache_modify(sb, sector);
+ }
+
+ fs_sync(sb, sync);
+ /* Comment below code to prevent super block update frequently */
+ //fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_write_inode */
+
+
+/*
+ * Input: inode, (logical) clu_offset, target allocation area
+ * Output: errcode, cluster number
+ * *clu = (~0), if it's unable to allocate a new cluster
+ */
+s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest)
+{
+ s32 ret, modified = false;
+ u32 last_clu;
+ u64 sector;
+ CHAIN_T new_clu;
+ DENTRY_T *ep;
+ ENTRY_SET_CACHE_T *es = NULL;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u32 local_clu_offset = clu_offset;
+ s32 reserved_clusters = fsi->reserved_clusters;
+ u32 num_to_be_allocated = 0, num_clusters = 0;
+
+ fid->rwoffset = (s64)(clu_offset) << fsi->cluster_size_bits;
+
+ if (SDFAT_I(inode)->i_size_ondisk > 0)
+ num_clusters = (u32)((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1;
+
+ if (clu_offset >= num_clusters)
+ num_to_be_allocated = clu_offset - num_clusters + 1;
+
+ if ((dest == ALLOC_NOWHERE) && (num_to_be_allocated > 0)) {
+ *clu = CLUS_EOF;
+ return 0;
+ }
+
+ /* check always request cluster is 1 */
+ //ASSERT(num_to_be_allocated == 1);
+
+ sdfat_debug_check_clusters(inode);
+
+ *clu = last_clu = fid->start_clu;
+
+ /* XXX: Defensive code needed.
+ * what if i_size_ondisk != # of allocated clusters
+ */
+ if (fid->flags == 0x03) {
+ if ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) {
+ last_clu += clu_offset - 1;
+
+ if (clu_offset == num_clusters)
+ *clu = CLUS_EOF;
+ else
+ *clu += clu_offset;
+ }
+ } else if (fid->type == TYPE_FILE) {
+ u32 fclus = 0;
+ s32 err = extent_get_clus(inode, clu_offset,
+ &fclus, clu, &last_clu, 1);
+ if (err)
+ return -EIO;
+
+ clu_offset -= fclus;
+ } else {
+ /* hint information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ /* hint_bmap.clu should be valid */
+ ASSERT(fid->hint_bmap.clu >= 2);
+ *clu = fid->hint_bmap.clu;
+ }
+
+ while ((clu_offset > 0) && (!IS_CLUS_EOF(*clu))) {
+ last_clu = *clu;
+ if (get_next_clus_safe(sb, clu))
+ return -EIO;
+ clu_offset--;
+ }
+ }
+
+ if (IS_CLUS_EOF(*clu)) {
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ new_clu.dir = (IS_CLUS_EOF(last_clu)) ? CLUS_EOF : last_clu + 1;
+ new_clu.size = 0;
+ new_clu.flags = fid->flags;
+
+ /* (1) allocate a cluster */
+ if (num_to_be_allocated < 1) {
+ /* Broken FAT (i_sze > allocated FAT) */
+ EMSG("%s: invalid fat chain : inode(%p) "
+ "num_to_be_allocated(%d) "
+ "i_size_ondisk(%lld) fid->flags(%02x) "
+ "fid->start(%08x) fid->hint_off(%u) "
+ "fid->hint_clu(%u) fid->rwoffset(%llu) "
+ "modified_clu_off(%d) last_clu(%08x) "
+ "new_clu(%08x)", __func__, inode,
+ num_to_be_allocated,
+ (SDFAT_I(inode)->i_size_ondisk),
+ fid->flags, fid->start_clu,
+ fid->hint_bmap.off, fid->hint_bmap.clu,
+ fid->rwoffset, clu_offset,
+ last_clu, new_clu.dir);
+ sdfat_fs_error(sb, "broken FAT chain.");
+ return -EIO;
+ }
+
+ ret = fsi->fs_func->alloc_cluster(sb, num_to_be_allocated, &new_clu, ALLOC_COLD);
+ if (ret)
+ return ret;
+
+ if (IS_CLUS_EOF(new_clu.dir) || IS_CLUS_FREE(new_clu.dir)) {
+ sdfat_fs_error(sb, "bogus cluster new allocated"
+ "(last_clu : %u, new_clu : %u)",
+ last_clu, new_clu.dir);
+ ASSERT(0);
+ return -EIO;
+ }
+
+ /* Reserved cluster dec. */
+ // XXX: Inode DA flag needed
+ if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY) {
+ BUG_ON(reserved_clusters < num_to_be_allocated);
+ reserved_clusters -= num_to_be_allocated;
+
+ }
+
+ /* (2) append to the FAT chain */
+ if (IS_CLUS_EOF(last_clu)) {
+ if (new_clu.flags == 0x01)
+ fid->flags = 0x01;
+ fid->start_clu = new_clu.dir;
+ modified = true;
+ } else {
+ if (new_clu.flags != fid->flags) {
+ /* no-fat-chain bit is disabled,
+ * so fat-chain should be synced with alloc-bmp
+ */
+ chain_cont_cluster(sb, fid->start_clu, num_clusters);
+ fid->flags = 0x01;
+ modified = true;
+ }
+ if (new_clu.flags == 0x01)
+ if (fat_ent_set(sb, last_clu, new_clu.dir))
+ return -EIO;
+ }
+
+ num_clusters += num_to_be_allocated;
+ *clu = new_clu.dir;
+
+ if (fid->dir.dir != DIR_DELETED) {
+
+ if (fsi->vol_type == EXFAT) {
+ es = get_dentry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+ if (!es)
+ return -EIO;
+ /* get stream entry */
+ ep++;
+ }
+
+ /* (3) update directory entry */
+ if (modified) {
+ if (fsi->vol_type != EXFAT) {
+ ep = get_dentry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+ if (!ep)
+ return -EIO;
+ }
+
+ if (fsi->fs_func->get_entry_flag(ep) != fid->flags)
+ fsi->fs_func->set_entry_flag(ep, fid->flags);
+
+ if (fsi->fs_func->get_entry_clu0(ep) != fid->start_clu)
+ fsi->fs_func->set_entry_clu0(ep, fid->start_clu);
+
+ fsi->fs_func->set_entry_size(ep, fid->size);
+
+ if (fsi->vol_type != EXFAT) {
+ if (dcache_modify(sb, sector))
+ return -EIO;
+ }
+ }
+
+ if (fsi->vol_type == EXFAT) {
+ if (update_dir_chksum_with_entry_set(sb, es))
+ return -EIO;
+ release_dentry_set(es);
+ }
+
+ } /* end of if != DIR_DELETED */
+
+
+ /* add number of new blocks to inode (non-DA only) */
+ if (!(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_DELAY)) {
+ inode->i_blocks += num_to_be_allocated << (fsi->cluster_size_bits - sb->s_blocksize_bits);
+ } else {
+ // DA의 경우, i_blocks가 이미 증가해있어야 함.
+ BUG_ON(clu_offset >= (inode->i_blocks >> (fsi->cluster_size_bits - sb->s_blocksize_bits)));
+ }
+#if 0
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+ /* (4) Move *clu pointer along FAT chains (hole care)
+ * because the caller of this function expect *clu to be the last cluster.
+ * This only works when num_to_be_allocated >= 2,
+ * *clu = (the first cluster of the allocated chain) => (the last cluster of ...)
+ */
+ if (fid->flags == 0x03) {
+ *clu += num_to_be_allocated - 1;
+ } else {
+ while (num_to_be_allocated > 1) {
+ if (get_next_clus_safe(sb, clu))
+ return -EIO;
+ num_to_be_allocated--;
+ }
+ }
+
+ }
+
+ /* update reserved_clusters */
+ fsi->reserved_clusters = reserved_clusters;
+
+ /* hint information */
+ fid->hint_bmap.off = local_clu_offset;
+ fid->hint_bmap.clu = *clu;
+
+ return 0;
+} /* end of fscore_map_clus */
+
+/* allocate reserved cluster */
+s32 fscore_reserve_clus(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ if ((fsi->used_clusters + fsi->reserved_clusters) >= (fsi->num_clusters - 2))
+ return -ENOSPC;
+
+ if (bdev_check_bdi_valid(sb))
+ return -EIO;
+
+ fsi->reserved_clusters++;
+
+ /* inode->i_blocks update */
+ inode->i_blocks += 1 << (fsi->cluster_size_bits - sb->s_blocksize_bits);
+
+ sdfat_debug_check_clusters(inode);
+
+ return 0;
+}
+
+/* remove an entry, BUT don't truncate */
+s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 dentry;
+ CHAIN_T dir;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ if (remove_file(inode, &dir, dentry))
+ return -EIO;
+
+ /* This doesn't modify fid */
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------------*/
+/* Directory Operation Functions */
+/*----------------------------------------------------------------------*/
+
+/* create a directory */
+s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid)
+{
+ s32 ret/*, dentry*/;
+ CHAIN_T dir;
+ UNI_NAME_T uni_name;
+ struct super_block *sb = inode->i_sb;
+
+ TMSG("%s entered\n", __func__);
+
+ /* check the validity of directory name in the given old pathname */
+ ret = resolve_path(inode, path, &dir, &uni_name);
+ if (ret)
+ goto out;
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ ret = create_dir(inode, &dir, &uni_name, fid);
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+out:
+ TMSG("%s exited with err(%d)\n", __func__, ret);
+ return ret;
+}
+
+/* read a directory entry from the opened directory */
+s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+ s32 i;
+ s32 dentries_per_clu, dentries_per_clu_bits = 0;
+ u32 type, clu_offset;
+ u64 sector;
+ CHAIN_T dir, clu;
+ UNI_NAME_T uni_name;
+ TIMESTAMP_T tm;
+ DENTRY_T *ep;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ u32 dentry = (u32)(fid->rwoffset & 0xFFFFFFFF); /* u32 is enough for directory */
+
+ /* check if the given file ID is opened */
+ if (fid->type != TYPE_DIR)
+ return -EPERM;
+
+ if (fid->entry == -1) {
+ dir.dir = fsi->root_dir;
+ dir.size = 0; /* just initialize, but will not use */
+ dir.flags = 0x01;
+ } else {
+ dir.dir = fid->start_clu;
+ dir.size = fid->size >> fsi->cluster_size_bits;
+ dir.flags = fid->flags;
+ sdfat_debug_bug_on(dentry >= (dir.size * fsi->dentries_per_clu));
+ }
+
+ if (IS_CLUS_FREE(dir.dir)) { /* FAT16 root_dir */
+ dentries_per_clu = fsi->dentries_in_root;
+
+ /* Prevent readdir over directory size */
+ if (dentry >= dentries_per_clu) {
+ clu.dir = CLUS_EOF;
+ } else {
+ clu.dir = dir.dir;
+ clu.size = dir.size;
+ clu.flags = dir.flags;
+ }
+ } else {
+ dentries_per_clu = fsi->dentries_per_clu;
+ dentries_per_clu_bits = ilog2(dentries_per_clu);
+
+ clu_offset = dentry >> dentries_per_clu_bits;
+ clu.dir = dir.dir;
+ clu.size = dir.size;
+ clu.flags = dir.flags;
+
+ if (clu.flags == 0x03) {
+ clu.dir += clu_offset;
+ clu.size -= clu_offset;
+ } else {
+ /* hint_information */
+ if ((clu_offset > 0) &&
+ ((fid->hint_bmap.off != CLUS_EOF) && (fid->hint_bmap.off > 0)) &&
+ (clu_offset >= fid->hint_bmap.off)) {
+ clu_offset -= fid->hint_bmap.off;
+ clu.dir = fid->hint_bmap.clu;
+ }
+
+ while (clu_offset > 0) {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+
+ clu_offset--;
+ }
+ }
+ }
+
+ while (!IS_CLUS_EOF(clu.dir)) {
+ if (IS_CLUS_FREE(dir.dir)) /* FAT16 root_dir */
+ i = dentry % dentries_per_clu;
+ else
+ i = dentry & (dentries_per_clu-1);
+
+ for ( ; i < dentries_per_clu; i++, dentry++) {
+ ep = get_dentry_in_dir(sb, &clu, i, &sector);
+ if (!ep)
+ return -EIO;
+
+ type = fsi->fs_func->get_entry_type(ep);
+
+ if (type == TYPE_UNUSED)
+ break;
+
+ if ((type != TYPE_FILE) && (type != TYPE_DIR))
+ continue;
+
+ dcache_lock(sb, sector);
+ dir_entry->Attr = fsi->fs_func->get_entry_attr(ep);
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+ dir_entry->CreateTimestamp.Year = tm.year;
+ dir_entry->CreateTimestamp.Month = tm.mon;
+ dir_entry->CreateTimestamp.Day = tm.day;
+ dir_entry->CreateTimestamp.Hour = tm.hour;
+ dir_entry->CreateTimestamp.Minute = tm.min;
+ dir_entry->CreateTimestamp.Second = tm.sec;
+ dir_entry->CreateTimestamp.MilliSecond = 0;
+
+ fsi->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+ dir_entry->ModifyTimestamp.Year = tm.year;
+ dir_entry->ModifyTimestamp.Month = tm.mon;
+ dir_entry->ModifyTimestamp.Day = tm.day;
+ dir_entry->ModifyTimestamp.Hour = tm.hour;
+ dir_entry->ModifyTimestamp.Minute = tm.min;
+ dir_entry->ModifyTimestamp.Second = tm.sec;
+ dir_entry->ModifyTimestamp.MilliSecond = 0;
+
+ memset((s8 *) &dir_entry->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+ *(uni_name.name) = 0x0;
+ fsi->fs_func->get_uniname_from_ext_entry(sb, &dir, dentry, uni_name.name);
+ if (*(uni_name.name) == 0x0)
+ get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x1);
+ nls_uni16s_to_vfsname(sb, &uni_name,
+ dir_entry->NameBuf.lfn,
+ dir_entry->NameBuf.lfnbuf_len);
+ dcache_unlock(sb, sector);
+
+ if (fsi->vol_type == EXFAT) {
+ ep = get_dentry_in_dir(sb, &clu, i+1, NULL);
+ if (!ep)
+ return -EIO;
+ } else {
+ get_uniname_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x0);
+ nls_uni16s_to_vfsname(sb, &uni_name,
+ dir_entry->NameBuf.sfn,
+ dir_entry->NameBuf.sfnbuf_len);
+ }
+
+ dir_entry->Size = fsi->fs_func->get_entry_size(ep);
+
+ /*
+ * Update hint information :
+ * fat16 root directory does not need it.
+ */
+ if (!IS_CLUS_FREE(dir.dir)) {
+ fid->hint_bmap.off = dentry >> dentries_per_clu_bits;
+ fid->hint_bmap.clu = clu.dir;
+ }
+
+ fid->rwoffset = (s64) ++dentry;
+
+ return 0;
+ }
+
+ /* fat16 root directory */
+ if (IS_CLUS_FREE(dir.dir))
+ break;
+
+ if (clu.flags == 0x03) {
+ if ((--clu.size) > 0)
+ clu.dir++;
+ else
+ clu.dir = CLUS_EOF;
+ } else {
+ if (get_next_clus_safe(sb, &(clu.dir)))
+ return -EIO;
+ }
+ }
+
+ dir_entry->NameBuf.lfn[0] = '\0';
+
+ fid->rwoffset = (s64)dentry;
+
+ return 0;
+} /* end of fscore_readdir */
+
+/* remove a directory */
+s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid)
+{
+ s32 ret;
+ s32 dentry;
+ DENTRY_T *ep;
+ CHAIN_T dir, clu_to_free;
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ dir.dir = fid->dir.dir;
+ dir.size = fid->dir.size;
+ dir.flags = fid->dir.flags;
+
+ dentry = fid->entry;
+
+ if (fid->dir.dir == DIR_DELETED) {
+ EMSG("%s : abnormal access to deleted dentry\n", __func__);
+ return -ENOENT;
+ }
+
+ /* check if the file is "." or ".." */
+ if (fsi->vol_type != EXFAT) {
+ if ((dir.dir != fsi->root_dir) && (dentry < 2))
+ return -EPERM;
+ }
+
+ ep = get_dentry_in_dir(sb, &dir, dentry, NULL);
+ if (!ep)
+ return -EIO;
+
+#ifdef CONFIG_SDFAT_CHECK_RO_ATTR
+ if (SDFAT_SB(sb)->fsi.fs_func->get_entry_attr(ep) & ATTR_READONLY)
+ return -EPERM;
+#endif
+
+ clu_to_free.dir = fid->start_clu;
+ clu_to_free.size = ((fid->size-1) >> fsi->cluster_size_bits) + 1;
+ clu_to_free.flags = fid->flags;
+
+ ret = check_dir_empty(sb, &clu_to_free);
+ if (ret) {
+ if (ret == -EIO)
+ EMSG("%s : failed to check_dir_empty : err(%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ fs_set_vol_flags(sb, VOL_DIRTY);
+
+ /* (1) update the directory entry */
+ ret = remove_file(inode, &dir, dentry);
+ if (ret) {
+ EMSG("%s : failed to remove_file : err(%d)\n", __func__, ret);
+ return ret;
+ }
+
+ fid->dir.dir = DIR_DELETED;
+
+ fs_sync(sb, 0);
+ fs_set_vol_flags(sb, VOL_CLEAN);
+
+ return ret;
+} /* end of fscore_rmdir */
+
+/* end of core.c */
diff --git a/fs/sdfat/core.h b/fs/sdfat/core.h
new file mode 100644
index 000000000000..1f8ed5a28ef3
--- /dev/null
+++ b/fs/sdfat/core.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_CORE_H
+#define _SDFAT_CORE_H
+
+#include <asm/byteorder.h>
+
+#include "config.h"
+#include "api.h"
+#include "upcase.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#define get_next_clus(sb, pclu) fat_ent_get(sb, *(pclu), pclu)
+#define get_next_clus_safe(sb, pclu) fat_ent_get_safe(sb, *(pclu), pclu)
+
+/* file status */
+/* this prevents
+ * fscore_write_inode, fscore_map_clus, ... with the unlinked inodes
+ * from corrupting on-disk dentry data.
+ *
+ * The fid->dir value of unlinked inode will be DIR_DELETED
+ * and those functions must check if fid->dir is valid prior to
+ * the calling of get_dentry_in_dir()
+ */
+#define DIR_DELETED 0xFFFF0321
+
+/*----------------------------------------------------------------------*/
+/* Type Definitions */
+/*----------------------------------------------------------------------*/
+#define ES_2_ENTRIES 2
+#define ES_3_ENTRIES 3
+#define ES_ALL_ENTRIES 0
+
+/* in-memory, contiguous copy of one dentry set (file + stream + name
+ * entries); allocated by get_dentry_set_in_dir() and freed by
+ * release_dentry_set(). */
+typedef struct {
+	u64 sector;	// sector number that contains file_entry
+	u32 offset;	// byte offset in the sector
+	s32 alloc_flag;	// flag in stream entry. 01 for cluster chain, 03 for contig. clusters.
+	u32 num_entries;	// number of dentries copied into __buf
+	void *__buf;	// __buf should be the last member
+} ENTRY_SET_CACHE_T;
+
+
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+s32 fscore_init(void);
+s32 fscore_shutdown(void);
+
+/* bdev management */
+s32 fscore_check_bdi_valid(struct super_block *sb);
+
+/* chain management */
+s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len);
+
+/* volume management functions */
+s32 fscore_mount(struct super_block *sb);
+s32 fscore_umount(struct super_block *sb);
+s32 fscore_statfs(struct super_block *sb, VOL_INFO_T *info);
+s32 fscore_sync_fs(struct super_block *sb, s32 do_sync);
+s32 fscore_set_vol_flags(struct super_block *sb, u16 new_flag, s32 always_sync);
+u32 fscore_get_au_stat(struct super_block *sb, s32 mode);
+
+/* file management functions */
+s32 fscore_lookup(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fscore_create(struct inode *inode, u8 *path, u8 mode, FILE_ID_T *fid);
+s32 fscore_read_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+s32 fscore_write_link(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+s32 fscore_truncate(struct inode *inode, u64 old_size, u64 new_size);
+s32 fscore_rename(struct inode *old_parent_inode, FILE_ID_T *fid,
+ struct inode *new_parent_inode, struct dentry *new_dentry);
+s32 fscore_remove(struct inode *inode, FILE_ID_T *fid);
+s32 fscore_read_inode(struct inode *inode, DIR_ENTRY_T *info);
+s32 fscore_write_inode(struct inode *inode, DIR_ENTRY_T *info, int sync);
+s32 fscore_map_clus(struct inode *inode, u32 clu_offset, u32 *clu, int dest);
+s32 fscore_reserve_clus(struct inode *inode);
+s32 fscore_unlink(struct inode *inode, FILE_ID_T *fid);
+
+/* directory management functions */
+s32 fscore_mkdir(struct inode *inode, u8 *path, FILE_ID_T *fid);
+s32 fscore_readdir(struct inode *inode, DIR_ENTRY_T *dir_ent);
+s32 fscore_rmdir(struct inode *inode, FILE_ID_T *fid);
+
+
+/*----------------------------------------------------------------------*/
+/* External Function Declarations (NOT TO UPPER LAYER) */
+/*----------------------------------------------------------------------*/
+
+/* core.c : core code for common */
+/* dir entry management functions */
+DENTRY_T *get_dentry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u64 *sector);
+
+/* name conversion functions */
+void get_uniname_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode);
+
+/* file operation functions */
+s32 walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, u32 byte_offset, u32 *clu);
+
+/* sdfat/cache.c */
+s32 meta_cache_init(struct super_block *sb);
+s32 meta_cache_shutdown(struct super_block *sb);
+u8 *fcache_getblk(struct super_block *sb, u64 sec);
+s32 fcache_modify(struct super_block *sb, u64 sec);
+s32 fcache_release_all(struct super_block *sb);
+s32 fcache_flush(struct super_block *sb, u32 sync);
+
+u8 *dcache_getblk(struct super_block *sb, u64 sec);
+s32 dcache_modify(struct super_block *sb, u64 sec);
+s32 dcache_lock(struct super_block *sb, u64 sec);
+s32 dcache_unlock(struct super_block *sb, u64 sec);
+s32 dcache_release(struct super_block *sb, u64 sec);
+s32 dcache_release_all(struct super_block *sb);
+s32 dcache_flush(struct super_block *sb, u32 sync);
+s32 dcache_readahead(struct super_block *sb, u64 sec);
+
+
+/* fatent.c */
+s32 fat_ent_ops_init(struct super_block *sb);
+s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content);
+s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content);
+s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content);
+
+/* core_fat.c : core code for fat */
+s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entries);
+s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr);
+s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr);
+
+/* core_exfat.c : core code for exfat */
+
+s32 load_alloc_bmp(struct super_block *sb);
+void free_alloc_bmp(struct super_block *sb);
+ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb,
+ CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep);
+void release_dentry_set(ENTRY_SET_CACHE_T *es);
+s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry);
+s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es);
+bool is_dir_empty(struct super_block *sb, CHAIN_T *p_dir);
+s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr);
+
+/* amap_smart.c : creation on mount / destroy on umount */
+int amap_create(struct super_block *sb, u32 pack_ratio, u32 sect_per_au, u32 hidden_sect);
+void amap_destroy(struct super_block *sb);
+
+/* amap_smart.c : (de)allocation functions */
+s32 amap_fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest);
+s32 amap_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);/* Not implemented */
+s32 amap_release_cluster(struct super_block *sb, u32 clu); /* Only update AMAP */
+
+/* amap_smart.c : misc (for defrag) */
+s32 amap_mark_ignore(struct super_block *sb, u32 clu);
+s32 amap_unmark_ignore(struct super_block *sb, u32 clu);
+s32 amap_unmark_ignore_all(struct super_block *sb);
+s32 amap_check_working(struct super_block *sb, u32 clu);
+s32 amap_get_freeclus(struct super_block *sb, u32 clu);
+
+/* amap_smart.c : stat AU */
+u32 amap_get_au_stat(struct super_block *sb, s32 mode);
+
+
+/* blkdev.c */
+s32 bdev_open_dev(struct super_block *sb);
+s32 bdev_close_dev(struct super_block *sb);
+s32 bdev_check_bdi_valid(struct super_block *sb);
+s32 bdev_readahead(struct super_block *sb, u64 secno, u64 num_secs);
+s32 bdev_mread(struct super_block *sb, u64 secno, struct buffer_head **bh, u64 num_secs, s32 read);
+s32 bdev_mwrite(struct super_block *sb, u64 secno, struct buffer_head *bh, u64 num_secs, s32 sync);
+s32 bdev_sync_all(struct super_block *sb);
+
+/* blkdev.c : sector read/write functions */
+s32 read_sect(struct super_block *sb, u64 sec, struct buffer_head **bh, s32 read);
+s32 write_sect(struct super_block *sb, u64 sec, struct buffer_head *bh, s32 sync);
+s32 read_msect(struct super_block *sb, u64 sec, struct buffer_head **bh, s64 num_secs, s32 read);
+s32 write_msect(struct super_block *sb, u64 sec, struct buffer_head *bh, s64 num_secs, s32 sync);
+s32 write_msect_zero(struct super_block *sb, u64 sec, u64 num_secs);
+
+/* misc.c */
+u8 calc_chksum_1byte(void *data, s32 len, u8 chksum);
+u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type);
+
+/* extent.c */
+s32 extent_cache_init(void);
+void extent_cache_shutdown(void);
+void extent_cache_init_inode(struct inode *inode);
+void extent_cache_inval_inode(struct inode *inode);
+s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus,
+ u32 *dclus, u32 *last_dclus, s32 allow_eof);
+/*----------------------------------------------------------------------*/
+/* Wrapper Function */
+/*----------------------------------------------------------------------*/
+void set_sb_dirty(struct super_block *sb);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _SDFAT_CORE_H */
+
+/* end of core.h */
diff --git a/fs/sdfat/core_exfat.c b/fs/sdfat/core_exfat.c
new file mode 100644
index 000000000000..9e4b994d0e90
--- /dev/null
+++ b/fs/sdfat/core_exfat.c
@@ -0,0 +1,1560 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core_exfat.c */
+/* PURPOSE : exFAT-fs core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+/* free_bit[v] = bit index of the least-significant ZERO bit in byte v
+ * (e.g. free_bit[1]=1, free_bit[7]=3); v = 255 has no zero bit and is
+ * intentionally absent.  Presumably used when scanning the allocation
+ * bitmap for a free cluster — confirm at call sites. */
+static u8 free_bit[] = {
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*  0 ~  19*/
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~  39*/
+	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~  59*/
+	0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~  79*/
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~  99*/
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/
+	0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/
+	0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/
+	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/
+	0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0                /*240 ~ 254*/
+};
+
+/* used_bit[v] = number of set bits (popcount) in byte v
+ * (e.g. used_bit[3]=2, used_bit[255]=8). */
+static u8 used_bit[] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/*  0 ~  19*/
+	2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~  39*/
+	2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~  59*/
+	4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~  79*/
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~  99*/
+	3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/
+	4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/
+	3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/
+	4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/
+	3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/
+	5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8             /*240 ~ 255*/
+};
+
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+/*
+ * Directory Entry Management Functions
+ */
+/* decode the raw exFAT dentry type byte into a TYPE_* class.
+ * Type bytes with bit 7 clear are deleted entries; 0x81..0x85 are the
+ * critical primary entries, 0xA0..0xA2 benign primary, 0xC0..0xC2
+ * critical secondary, everything >= 0xE0 benign secondary. */
+static u32 exfat_get_entry_type(DENTRY_T *p_entry)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	if (ep->type == EXFAT_UNUSED)
+		return TYPE_UNUSED;
+	if (ep->type < 0x80)
+		return TYPE_DELETED;	/* in-use bit (0x80) cleared */
+	if (ep->type == 0x80)
+		return TYPE_INVALID;
+	if (ep->type < 0xA0) {
+		if (ep->type == 0x81)
+			return TYPE_BITMAP;
+		if (ep->type == 0x82)
+			return TYPE_UPCASE;
+		if (ep->type == 0x83)
+			return TYPE_VOLUME;
+		if (ep->type == 0x85) {
+			/* 0x85 is file OR dir; the attr bit decides */
+			if (le16_to_cpu(ep->attr) & ATTR_SUBDIR)
+				return TYPE_DIR;
+			return TYPE_FILE;
+		}
+		return TYPE_CRITICAL_PRI;
+	}
+	if (ep->type < 0xC0) {
+		if (ep->type == 0xA0)
+			return TYPE_GUID;
+		if (ep->type == 0xA1)
+			return TYPE_PADDING;
+		if (ep->type == 0xA2)
+			return TYPE_ACLTAB;
+		return TYPE_BENIGN_PRI;
+	}
+	if (ep->type < 0xE0) {
+		if (ep->type == 0xC0)
+			return TYPE_STREAM;
+		if (ep->type == 0xC1)
+			return TYPE_EXTEND;
+		if (ep->type == 0xC2)
+			return TYPE_ACL;
+		return TYPE_CRITICAL_SEC;
+	}
+	return TYPE_BENIGN_SEC;
+} /* end of exfat_get_entry_type */
+
+/* encode a TYPE_* class back into the raw dentry type byte (and, for
+ * file/dir/symlink, the matching attr value).  Unknown types are
+ * silently ignored. */
+static void exfat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	if (type == TYPE_UNUSED) {
+		ep->type = 0x0;
+	} else if (type == TYPE_DELETED) {
+		ep->type &= ~0x80;	/* clear the in-use bit, keep the rest */
+	} else if (type == TYPE_STREAM) {
+		ep->type = 0xC0;
+	} else if (type == TYPE_EXTEND) {
+		ep->type = 0xC1;
+	} else if (type == TYPE_BITMAP) {
+		ep->type = 0x81;
+	} else if (type == TYPE_UPCASE) {
+		ep->type = 0x82;
+	} else if (type == TYPE_VOLUME) {
+		ep->type = 0x83;
+	} else if (type == TYPE_DIR) {
+		ep->type = 0x85;
+		ep->attr = cpu_to_le16(ATTR_SUBDIR);
+	} else if (type == TYPE_FILE) {
+		ep->type = 0x85;
+		ep->attr = cpu_to_le16(ATTR_ARCHIVE);
+	} else if (type == TYPE_SYMLINK) {
+		ep->type = 0x85;
+		ep->attr = cpu_to_le16(ATTR_ARCHIVE | ATTR_SYMLINK);
+	}
+} /* end of exfat_set_entry_type */
+
+/* read the attributes field of a file dentry (little-endian on disk) */
+static u32 exfat_get_entry_attr(DENTRY_T *p_entry)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+	return (u32)le16_to_cpu(ep->attr);
+} /* end of exfat_get_entry_attr */
+
+/* store the attributes field of a file dentry (truncated to 16 bits) */
+static void exfat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+	ep->attr = cpu_to_le16((u16) attr);
+} /* end of exfat_set_entry_attr */
+
+/* read the allocation flags byte of a stream-extension dentry */
+static u8 exfat_get_entry_flag(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	return ep->flags;
+} /* end of exfat_get_entry_flag */
+
+/* store the allocation flags byte of a stream-extension dentry */
+static void exfat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	ep->flags = flags;
+} /* end of exfat_set_entry_flag */
+
+/* read the first-cluster number from a stream-extension dentry */
+static u32 exfat_get_entry_clu0(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	return (u32)le32_to_cpu(ep->start_clu);
+} /* end of exfat_get_entry_clu0 */
+
+/* store the first-cluster number into a stream-extension dentry */
+static void exfat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	ep->start_clu = cpu_to_le32(start_clu);
+} /* end of exfat_set_entry_clu0 */
+
+/* read the file size from a stream-extension dentry.
+ * Note: this returns valid_size, not the (possibly larger) allocated
+ * 'size' field. */
+static u64 exfat_get_entry_size(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	return le64_to_cpu(ep->valid_size);
+} /* end of exfat_get_entry_size */
+
+/* store the file size into a stream-extension dentry; both valid_size
+ * and size are set to the same value, i.e. no pre-allocation beyond the
+ * valid data length is recorded here. */
+static void exfat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *)p_entry;
+
+	ep->valid_size = cpu_to_le64(size);
+	ep->size = cpu_to_le64(size);
+} /* end of exfat_set_entry_size */
+
+/* unpack the FAT-packed time/date words (plus the exFAT timezone byte)
+ * of the slot selected by mode (TM_CREATE/TM_MODIFY/TM_ACCESS) into *tp.
+ * For an unknown mode the defaults decode to day 1, month 1, year 0 of
+ * the on-disk epoch at 00:00:00.
+ * Packing: time = hhhhhmmmmmmsssss (seconds in 2-second units),
+ *          date = yyyyyyymmmmddddd. */
+static void exfat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t = 0x00, d = 0x21, tz = 0x00;
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+	switch (mode) {
+	case TM_CREATE:
+		t = le16_to_cpu(ep->create_time);
+		d = le16_to_cpu(ep->create_date);
+		tz = ep->create_tz;
+		break;
+	case TM_MODIFY:
+		t = le16_to_cpu(ep->modify_time);
+		d = le16_to_cpu(ep->modify_date);
+		tz = ep->modify_tz;
+		break;
+	case TM_ACCESS:
+		t = le16_to_cpu(ep->access_time);
+		d = le16_to_cpu(ep->access_date);
+		tz = ep->access_tz;
+		break;
+	}
+
+	tp->tz.value = tz;
+	tp->sec = (t & 0x001F) << 1;	/* stored in 2-second granularity */
+	tp->min = (t >> 5) & 0x003F;
+	tp->hour = (t >> 11);
+	tp->day = (d & 0x001F);
+	tp->mon = (d >> 5) & 0x000F;
+	tp->year = (d >> 9);
+} /* end of exfat_get_entry_time */
+
+/* pack *tp into the FAT time/date words (plus exFAT timezone byte) of
+ * the slot selected by mode.  The seconds field is halved (sec >> 1),
+ * so odd seconds are rounded down; the sub-second part is handled by
+ * the *_time_ms fields elsewhere. */
+static void exfat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t, d;
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *)p_entry;
+
+	t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+	d = (tp->year << 9) | (tp->mon << 5) | tp->day;
+
+	switch (mode) {
+	case TM_CREATE:
+		ep->create_time = cpu_to_le16(t);
+		ep->create_date = cpu_to_le16(d);
+		ep->create_tz = tp->tz.value;
+		break;
+	case TM_MODIFY:
+		ep->modify_time = cpu_to_le16(t);
+		ep->modify_date = cpu_to_le16(d);
+		ep->modify_tz = tp->tz.value;
+		break;
+	case TM_ACCESS:
+		ep->access_time = cpu_to_le16(t);
+		ep->access_date = cpu_to_le16(d);
+		ep->access_tz = tp->tz.value;
+		break;
+	}
+} /* end of exfat_set_entry_time */
+
+
+/* initialize a fresh file dentry: set its type/attr and stamp the
+ * create/modify/access timestamps with the current time (tm_now),
+ * zeroing the 10ms-resolution sub-second fields. */
+static void __init_file_entry(struct super_block *sb, FILE_DENTRY_T *ep, u32 type)
+{
+	TIMESTAMP_T tm, *tp;
+
+	exfat_set_entry_type((DENTRY_T *) ep, type);
+
+	tp = tm_now(SDFAT_SB(sb), &tm);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_ACCESS);
+	ep->create_time_ms = 0;
+	ep->modify_time_ms = 0;
+} /* end of __init_file_entry */
+
+/* initialize a fresh stream-extension dentry with the allocation flags,
+ * first cluster and size (valid_size and size are set identically). */
+static void __init_strm_entry(STRM_DENTRY_T *ep, u8 flags, u32 start_clu, u64 size)
+{
+	exfat_set_entry_type((DENTRY_T *) ep, TYPE_STREAM);
+	ep->flags = flags;
+	ep->start_clu = cpu_to_le32(start_clu);
+	ep->valid_size = cpu_to_le64(size);
+	ep->size = cpu_to_le64(size);
+} /* end of __init_strm_entry */
+
+/* initialize a fresh file-name dentry with up to 15 UTF-16 code units
+ * from 'uniname'; a NUL terminator, if reached, is stored and copying
+ * stops.  Remaining units of unicode_0_14 are left untouched. */
+static void __init_name_entry(NAME_DENTRY_T *ep, u16 *uniname)
+{
+	s32 i;
+
+	exfat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+	ep->flags = 0x0;
+
+	for (i = 0; i < 15; i++) {
+		ep->unicode_0_14[i] = cpu_to_le16(*uniname);
+		if (*uniname == 0x0)
+			break;
+		uniname++;
+	}
+} /* end of __init_name_entry */
+
+/* initialize the file dentry and its stream-extension dentry for a new
+ * file/dir at <p_dir, entry>/<entry+1>.
+ *
+ * Fix: the original looked up BOTH dentries before flushing either one,
+ * so the shared 'sector' variable always held the stream entry's sector
+ * when the file entry was flushed with dcache_modify(); if the two
+ * dentries straddle a sector boundary the file entry's sector was never
+ * marked dirty.  Each dentry is now looked up, initialized and flushed
+ * before the next lookup (same per-entry pattern exfat_init_ext_entry
+ * already uses).
+ *
+ * Returns 0 on success, -EIO on cache failure.
+ */
+static s32 exfat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, u32 start_clu, u64 size)
+{
+	u64 sector;
+	u8 flags;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+
+	/* 0x01: FAT-chained clusters, 0x03: contiguous (no FAT chain) */
+	flags = (type == TYPE_FILE) ? 0x01 : 0x03;
+
+	/* we cannot use get_dentry_set_in_dir here because file ep is not initialized yet */
+	file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return -EIO;
+
+	__init_file_entry(sb, file_ep, type);
+	if (dcache_modify(sb, sector))
+		return -EIO;
+
+	strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector);
+	if (!strm_ep)
+		return -EIO;
+
+	__init_strm_entry(strm_ep, flags, start_clu, size);
+	if (dcache_modify(sb, sector))
+		return -EIO;
+
+	return 0;
+} /* end of exfat_init_dir_entry */
+
+/* recompute and store the set checksum of the dentry set starting at
+ * <p_dir, entry>: the file dentry plus its num_ext extension dentries.
+ * The file-dentry sector is locked so its buffer cannot be recycled
+ * while the extension entries are fetched.
+ * Returns 0 on success, -EIO on failure. */
+s32 update_dir_chksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry)
+{
+	s32 ret = -EIO;
+	s32 i, num_entries;
+	u64 sector;
+	u16 chksum;
+	FILE_DENTRY_T *file_ep;
+	DENTRY_T *ep;
+
+	file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return -EIO;
+
+	dcache_lock(sb, sector);
+
+	num_entries = (s32) file_ep->num_ext + 1;
+	/* CS_DIR_ENTRY seeds the checksum for the file dentry itself
+	 * (presumably skipping its checksum field — see calc_chksum_2byte) */
+	chksum = calc_chksum_2byte((void *) file_ep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
+
+	for (i = 1; i < num_entries; i++) {
+		ep = get_dentry_in_dir(sb, p_dir, entry+i, NULL);
+		if (!ep)
+			goto out_unlock;
+
+		chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, CS_DEFAULT);
+	}
+
+	file_ep->checksum = cpu_to_le16(chksum);
+	ret = dcache_modify(sb, sector);
+out_unlock:
+	dcache_unlock(sb, sector);
+	return ret;
+
+} /* end of update_dir_chksum */
+
+
+/* fill in the extension dentries of a newly created file/dir: set
+ * num_ext on the file dentry, name length/hash on the stream dentry,
+ * write one name dentry per 15 UTF-16 code units, then refresh the set
+ * checksum.  p_dosname is unused for exFAT.
+ * Returns 0 on success, -EIO if a dentry lookup fails.
+ * NOTE(review): the return values of dcache_modify() and
+ * update_dir_chksum() are ignored here, so a cache-write failure is not
+ * propagated — confirm callers tolerate this. */
+static s32 exfat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+		UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+	s32 i;
+	u64 sector;
+	u16 *uniname = p_uniname->name;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+	NAME_DENTRY_T *name_ep;
+
+	file_ep = (FILE_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return -EIO;
+
+	file_ep->num_ext = (u8)(num_entries - 1);
+	dcache_modify(sb, sector);
+
+	strm_ep = (STRM_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+1, &sector);
+	if (!strm_ep)
+		return -EIO;
+
+	strm_ep->name_len = p_uniname->name_len;
+	strm_ep->name_hash = cpu_to_le16(p_uniname->name_hash);
+	dcache_modify(sb, sector);
+
+	/* one name dentry per 15 UTF-16 code units of the name */
+	for (i = 2; i < num_entries; i++) {
+		name_ep = (NAME_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry+i, &sector);
+		if (!name_ep)
+			return -EIO;
+
+		__init_name_entry(name_ep, uniname);
+		dcache_modify(sb, sector);
+		uniname += 15;
+	}
+
+	update_dir_chksum(sb, p_dir, entry);
+
+	return 0;
+} /* end of exfat_init_ext_entry */
+
+
+/* mark dentries [entry+order, entry+num_entries) of a dentry set as
+ * deleted (clears the in-use bit via TYPE_DELETED); 'order' selects how
+ * many leading entries of the set are kept.
+ * Returns 0 on success, -EIO on lookup/cache failure. */
+static s32 exfat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+	s32 i;
+	u64 sector;
+	DENTRY_T *ep;
+
+	for (i = order; i < num_entries; i++) {
+		ep = get_dentry_in_dir(sb, p_dir, entry+i, &sector);
+		if (!ep)
+			return -EIO;
+
+		exfat_set_entry_type(ep, TYPE_DELETED);
+		if (dcache_modify(sb, sector))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/* copy 'count' cached dentries from entry set 'es' back to the dcache,
+ * starting at sector 'sec' / byte offset 'off'.  Writes go one sector at
+ * a time; when the last sector of a cluster is passed, the next cluster
+ * is +1 for a contiguous chain (alloc_flag 0x03) or taken from the FAT
+ * otherwise.  Returns 0 on success, -EIO on cache/FAT failure. */
+static s32 __write_partial_entries_in_entry_set(struct super_block *sb,
+		ENTRY_SET_CACHE_T *es, u64 sec, u32 off, u32 count)
+{
+	s32 num_entries;
+	u32 buf_off = (off - es->offset);
+	u32 remaining_byte_in_sector, copy_entries;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u32 clu;
+	u8 *buf, *esbuf = (u8 *)&(es->__buf);
+
+	TMSG("%s entered\n", __func__);
+	MMSG("%s: es %p sec %llu off %u cnt %d\n", __func__, es, sec, off, count);
+	num_entries = count;
+
+	while (num_entries) {
+		/* write per sector base */
+		remaining_byte_in_sector = (1 << sb->s_blocksize_bits) - off;
+		copy_entries = min((s32)(remaining_byte_in_sector >> DENTRY_SIZE_BITS), num_entries);
+		buf = dcache_getblk(sb, sec);
+		if (!buf)
+			goto err_out;
+		MMSG("es->buf %p buf_off %u\n", esbuf, buf_off);
+		MMSG("copying %d entries from %p to sector %llu\n", copy_entries, (esbuf + buf_off), sec);
+		memcpy(buf + off, esbuf + buf_off, copy_entries << DENTRY_SIZE_BITS);
+		dcache_modify(sb, sec);
+		num_entries -= copy_entries;
+
+		if (num_entries) {
+			// get next sector
+			if (IS_LAST_SECT_IN_CLUS(fsi, sec)) {
+				clu = SECT_TO_CLUS(fsi, sec);
+				if (es->alloc_flag == 0x03)
+					clu++;
+				else if (get_next_clus_safe(sb, &clu))
+					goto err_out;
+				sec = CLUS_TO_SECT(fsi, clu);
+			} else {
+				sec++;
+			}
+			off = 0;
+			buf_off += copy_entries << DENTRY_SIZE_BITS;
+		}
+	}
+
+	TMSG("%s exited successfully\n", __func__);
+	return 0;
+err_out:
+	TMSG("%s failed\n", __func__);
+	return -EIO;
+}
+
+/* write back all entries in entry set (convenience wrapper over the
+ * partial writer, starting at the set's own sector/offset) */
+static s32 __write_whole_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+	return __write_partial_entries_in_entry_set(sb, es, es->sector, es->offset, es->num_entries);
+}
+
+/* recompute the set checksum over the CACHED copy in 'es' (first entry
+ * with CS_DIR_ENTRY, the rest with CS_DEFAULT), patch it into the cached
+ * file dentry, then flush the whole set back to disk.
+ * Returns 0 on success, -EIO on write failure. */
+s32 update_dir_chksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+	DENTRY_T *ep;
+	u16 chksum = 0;
+	s32 chksum_type = CS_DIR_ENTRY, i;
+
+	ep = (DENTRY_T *)&(es->__buf);
+	for (i = 0; i < es->num_entries; i++) {
+		MMSG("%s %p\n", __func__, ep);
+		chksum = calc_chksum_2byte((void *) ep, DENTRY_SIZE, chksum, chksum_type);
+		ep++;
+		chksum_type = CS_DEFAULT;
+	}
+
+	ep = (DENTRY_T *)&(es->__buf);
+	((FILE_DENTRY_T *)ep)->checksum = cpu_to_le16(chksum);
+	return __write_whole_entry_set(sb, es);
+}
+
+/* returns a set of dentries for a file or dir.
+ * Note that this is a copy (dump) of dentries, so the caller must call
+ * update_dir_chksum_with_entry_set() (which writes the whole set back)
+ * to apply changes made in this entry set to the real device.
+ * in:
+ *   sb+p_dir+entry: indicates a file/dir
+ *   type: specifies how many dentries should be included.
+ * out:
+ *   file_ep: will point the first dentry(= file dentry) on success
+ * return:
+ *   pointer of entry set on success,
+ *   NULL on failure.
+ */
+
+/* states of the dentry-set validation state machine below */
+#define ES_MODE_STARTED				0
+#define ES_MODE_GET_FILE_ENTRY			1
+#define ES_MODE_GET_STRM_ENTRY			2
+#define ES_MODE_GET_NAME_ENTRY			3
+#define ES_MODE_GET_CRITICAL_SEC_ENTRY		4
+ENTRY_SET_CACHE_T *get_dentry_set_in_dir(struct super_block *sb,
+		CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep)
+{
+	s32 ret;
+	u32 off, byte_offset, clu = 0;
+	u32 entry_type;
+	u64 sec;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	ENTRY_SET_CACHE_T *es = NULL;
+	DENTRY_T *ep, *pos;
+	u8 *buf;
+	u8 num_entries;
+	s32 mode = ES_MODE_STARTED;
+
+	/* FIXME : is available in error case? */
+	if (p_dir->dir == DIR_DELETED) {
+		EMSG("%s : access to deleted dentry\n", __func__);
+		/* reaching here without a recorded prior I/O error is a bug */
+		BUG_ON(!fsi->prev_eio);
+		return NULL;
+	}
+
+	TMSG("%s entered\n", __func__);
+	MMSG("p_dir dir %u flags %x size %d\n", p_dir->dir, p_dir->flags, p_dir->size);
+	MMSG("entry %d type %d\n", entry, type);
+
+	/* locate the cluster/sector/offset of the first (file) dentry */
+	byte_offset = entry << DENTRY_SIZE_BITS;
+	ret = walk_fat_chain(sb, p_dir, byte_offset, &clu);
+	if (ret)
+		return NULL;
+
+	/* byte offset in cluster */
+	byte_offset &= fsi->cluster_size - 1;
+
+	/* byte offset in sector */
+	off = byte_offset & (u32)(sb->s_blocksize - 1);
+
+	/* sector offset in cluster */
+	sec = byte_offset >> (sb->s_blocksize_bits);
+	sec += CLUS_TO_SECT(fsi, clu);
+
+	buf = dcache_getblk(sb, sec);
+	if (!buf)
+		goto err_out;
+
+	ep = (DENTRY_T *)(buf + off);
+	entry_type = exfat_get_entry_type(ep);
+
+	if ((entry_type != TYPE_FILE)
+			&& (entry_type != TYPE_DIR))
+		goto err_out;
+
+	if (type == ES_ALL_ENTRIES)
+		num_entries = ((FILE_DENTRY_T *)ep)->num_ext+1;
+	else
+		num_entries = type;
+
+	/* es->__buf is a void* whose storage doubles as the start of the
+	 * copied dentry array (hence "__buf should be the last member") */
+	MMSG("trying to malloc %lx bytes for %d entries\n",
+		(unsigned long)(offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), num_entries);
+	es = kmalloc((offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries) * sizeof(DENTRY_T)), GFP_KERNEL);
+	if (!es) {
+		EMSG("%s: failed to alloc entryset\n", __func__);
+		goto err_out;
+	}
+
+	es->num_entries = num_entries;
+	es->sector = sec;
+	es->offset = off;
+	es->alloc_flag = p_dir->flags;
+
+	pos = (DENTRY_T *) &(es->__buf);
+
+	while (num_entries) {
+		// instead of copying whole sector, we will check every entry.
+		// this will provide minimum stability and consistency.
+		entry_type = exfat_get_entry_type(ep);
+
+		if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED))
+			goto err_out;
+
+		/* validate the expected entry ordering:
+		 * file -> stream -> name... -> optional critical secondary */
+		switch (mode) {
+		case ES_MODE_STARTED:
+			if  ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR))
+				mode = ES_MODE_GET_FILE_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_FILE_ENTRY:
+			if (entry_type == TYPE_STREAM)
+				mode = ES_MODE_GET_STRM_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_STRM_ENTRY:
+			if (entry_type == TYPE_EXTEND)
+				mode = ES_MODE_GET_NAME_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_NAME_ENTRY:
+			if (entry_type == TYPE_EXTEND)
+				break;
+			else if (entry_type == TYPE_STREAM)
+				goto err_out;
+			else if (entry_type & TYPE_CRITICAL_SEC)
+				mode = ES_MODE_GET_CRITICAL_SEC_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_CRITICAL_SEC_ENTRY:
+			if ((entry_type == TYPE_EXTEND) || (entry_type == TYPE_STREAM))
+				goto err_out;
+			else if ((entry_type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC)
+				goto err_out;
+			break;
+		}
+
+		/* copy dentry */
+		memcpy(pos, ep, sizeof(DENTRY_T));
+
+		if (--num_entries == 0)
+			break;
+
+		/* detects the in-sector offset wrapping past a sector end */
+		if (((off + DENTRY_SIZE) & (u32)(sb->s_blocksize - 1)) <
+			(off & (u32)(sb->s_blocksize - 1))) {
+			// get the next sector
+			if (IS_LAST_SECT_IN_CLUS(fsi, sec)) {
+				if (es->alloc_flag == 0x03)
+					clu++;
+				else if (get_next_clus_safe(sb, &clu))
+					goto err_out;
+				sec = CLUS_TO_SECT(fsi, clu);
+			} else {
+				sec++;
+			}
+			buf = dcache_getblk(sb, sec);
+			if (!buf)
+				goto err_out;
+			off = 0;
+			ep = (DENTRY_T *)(buf);
+		} else {
+			ep++;
+			off += DENTRY_SIZE;
+		}
+		pos++;
+	}
+
+	if (file_ep)
+		*file_ep = (DENTRY_T *)&(es->__buf);
+
+	MMSG("es sec %llu offset %u flags %d, num_entries %u buf ptr %p\n",
+		es->sector, es->offset, es->alloc_flag, es->num_entries, &(es->__buf));
+	TMSG("%s exited %p\n", __func__, es);
+	return es;
+err_out:
+	TMSG("%s exited (return NULL) (es %p)\n", __func__, es);
+
+	/* kfree(NULL) is safe */
+	kfree(es);
+	es = NULL;
+	return NULL;
+}
+
+/* free a dentry set allocated by get_dentry_set_in_dir(); a NULL 'es'
+ * is accepted (kfree(NULL) is a no-op).
+ * Fix: dropped the dead "es = NULL;" store — 'es' is a by-value
+ * parameter, so nulling it had no effect on the caller. */
+void release_dentry_set(ENTRY_SET_CACHE_T *es)
+{
+	TMSG("%s %p\n", __func__, es);
+
+	/* kfree(NULL) is safe */
+	kfree(es);
+}
+
+/* copy up to 15 UTF-16 code units from a name dentry into 'uniname' and
+ * return the number of units copied (terminator excluded).  The output
+ * is always NUL-terminated, so the destination must hold at least 16
+ * units (callers use u16 entry_uniname[16]).
+ * NOTE(review): 'order' is unused here — confirm before relying on it. */
+static s32 __extract_uni_name_from_name_entry(NAME_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+	s32 i, len = 0;
+
+	for (i = 0; i < 15; i++) {
+		/* FIXME : unaligned? */
+		*uniname = le16_to_cpu(ep->unicode_0_14[i]);
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	*uniname = 0x0;
+	return len;
+
+} /* end of __extract_uni_name_from_name_entry */
+
+#define DIRENT_STEP_FILE (0)
+#define DIRENT_STEP_STRM (1)
+#define DIRENT_STEP_NAME (2)
+#define DIRENT_STEP_SECD (3)
+
+/* return values of exfat_find_dir_entry()
+ * >= 0 : return dir entry position with the name in dir
+ * -EEXIST : (root dir, ".") it is the root dir itself
+ * -ENOENT : entry with the name does not exist
+ * -EIO : I/O error
+ */
+static s32 exfat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid,
+		CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *unused, u32 type)
+{
+	s32 i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len;
+	s32 order, step, name_len;
+	s32 dentries_per_clu, num_empty = 0;
+	u32 entry_type;
+	u16 entry_uniname[16], *uniname = NULL, unichar;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	HINT_T *hint_stat = &fid->hint_stat;
+	HINT_FEMP_T candi_empty;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+	NAME_DENTRY_T *name_ep;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/*
+	 * REMARK:
+	 * DOT and DOTDOT are handled by VFS layer
+	 */
+
+	if (IS_CLUS_FREE(p_dir->dir))
+		return -EIO;
+
+	dentries_per_clu = fsi->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.size = p_dir->size;
+	clu.flags = p_dir->flags;
+
+	/* Resume scanning from the last lookup position recorded in the
+	 * hint; end_eidx marks where a wrapped-around (rewound) scan must
+	 * stop to avoid visiting the same entries twice.
+	 */
+	if (hint_stat->eidx) {
+		clu.dir = hint_stat->clu;
+		dentry = hint_stat->eidx;
+		end_eidx = dentry;
+	}
+
+	candi_empty.eidx = -1;
+rewind:
+	/* state machine over the exfat entry-set layout:
+	 * FILE -> STRM(stream-ext) -> NAME entries -> optional SECD entries
+	 */
+	order = 0;
+	step = DIRENT_STEP_FILE;
+	while (!IS_CLUS_EOF(clu.dir)) {
+		i = dentry & (dentries_per_clu - 1);
+		for (; i < dentries_per_clu; i++, dentry++) {
+			if (rewind && (dentry == end_eidx))
+				goto not_found;
+
+			ep = get_dentry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -EIO;
+
+			entry_type = exfat_get_entry_type(ep);
+
+			if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) {
+				step = DIRENT_STEP_FILE;
+
+				/* While searching, opportunistically track the
+				 * first run of free slots large enough to hold
+				 * num_entries; the result is cached in
+				 * fid->hint_femp for a later create/rename.
+				 */
+				num_empty++;
+				if (candi_empty.eidx == -1) {
+					if (num_empty == 1) {
+						candi_empty.cur.dir = clu.dir;
+						candi_empty.cur.size = clu.size;
+						candi_empty.cur.flags = clu.flags;
+					}
+
+					if (num_empty >= num_entries) {
+						candi_empty.eidx = dentry - (num_empty - 1);
+						ASSERT(0 <= candi_empty.eidx);
+						candi_empty.count = num_empty;
+
+						if ((fid->hint_femp.eidx == -1) ||
+							(candi_empty.eidx <= fid->hint_femp.eidx)) {
+							memcpy(&fid->hint_femp,
+								&candi_empty,
+								sizeof(HINT_FEMP_T));
+						}
+					}
+				}
+
+				/* TYPE_UNUSED means no valid entry follows */
+				if (entry_type == TYPE_UNUSED)
+					goto not_found;
+				continue;
+			}
+
+			num_empty = 0;
+			candi_empty.eidx = -1;
+
+			if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+				step = DIRENT_STEP_FILE;
+				if ((type == TYPE_ALL) || (type == entry_type)) {
+					file_ep = (FILE_DENTRY_T *) ep;
+					num_ext = file_ep->num_ext;
+					step = DIRENT_STEP_STRM;
+				}
+				continue;
+			}
+
+			if (entry_type == TYPE_STREAM) {
+				if (step != DIRENT_STEP_STRM) {
+					step = DIRENT_STEP_FILE;
+					continue;
+				}
+				step = DIRENT_STEP_FILE;
+				strm_ep = (STRM_DENTRY_T *) ep;
+				/* cheap pre-check (hash + length) before the
+				 * per-entry name comparison below
+				 */
+				if ((p_uniname->name_hash == le16_to_cpu(strm_ep->name_hash)) &&
+					(p_uniname->name_len == strm_ep->name_len)) {
+					step = DIRENT_STEP_NAME;
+					order = 1;
+					name_len = 0;
+				}
+				continue;
+			}
+
+			if (entry_type == TYPE_EXTEND) {
+				if (step != DIRENT_STEP_NAME) {
+					step = DIRENT_STEP_FILE;
+					continue;
+				}
+				name_ep = (NAME_DENTRY_T *) ep;
+
+				/* each name entry carries up to 15 UTF-16 code
+				 * units; advance the window over the target name
+				 */
+				if ((++order) == 2)
+					uniname = p_uniname->name;
+				else
+					uniname += 15;
+
+				len = __extract_uni_name_from_name_entry(name_ep, entry_uniname, order);
+				name_len += len;
+
+				/* temporarily NUL-terminate the 15-char window of
+				 * the target name so it can be compared as a
+				 * string; restored afterwards
+				 */
+				unichar = *(uniname+len);
+				*(uniname+len) = 0x0;
+
+				if (nls_cmp_uniname(sb, uniname, entry_uniname)) {
+					step = DIRENT_STEP_FILE;
+				} else if (name_len == p_uniname->name_len) {
+					if (order == num_ext) {
+						//fid->hint_femp.eidx = -1;
+						goto found;
+					}
+					step = DIRENT_STEP_SECD;
+				}
+
+				*(uniname+len) = unichar;
+				continue;
+			}
+
+			if (entry_type & (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) {
+				if (step == DIRENT_STEP_SECD) {
+					if (++order == num_ext)
+						goto found;
+					continue;
+				}
+			}
+			step = DIRENT_STEP_FILE;
+		}
+
+		/* 0x03 : contiguous chain, next cluster is implicit */
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUS_EOF;
+		} else {
+			if (get_next_clus_safe(sb, &clu.dir))
+				return -EIO;
+		}
+	}
+
+not_found:
+	/* We started at a non-zero index, so we should try to find the
+	 * target from index 0 up to the index we started at.
+	 */
+	if (!rewind && end_eidx) {
+		rewind = 1;
+		dentry = 0;
+		clu.dir = p_dir->dir;
+		/* reset empty hint */
+		num_empty = 0;
+		candi_empty.eidx = -1;
+		goto rewind;
+	}
+
+	/* initialized hint_stat */
+	hint_stat->clu = p_dir->dir;
+	hint_stat->eidx = 0;
+	return -ENOENT;
+
+found:
+	/* next dentry we'll find is out of this cluster */
+	if (!((dentry + 1) & (dentries_per_clu-1))) {
+		int ret = 0;
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUS_EOF;
+		} else {
+			ret = get_next_clus_safe(sb, &clu.dir);
+		}
+
+		if (ret || IS_CLUS_EOF(clu.dir)) {
+			/* just initialized hint_stat */
+			hint_stat->clu = p_dir->dir;
+			hint_stat->eidx = 0;
+			return (dentry - num_ext);
+		}
+	}
+
+	hint_stat->clu = clu.dir;
+	hint_stat->eidx = dentry + 1;
+	/* position of the FILE entry, not of the last name entry */
+	return (dentry - num_ext);
+} /* end of exfat_find_dir_entry */
+
+/* Count the secondary (stream + name) entries that follow the given
+ * file entry. Counting stops at the first entry that is neither
+ * TYPE_EXTEND nor TYPE_STREAM. Returns the count, or -EIO on a read error.
+ */
+static s32 exfat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+	s32 i, count = 0;
+	u32 type;
+	FILE_DENTRY_T *file_ep = (FILE_DENTRY_T *) p_entry;
+	DENTRY_T *ext_ep;
+
+	/* num_ext gives the expected number of secondary entries */
+	for (i = 0, entry++; i < file_ep->num_ext; i++, entry++) {
+		ext_ep = get_dentry_in_dir(sb, p_dir, entry, NULL);
+		if (!ext_ep)
+			return -EIO;
+
+		type = exfat_get_entry_type(ext_ep);
+		if ((type == TYPE_EXTEND) || (type == TYPE_STREAM))
+			count++;
+		else
+			return count;
+	}
+
+	return count;
+} /* end of exfat_count_ext_entries */
+
+
+/*
+ * Name Conversion Functions
+ */
+/* Rebuild the full unicode name of a dir entry set into 'uniname'.
+ * On any failure the output buffer is simply left as-is (void return).
+ */
+static void exfat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+	s32 i;
+	DENTRY_T *ep;
+	ENTRY_SET_CACHE_T *es;
+
+	es = get_dentry_set_in_dir(sb, p_dir, entry, ES_ALL_ENTRIES, &ep);
+	if (!es)
+		return;
+
+	/* need at least file + stream + one name entry */
+	if (es->num_entries < 3)
+		goto out;
+
+	ep += 2;
+
+	/*
+	 * First entry : file entry
+	 * Second entry : stream-extension entry
+	 * Third entry : first file-name entry
+	 * So, the index of first file-name dentry should start from 2.
+	 */
+	for (i = 2; i < es->num_entries; i++, ep++) {
+		/* end of name entry */
+		if (exfat_get_entry_type(ep) != TYPE_EXTEND)
+			goto out;
+
+		/* each name entry contributes up to 15 UTF-16 units */
+		__extract_uni_name_from_name_entry((NAME_DENTRY_T *)ep, uniname, i);
+		uniname += 15;
+	}
+
+out:
+	release_dentry_set(es);
+} /* end of exfat_get_uniname_from_ext_entry */
+
+/* Number of directory entries needed to store the given unicode name;
+ * returns 0 for an empty name.
+ */
+static s32 exfat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+	s32 len;
+
+	len = p_uniname->name_len;
+	if (len == 0)
+		return 0;
+
+	/* 1 file entry + 1 stream entry + name entries (15 chars each) */
+	return((len-1) / 15 + 3);
+
+} /* end of exfat_calc_num_entries */
+
+/* Return -ENOSPC if the directory already holds the maximum number of
+ * dentries allowed by the exFAT spec, 0 otherwise.
+ */
+static s32 exfat_check_max_dentries(FILE_ID_T *fid)
+{
+	if ((fid->size >> DENTRY_SIZE_BITS) >= MAX_EXFAT_DENTRIES) {
+		/* exFAT spec allows a dir to grow upto 8388608(256MB) dentries */
+		return -ENOSPC;
+	}
+	return 0;
+} /* end of exfat_check_max_dentries */
+
+/*
+ * Allocation Bitmap Management Functions
+ */
+/* Locate the allocation-bitmap dentry in the root directory and read the
+ * whole bitmap into fsi->vol_amap (one buffer_head per sector).
+ * Returns 0 on success, -EIO/-ENOMEM on failure, or -EINVAL when no
+ * bitmap dentry exists on the volume.
+ */
+s32 load_alloc_bmp(struct super_block *sb)
+{
+	s32 ret;
+	u32 i, j, map_size, type, need_map_size;
+	u64 sector;
+	CHAIN_T clu;
+	BMAP_DENTRY_T *ep;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	clu.dir = fsi->root_dir;
+	clu.flags = 0x01;
+
+	while (!IS_CLUS_EOF(clu.dir)) {
+		for (i = 0; i < fsi->dentries_per_clu; i++) {
+			ep = (BMAP_DENTRY_T *) get_dentry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -EIO;
+
+			type = exfat_get_entry_type((DENTRY_T *) ep);
+
+			if (type == TYPE_UNUSED)
+				break;
+			if (type != TYPE_BITMAP)
+				continue;
+
+			/* flags == 0 : first allocation bitmap (the only one
+			 * used on non-TexFAT volumes)
+			 */
+			if (ep->flags == 0x0) {
+				fsi->map_clu = le32_to_cpu(ep->start_clu);
+				map_size = (u32) le64_to_cpu(ep->size);
+
+				/* one bit per cluster of the cluster heap */
+				need_map_size = (((fsi->num_clusters - CLUS_BASE) - 1) >> 3) + 1;
+				if (need_map_size != map_size) {
+					sdfat_log_msg(sb, KERN_ERR,
+						"bogus allocation bitmap size(need : %u, cur : %u)",
+						need_map_size, map_size);
+					/* Only allowed when bogus allocation bitmap size is large */
+					if (need_map_size > map_size)
+						return -EIO;
+				}
+				fsi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1;
+				fsi->vol_amap =
+					kmalloc((sizeof(struct buffer_head *) * fsi->map_sectors), GFP_KERNEL);
+				if (!fsi->vol_amap)
+					return -ENOMEM;
+
+				sector = CLUS_TO_SECT(fsi, fsi->map_clu);
+
+				for (j = 0; j < fsi->map_sectors; j++) {
+					fsi->vol_amap[j] = NULL;
+					ret = read_sect(sb, sector+j, &(fsi->vol_amap[j]), 1);
+					if (ret) {
+						/* release all buffers and free vol_amap */
+						i = 0;
+						while (i < j)
+							brelse(fsi->vol_amap[i++]);
+
+						/* kfree(NULL) is safe */
+						kfree(fsi->vol_amap);
+						fsi->vol_amap = NULL;
+						return ret;
+					}
+				}
+
+				fsi->pbr_bh = NULL;
+				return 0;
+			}
+		}
+
+		if (get_next_clus_safe(sb, &clu.dir))
+			return -EIO;
+	}
+
+	/* no allocation-bitmap dentry found : corrupt volume */
+	return -EINVAL;
+} /* end of load_alloc_bmp */
+
+/* Release all allocation-bitmap buffers loaded by load_alloc_bmp()
+ * (and the cached PBR buffer head, if any).
+ */
+void free_alloc_bmp(struct super_block *sb)
+{
+	s32 i;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* brelse(NULL) is a no-op */
+	brelse(fsi->pbr_bh);
+
+	for (i = 0; i < fsi->map_sectors; i++)
+		__brelse(fsi->vol_amap[i]);
+
+	/* kfree(NULL) is safe */
+	kfree(fsi->vol_amap);
+	fsi->vol_amap = NULL;
+}
+
+/* Mark cluster 'clu' as allocated in the in-memory bitmap and write the
+ * affected bitmap sector back to disk.
+ *
+ * WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static s32 set_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	s32 i, b;
+	u64 sector;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* i : bitmap sector index, b : bit offset within that sector */
+	i = clu >> (sb->s_blocksize_bits + 3);
+	b = clu & (u32)((sb->s_blocksize << 3) - 1);
+
+	sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i;
+	bitmap_set((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1);
+
+	return write_sect(sb, sector, fsi->vol_amap[i], 0);
+} /* end of set_alloc_bitmap */
+
+/* Mark cluster 'clu' as free in the bitmap, write the affected sector
+ * back, and optionally issue a discard for the freed cluster.
+ *
+ * WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static s32 clr_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	s32 ret;
+	s32 i, b;
+	u64 sector;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct sdfat_mount_options *opts = &sbi->options;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* i : bitmap sector index, b : bit offset within that sector */
+	i = clu >> (sb->s_blocksize_bits + 3);
+	b = clu & (u32)((sb->s_blocksize << 3) - 1);
+
+	sector = CLUS_TO_SECT(fsi, fsi->map_clu) + i;
+
+	bitmap_clear((unsigned long *)(fsi->vol_amap[i]->b_data), b, 1);
+
+	ret = write_sect(sb, sector, fsi->vol_amap[i], 0);
+
+	/* best-effort discard; failure only disables future discards */
+	if (opts->discard) {
+		s32 ret_discard;
+
+		TMSG("discard cluster(%08x)\n", clu+2);
+		ret_discard = sb_issue_discard(sb, CLUS_TO_SECT(fsi, clu+2),
+				(1 << fsi->sect_per_clus_bits), GFP_NOFS, 0);
+
+		if (ret_discard == -EOPNOTSUPP) {
+			sdfat_msg(sb, KERN_ERR,
+				"discard not supported by device, disabling");
+			opts->discard = 0;
+		}
+	}
+
+	return ret;
+} /* end of clr_alloc_bitmap */
+
+/* Find the first free cluster at or after 'clu', scanning the bitmap a
+ * byte at a time and wrapping around the cluster heap if needed.
+ * Returns the free cluster number, or CLUS_EOF when none exists.
+ *
+ * WARN :
+ * If the value of "clu" is 0, it means cluster 2 which is
+ * the first cluster of cluster heap.
+ */
+static u32 test_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	u32 i, map_i, map_b;
+	u32 clu_base, clu_free;
+	u8 k, clu_mask;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* clu_base : first cluster covered by the current bitmap byte;
+	 * clu_mask : bits below 'clu' in the first byte, treated as used
+	 * so the search does not return a cluster before the start point
+	 */
+	clu_base = (clu & ~(0x7)) + 2;
+	clu_mask = (1 << (clu - clu_base + 2)) - 1;
+
+	map_i = clu >> (sb->s_blocksize_bits + 3);
+	map_b = (clu >> 3) & (u32)(sb->s_blocksize - 1);
+
+	for (i = 2; i < fsi->num_clusters; i += 8) {
+		k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b);
+		if (clu_mask > 0) {
+			k |= clu_mask;
+			clu_mask = 0;
+		}
+		/* 0xFF : all 8 clusters in this byte are allocated */
+		if (k < 0xFF) {
+			clu_free = clu_base + free_bit[k];
+			if (clu_free < fsi->num_clusters)
+				return clu_free;
+		}
+		clu_base += 8;
+
+		/* advance to the next bitmap sector / wrap to the start */
+		if (((++map_b) >= (u32)sb->s_blocksize) ||
+			(clu_base >= fsi->num_clusters)) {
+			if ((++map_i) >= fsi->map_sectors) {
+				clu_base = 2;
+				map_i = 0;
+			}
+			map_b = 0;
+		}
+	}
+
+	return CLUS_EOF;
+} /* end of test_alloc_bitmap */
+
+/* Synchronously flush every dirty allocation-bitmap sector to disk.
+ * No-op when the bitmap has not been loaded.
+ */
+void sync_alloc_bmp(struct super_block *sb)
+{
+	s32 i;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (fsi->vol_amap == NULL)
+		return;
+
+	for (i = 0; i < fsi->map_sectors; i++)
+		sync_dirty_buffer(fsi->vol_amap[i]);
+}
+
+/* Write an explicit FAT chain for 'len' contiguous clusters starting at
+ * 'chain' (each entry points to the next, last one is CLUS_EOF). Used
+ * when a contiguous (flags 0x03) chain must be converted to a FAT chain.
+ * Returns 0 on success or -EIO.
+ */
+static s32 exfat_chain_cont_cluster(struct super_block *sb, u32 chain, u32 len)
+{
+	if (!len)
+		return 0;
+
+	while (len > 1) {
+		if (fat_ent_set(sb, chain, chain+1))
+			return -EIO;
+		chain++;
+		len--;
+	}
+
+	/* terminate the chain */
+	if (fat_ent_set(sb, chain, CLUS_EOF))
+		return -EIO;
+	return 0;
+}
+
+/* Public wrapper around exfat_chain_cont_cluster() for other modules. */
+s32 chain_cont_cluster(struct super_block *sb, u32 chain, u32 len)
+{
+	return exfat_chain_cont_cluster(sb, chain, len);
+}
+
+
+/* Free the cluster chain described by p_chain. For a contiguous chain
+ * (flags 0x03) exactly p_chain->size clusters are cleared; otherwise the
+ * FAT chain is followed until EOF. When do_relse is set, the dcache
+ * entries of each cluster's sectors are released first.
+ * Returns 0 on success or -EIO; fsi->used_clusters is reduced by the
+ * number of clusters actually freed, even on partial failure.
+ */
+static s32 exfat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+	s32 ret = -EIO;
+	u32 num_clusters = 0;
+	u32 clu;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 i;
+	u64 sector;
+
+	/* invalid cluster number */
+	if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir))
+		return 0;
+
+	/* no cluster to truncate */
+	if (p_chain->size == 0) {
+		DMSG("%s: cluster(%u) truncation is not required.",
+			__func__, p_chain->dir);
+		return 0;
+	}
+
+	/* check cluster validation
+	 * (was '&&', which could never be true and made the check dead)
+	 */
+	if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) {
+		EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir);
+		sdfat_debug_bug_on(1);
+		return -EIO;
+	}
+
+	set_sb_dirty(sb);
+	clu = p_chain->dir;
+
+	if (p_chain->flags == 0x03) {
+		/* contiguous chain : clusters are consecutive on disk */
+		do {
+			if (do_relse) {
+				sector = CLUS_TO_SECT(fsi, clu);
+				for (i = 0; i < fsi->sect_per_clus; i++) {
+					if (dcache_release(sb, sector+i) == -EIO)
+						goto out;
+				}
+			}
+
+			/* bitmap index is heap-relative (cluster - 2) */
+			if (clr_alloc_bitmap(sb, clu-2))
+				goto out;
+			clu++;
+
+			num_clusters++;
+		} while (num_clusters < p_chain->size);
+	} else {
+		/* FAT chain : walk the chain until EOF */
+		do {
+			if (do_relse) {
+				sector = CLUS_TO_SECT(fsi, clu);
+				for (i = 0; i < fsi->sect_per_clus; i++) {
+					if (dcache_release(sb, sector+i) == -EIO)
+						goto out;
+				}
+			}
+
+			if (clr_alloc_bitmap(sb, (clu - CLUS_BASE)))
+				goto out;
+
+			if (get_next_clus_safe(sb, &clu))
+				goto out;
+
+			num_clusters++;
+		} while (!IS_CLUS_EOF(clu));
+	}
+
+	/* success */
+	ret = 0;
+out:
+
+	fsi->used_clusters -= num_clusters;
+	return ret;
+} /* end of exfat_free_cluster */
+
+/* Allocate num_alloc clusters and append them to the chain in p_chain.
+ * The search starts at p_chain->dir (or at fsi->clu_srch_ptr when the
+ * chain is empty). A contiguous chain (flags 0x03) is converted to a
+ * FAT chain as soon as a non-adjacent cluster is allocated.
+ * Returns 0 on success, -ENOSPC when the volume is full, or -EIO;
+ * partially allocated clusters are freed again on error.
+ */
+static s32 exfat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+	s32 ret = -ENOSPC;
+	u32 num_clusters = 0, total_cnt;
+	u32 hint_clu, new_clu, last_clu = CLUS_EOF;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	total_cnt = fsi->num_clusters - CLUS_BASE;
+
+	if (unlikely(total_cnt < fsi->used_clusters)) {
+		sdfat_fs_error_ratelimit(sb,
+			"%s: invalid used clusters(t:%u,u:%u)\n",
+			__func__, total_cnt, fsi->used_clusters);
+		return -EIO;
+	}
+
+	if (num_alloc > total_cnt - fsi->used_clusters)
+		return -ENOSPC;
+
+	hint_clu = p_chain->dir;
+	/* find new cluster */
+	if (IS_CLUS_EOF(hint_clu)) {
+		if (fsi->clu_srch_ptr < CLUS_BASE) {
+			EMSG("%s: fsi->clu_srch_ptr is invalid (%u)\n",
+				__func__, fsi->clu_srch_ptr);
+			ASSERT(0);
+			fsi->clu_srch_ptr = CLUS_BASE;
+		}
+
+		hint_clu = test_alloc_bitmap(sb, fsi->clu_srch_ptr - CLUS_BASE);
+		if (IS_CLUS_EOF(hint_clu))
+			return -ENOSPC;
+	}
+
+	/* check cluster validation
+	 * (was '&&', which could never be true, so this recovery path
+	 * was unreachable)
+	 */
+	if ((hint_clu < CLUS_BASE) || (hint_clu >= fsi->num_clusters)) {
+		EMSG("%s: hint_cluster is invalid (%u)\n", __func__, hint_clu);
+		ASSERT(0);
+		hint_clu = CLUS_BASE;
+		/* a contiguous chain cannot restart elsewhere : convert it */
+		if (p_chain->flags == 0x03) {
+			if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters))
+				return -EIO;
+			p_chain->flags = 0x01;
+		}
+	}
+
+	set_sb_dirty(sb);
+
+	p_chain->dir = CLUS_EOF;
+
+	while ((new_clu = test_alloc_bitmap(sb, hint_clu - CLUS_BASE)) != CLUS_EOF) {
+		/* chain is no longer contiguous : convert to FAT chain */
+		if ((new_clu != hint_clu) && (p_chain->flags == 0x03)) {
+			if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) {
+				ret = -EIO;
+				goto error;
+			}
+			p_chain->flags = 0x01;
+		}
+
+		/* update allocation bitmap */
+		if (set_alloc_bitmap(sb, new_clu - CLUS_BASE)) {
+			ret = -EIO;
+			goto error;
+		}
+
+		num_clusters++;
+
+		/* update FAT table */
+		if (p_chain->flags == 0x01) {
+			if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+				ret = -EIO;
+				goto error;
+			}
+		}
+
+		if (IS_CLUS_EOF(p_chain->dir)) {
+			p_chain->dir = new_clu;
+		} else if (p_chain->flags == 0x01) {
+			/* link the previous tail to the new cluster */
+			if (fat_ent_set(sb, last_clu, new_clu)) {
+				ret = -EIO;
+				goto error;
+			}
+		}
+		last_clu = new_clu;
+
+		if ((--num_alloc) == 0) {
+			fsi->clu_srch_ptr = hint_clu;
+			fsi->used_clusters += num_clusters;
+
+			p_chain->size += num_clusters;
+			return 0;
+		}
+
+		hint_clu = new_clu + 1;
+		if (hint_clu >= fsi->num_clusters) {
+			/* wrap the search to the start of the heap */
+			hint_clu = CLUS_BASE;
+
+			if (p_chain->flags == 0x03) {
+				if (exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters)) {
+					ret = -EIO;
+					goto error;
+				}
+				p_chain->flags = 0x01;
+			}
+		}
+	}
+error:
+	/* roll back whatever was allocated before the failure */
+	if (num_clusters)
+		exfat_free_cluster(sb, p_chain, 0);
+	return ret;
+} /* end of exfat_alloc_cluster */
+
+/* Count allocated clusters by summing set bits in the allocation bitmap,
+ * one byte (8 clusters) at a time. Always returns 0; the count is
+ * stored in *ret_count (clamped to the total cluster count).
+ */
+static s32 exfat_count_used_clusters(struct super_block *sb, u32 *ret_count)
+{
+	u32 count = 0;
+	u32 i, map_i, map_b;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	u32 total_clus = fsi->num_clusters - 2;
+
+	map_i = map_b = 0;
+
+	for (i = 0; i < total_clus; i += 8) {
+		u8 k = *(((u8 *) fsi->vol_amap[map_i]->b_data) + map_b);
+
+		/* used_bit[] : popcount lookup table */
+		count += used_bit[k];
+		if ((++map_b) >= (u32)sb->s_blocksize) {
+			map_i++;
+			map_b = 0;
+		}
+	}
+
+	/* FIXME : abnormal bitmap count should be handled as more smart */
+	if (total_clus < count)
+		count = total_clus;
+
+	*ret_count = count;
+	return 0;
+} /* end of exfat_count_used_clusters */
+
+
+/*
+ * File Operation Functions
+ */
+/* exFAT implementation of the filesystem operation table consumed by the
+ * format-independent core; installed into fsi->fs_func by mount_exfat().
+ */
+static FS_FUNC_T exfat_fs_func = {
+	/* cluster management */
+	.alloc_cluster = exfat_alloc_cluster,
+	.free_cluster = exfat_free_cluster,
+	.count_used_clusters = exfat_count_used_clusters,
+
+	/* directory entry management */
+	.init_dir_entry = exfat_init_dir_entry,
+	.init_ext_entry = exfat_init_ext_entry,
+	.find_dir_entry = exfat_find_dir_entry,
+	.delete_dir_entry = exfat_delete_dir_entry,
+	.get_uniname_from_ext_entry = exfat_get_uniname_from_ext_entry,
+	.count_ext_entries = exfat_count_ext_entries,
+	.calc_num_entries = exfat_calc_num_entries,
+	.check_max_dentries = exfat_check_max_dentries,
+
+	/* dentry field accessors */
+	.get_entry_type = exfat_get_entry_type,
+	.set_entry_type = exfat_set_entry_type,
+	.get_entry_attr = exfat_get_entry_attr,
+	.set_entry_attr = exfat_set_entry_attr,
+	.get_entry_flag = exfat_get_entry_flag,
+	.set_entry_flag = exfat_set_entry_flag,
+	.get_entry_clu0 = exfat_get_entry_clu0,
+	.set_entry_clu0 = exfat_set_entry_clu0,
+	.get_entry_size = exfat_get_entry_size,
+	.set_entry_size = exfat_set_entry_size,
+	.get_entry_time = exfat_get_entry_time,
+	.set_entry_time = exfat_set_entry_time,
+};
+
+/* Initialize the in-memory FS_INFO_T from the exFAT boot sector fields
+ * and install the exFAT operation table. Returns 0 on success or
+ * -EINVAL when the boot sector is inconsistent.
+ */
+s32 mount_exfat(struct super_block *sb, pbr_t *p_pbr)
+{
+	pbr64_t *p_bpb = (pbr64_t *)p_pbr;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (!p_bpb->bsx.num_fats) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+		return -EINVAL;
+	}
+
+	fsi->sect_per_clus = 1 << p_bpb->bsx.sect_per_clus_bits;
+	fsi->sect_per_clus_bits = p_bpb->bsx.sect_per_clus_bits;
+	fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+	fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+	fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bsx.fat_length);
+
+	fsi->FAT1_start_sector = le32_to_cpu(p_bpb->bsx.fat_offset);
+	if (p_bpb->bsx.num_fats == 1)
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+	else
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+	fsi->root_start_sector = le32_to_cpu(p_bpb->bsx.clu_offset);
+	fsi->data_start_sector = fsi->root_start_sector;
+
+	fsi->num_sectors = le64_to_cpu(p_bpb->bsx.vol_length);
+	fsi->num_clusters = le32_to_cpu(p_bpb->bsx.clu_count) + 2;
+	/* because the cluster index starts with 2 */
+
+	fsi->vol_type = EXFAT;
+	fsi->vol_id = le32_to_cpu(p_bpb->bsx.vol_serial);
+
+	fsi->root_dir = le32_to_cpu(p_bpb->bsx.root_cluster);
+	fsi->dentries_in_root = 0;
+	fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	fsi->vol_flag = (u32) le16_to_cpu(p_bpb->bsx.vol_flags);
+	fsi->clu_srch_ptr = CLUS_BASE;
+	/* ~0 : used-cluster count unknown until first statfs/count */
+	fsi->used_clusters = (u32) ~0;
+
+	fsi->fs_func = &exfat_fs_func;
+	fat_ent_ops_init(sb);
+
+	/* VOL_DIRTY : the volume was not cleanly unmounted */
+	if (p_bpb->bsx.vol_flags & VOL_DIRTY) {
+		fsi->vol_flag |= VOL_DIRTY;
+		sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+			"unmounted. Some data may be corrupt. "
+			"Please run fsck.");
+	}
+
+	return 0;
+} /* end of mount_exfat */
+
+/* end of core_exfat.c */
diff --git a/fs/sdfat/core_fat.c b/fs/sdfat/core_fat.c
new file mode 100644
index 000000000000..5e0a196ae42b
--- /dev/null
+++ b/fs/sdfat/core_fat.c
@@ -0,0 +1,1465 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : core_fat.c */
+/* PURPOSE : FAT-fs core code for sdFAT */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#define MAX_LFN_ORDER (20)
+
+/*
+ * MAX_EST_AU_SECT should be changed according to 32/64bits.
+ * On 32bit, 4KB page supports 512 clusters per AU.
+ * But, on 64bit, 4KB page can handle a half of total list_head of 32bit's.
+ * Because the size of the list_head structure on 64bit is twice that on 32bit.
+ */
+#if (BITS_PER_LONG == 64)
+//#define MAX_EST_AU_SECT (16384) /* upto 8MB */
+#define MAX_EST_AU_SECT (32768) /* upto 16MB, used more page for list_head */
+#else
+#define MAX_EST_AU_SECT (32768) /* upto 16MB */
+#endif
+
+/*======================================================================*/
+/* Local Function Declarations */
+/*======================================================================*/
+static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *, u16 *, s32);
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+/* Estimate a default allocation-unit (AU) size, in logical sectors, from
+ * the disk capacity and (when available) the device's discard
+ * granularity, capped at MAX_EST_AU_SECT.
+ */
+static u32 __calc_default_au_size(struct super_block *sb)
+{
+	struct block_device *bdev = sb->s_bdev;
+	struct gendisk *disk;
+	struct request_queue *queue;
+	struct queue_limits *limit;
+	unsigned int est_au_sect = MAX_EST_AU_SECT;
+	unsigned int est_au_size = 0;
+	unsigned int queue_au_size = 0;
+	sector_t total_sect = 0;
+
+	/* we assumed that sector size is 512 bytes */
+
+	disk = bdev->bd_disk;
+	if (!disk)
+		goto out;
+
+	queue = disk->queue;
+	if (!queue)
+		goto out;
+
+	limit = &queue->limits;
+	queue_au_size = limit->discard_granularity;
+
+	/* estimate function(x) =
+	 * (total_sect / 2) * 512 / 1024
+	 * => (total_sect >> 1) >> 1)
+	 * => (total_sect >> 2)
+	 * => estimated bytes size
+	 *
+	 * ex1) <= 8GB -> 4MB
+	 * ex2) 16GB -> 8MB
+	 * ex3) >= 32GB -> 16MB
+	 */
+	/* NOTE(review): direct access to disk->part0.nr_sects bypasses the
+	 * part_nr_sects_read() accessor — assumed safe here; confirm for
+	 * the targeted kernel version.
+	 */
+	total_sect = disk->part0.nr_sects;
+	est_au_size = total_sect >> 2;
+
+	/* au_size assumed that bytes per sector is 512 */
+	est_au_sect = est_au_size >> 9;
+
+	MMSG("DBG1: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n",
+		(u64)total_sect, est_au_size, est_au_sect);
+
+	/* round the estimate up to the nearest supported AU size */
+	if (est_au_sect <= 8192) {
+		/* 4MB */
+		est_au_sect = 8192;
+	} else if (est_au_sect <= 16384) {
+		/* 8MB */
+		est_au_sect = 16384;
+	} else {
+		/* 8MB or 16MB */
+		est_au_sect = MAX_EST_AU_SECT;
+	}
+
+	MMSG("DBG2: total_sect(%llu) est_au_size(%u) est_au_sect(%u)\n",
+		(u64)total_sect, est_au_size, est_au_sect);
+
+	/* prefer the device's discard granularity when it is larger but
+	 * still within the supported maximum
+	 */
+	if (est_au_size < queue_au_size &&
+		queue_au_size <= (MAX_EST_AU_SECT << 9)) {
+		DMSG("use queue_au_size(%u) instead of est_au_size(%u)\n",
+			queue_au_size, est_au_size);
+		est_au_sect = queue_au_size >> 9;
+	}
+
+out:
+	/* scale from 512-byte sectors to the actual logical block size */
+	if (sb->s_blocksize != 512) {
+		ASSERT(sb->s_blocksize_bits > 9);
+		sdfat_log_msg(sb, KERN_INFO,
+			"adjustment est_au_size by logical block size(%lu)",
+			sb->s_blocksize);
+		est_au_sect >>= (sb->s_blocksize_bits - 9);
+	}
+
+	sdfat_log_msg(sb, KERN_INFO, "set default AU sectors : %u "
+		"(queue_au_size : %u KB, disk_size : %llu MB)",
+		est_au_sect, queue_au_size >> 10, (u64)(total_sect >> 11));
+	return est_au_sect;
+}
+
+
+/*
+ * Cluster Management Functions
+ */
+/* Free a FAT12/16/32 cluster chain starting at p_chain->dir, following
+ * FAT links until EOF. When do_relse is set, dcache entries for each
+ * cluster's sectors are released first. Returns 0 on success or -EIO;
+ * fsi->used_clusters is reduced by the clusters actually freed.
+ */
+static s32 fat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+	s32 ret = -EIO;
+	s32 num_clusters = 0;
+	u32 clu, prev;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	s32 i;
+	u64 sector;
+
+	/* invalid cluster number */
+	if (IS_CLUS_FREE(p_chain->dir) || IS_CLUS_EOF(p_chain->dir))
+		return 0;
+
+	/* no cluster to truncate */
+	if (!p_chain->size) {
+		DMSG("%s: cluster(%u) truncation is not required.",
+			__func__, p_chain->dir);
+		return 0;
+	}
+
+	/* check cluster validation
+	 * (was '&&', which could never be true and made the check dead)
+	 */
+	if ((p_chain->dir < 2) || (p_chain->dir >= fsi->num_clusters)) {
+		EMSG("%s: invalid start cluster (%u)\n", __func__, p_chain->dir);
+		sdfat_debug_bug_on(1);
+		return -EIO;
+	}
+
+
+	set_sb_dirty(sb);
+	clu = p_chain->dir;
+
+	do {
+		if (do_relse) {
+			sector = CLUS_TO_SECT(fsi, clu);
+			for (i = 0; i < fsi->sect_per_clus; i++) {
+				if (dcache_release(sb, sector+i) == -EIO)
+					goto out;
+			}
+		}
+
+		/* fetch the next link before freeing the current one */
+		prev = clu;
+		if (get_next_clus_safe(sb, &clu)) {
+			/* print more helpful log */
+			if (IS_CLUS_BAD(clu)) {
+				sdfat_log_msg(sb, KERN_ERR, "%s : "
+					"deleting bad cluster (clu[%u]->BAD)",
+					__func__, prev);
+			} else if (IS_CLUS_FREE(clu)) {
+				sdfat_log_msg(sb, KERN_ERR, "%s : "
+					"deleting free cluster (clu[%u]->FREE)",
+					__func__, prev);
+			}
+			goto out;
+		}
+
+		/* Free FAT chain */
+		if (fat_ent_set(sb, prev, CLUS_FREE))
+			goto out;
+
+		/* Update AMAP if needed */
+		if (fsi->amap) {
+			if (amap_release_cluster(sb, prev))
+				return -EIO;
+		}
+
+		num_clusters++;
+
+	} while (!IS_CLUS_EOF(clu));
+
+	/* success */
+	ret = 0;
+out:
+	fsi->used_clusters -= num_clusters;
+	return ret;
+} /* end of fat_free_cluster */
+
+/* Allocate num_alloc clusters for a FAT12/16/32 volume by linearly
+ * scanning the FAT for free entries, starting near p_chain->dir (or the
+ * cached search pointer) and wrapping around once. Returns 0 on success,
+ * -ENOSPC when full, or -EIO; partial allocations are freed on error.
+ */
+static s32 fat_alloc_cluster(struct super_block *sb, u32 num_alloc, CHAIN_T *p_chain, s32 dest)
+{
+	s32 ret = -ENOSPC;
+	u32 i, num_clusters = 0, total_cnt;
+	u32 new_clu, last_clu = CLUS_EOF, read_clu;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	total_cnt = fsi->num_clusters - CLUS_BASE;
+
+	if (unlikely(total_cnt < fsi->used_clusters)) {
+		sdfat_fs_error_ratelimit(sb,
+			"%s : invalid used clusters(t:%u,u:%u)\n",
+			__func__, total_cnt, fsi->used_clusters);
+		return -EIO;
+	}
+
+	if (num_alloc > total_cnt - fsi->used_clusters)
+		return -ENOSPC;
+
+	/* pick the starting point for the free-entry scan */
+	new_clu = p_chain->dir;
+	if (IS_CLUS_EOF(new_clu))
+		new_clu = fsi->clu_srch_ptr;
+	else if (new_clu >= fsi->num_clusters)
+		new_clu = CLUS_BASE;
+
+	set_sb_dirty(sb);
+
+	p_chain->dir = CLUS_EOF;
+
+	/* 'i' only bounds the scan to one full pass over the FAT */
+	for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+		if (fat_ent_get(sb, new_clu, &read_clu)) {
+			ret = -EIO;
+			goto error;
+		}
+
+		if (IS_CLUS_FREE(read_clu)) {
+			/* claim the cluster as the new chain tail */
+			if (fat_ent_set(sb, new_clu, CLUS_EOF)) {
+				ret = -EIO;
+				goto error;
+			}
+			num_clusters++;
+
+			if (IS_CLUS_EOF(p_chain->dir)) {
+				p_chain->dir = new_clu;
+			} else {
+				/* link the previous tail to the new cluster */
+				if (fat_ent_set(sb, last_clu, new_clu)) {
+					ret = -EIO;
+					goto error;
+				}
+			}
+
+			last_clu = new_clu;
+
+			if ((--num_alloc) == 0) {
+				fsi->clu_srch_ptr = new_clu;
+				fsi->used_clusters += num_clusters;
+
+				return 0;
+			}
+		}
+		/* wrap around to the start of the heap */
+		if ((++new_clu) >= fsi->num_clusters)
+			new_clu = CLUS_BASE;
+	}
+error:
+	/* roll back whatever was allocated before the failure */
+	if (num_clusters)
+		fat_free_cluster(sb, p_chain, 0);
+	return ret;
+} /* end of fat_alloc_cluster */
+
+/* Count allocated clusters by reading every FAT entry (no bitmap on
+ * FAT12/16/32). Stores the count in *ret_count; returns 0 or -EIO.
+ */
+static s32 fat_count_used_clusters(struct super_block *sb, u32 *ret_count)
+{
+	s32 i;
+	u32 clu, count = 0;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	for (i = CLUS_BASE; i < fsi->num_clusters; i++) {
+		if (fat_ent_get(sb, i, &clu))
+			return -EIO;
+
+		/* any non-FREE entry counts as used */
+		if (!IS_CLUS_FREE(clu))
+			count++;
+	}
+
+	*ret_count = count;
+	return 0;
+} /* end of fat_count_used_clusters */
+
+
+/*
+ * Directory Entry Management Functions
+ */
+/* Classify a 32-byte DOS directory entry into one of the TYPE_* codes
+ * used by the format-independent core.
+ */
+static u32 fat_get_entry_type(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	/* first byte of 32bytes dummy */
+	if (*(ep->name) == MSDOS_UNUSED)
+		return TYPE_UNUSED;
+
+	/* 0xE5 of Kanji Japanese is replaced to 0x05 */
+	else if (*(ep->name) == MSDOS_DELETED)
+		return TYPE_DELETED;
+
+	/* 11th byte of 32bytes dummy */
+	else if ((ep->attr & ATTR_EXTEND_MASK) == ATTR_EXTEND)
+		return TYPE_EXTEND;
+
+	else if (!(ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)))
+		return TYPE_FILE;
+
+	else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_SUBDIR)
+		return TYPE_DIR;
+
+	else if ((ep->attr & (ATTR_SUBDIR | ATTR_VOLUME)) == ATTR_VOLUME)
+		return TYPE_VOLUME;
+
+	/* SUBDIR and VOLUME set together is not a valid combination */
+	return TYPE_INVALID;
+} /* end of fat_get_entry_type */
+
+/* Encode one of the core's TYPE_* codes into a DOS directory entry
+ * (via the name's first byte or the attribute byte). Unknown types
+ * leave the entry untouched.
+ */
+static void fat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	if (type == TYPE_UNUSED)
+		*(ep->name) = MSDOS_UNUSED; /* 0x0 */
+
+	else if (type == TYPE_DELETED)
+		*(ep->name) = MSDOS_DELETED; /* 0xE5 */
+
+	else if (type == TYPE_EXTEND)
+		ep->attr = ATTR_EXTEND;
+
+	else if (type == TYPE_DIR)
+		ep->attr = ATTR_SUBDIR;
+
+	else if (type == TYPE_FILE)
+		ep->attr = ATTR_ARCHIVE;
+
+	else if (type == TYPE_SYMLINK)
+		ep->attr = ATTR_ARCHIVE | ATTR_SYMLINK;
+} /* end of fat_set_entry_type */
+
+/* Return the DOS attribute byte of the entry, widened to u32. */
+static u32 fat_get_entry_attr(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	return (u32)ep->attr;
+} /* end of fat_get_entry_attr */
+
+/* Store the low byte of 'attr' as the entry's DOS attribute byte. */
+static void fat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	ep->attr = (u8)attr;
+} /* end of fat_set_entry_attr */
+
+/* FAT has no per-entry allocation flag; always report 0x01 (FAT chain). */
+static u8 fat_get_entry_flag(DENTRY_T *p_entry)
+{
+	return 0x01;
+} /* end of fat_get_entry_flag */
+
+/* Intentionally empty : FAT entries carry no allocation flag. */
+static void fat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+} /* end of fat_set_entry_flag */
+
+/* Combine the hi/lo 16-bit fields into the entry's start cluster number. */
+static u32 fat_get_entry_clu0(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+	/* FIXME : is ok? */
+	return(((u32)(le16_to_cpu(ep->start_clu_hi)) << 16) | le16_to_cpu(ep->start_clu_lo));
+} /* end of fat_get_entry_clu0 */
+
+/* Split the start cluster number into the entry's hi/lo 16-bit fields. */
+static void fat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu));
+	ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16));
+} /* end of fat_set_entry_clu0 */
+
+/* Return the entry's 32-bit file size widened to u64. */
+static u64 fat_get_entry_size(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	return (u64)le32_to_cpu(ep->size);
+} /* end of fat_get_entry_size */
+
+/* Store the file size; FAT only keeps the low 32 bits. */
+static void fat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *)p_entry;
+
+	ep->size = cpu_to_le32((u32)size);
+} /* end of fat_set_entry_size */
+
+/* Decode the DOS-packed create/modify timestamp of the entry into *tp.
+ * Defaults (t=0x00, d=0x21 => 1980-01-01 00:00:00) apply for modes
+ * other than TM_CREATE/TM_MODIFY.
+ */
+static void fat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t = 0x00, d = 0x21;
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	switch (mode) {
+	case TM_CREATE:
+		t = le16_to_cpu(ep->create_time);
+		d = le16_to_cpu(ep->create_date);
+		break;
+	case TM_MODIFY:
+		t = le16_to_cpu(ep->modify_time);
+		d = le16_to_cpu(ep->modify_date);
+		break;
+	}
+
+	/* DOS time: hhhhh mmmmmm sssss (seconds stored / 2);
+	 * DOS date: yyyyyyy mmmm ddddd (year relative to 1980)
+	 */
+	tp->tz.value = 0x00;
+	tp->sec = (t & 0x001F) << 1;
+	tp->min = (t >> 5) & 0x003F;
+	tp->hour = (t >> 11);
+	tp->day = (d & 0x001F);
+	tp->mon = (d >> 5) & 0x000F;
+	tp->year = (d >> 9);
+} /* end of fat_get_entry_time */
+
+/* Encode *tp into the entry's DOS-packed create or modify timestamp.
+ * Seconds are stored with 2-second granularity; other modes are ignored.
+ */
+static void fat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t, d;
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+	d = (tp->year << 9) | (tp->mon << 5) | tp->day;
+
+	switch (mode) {
+	case TM_CREATE:
+		ep->create_time = cpu_to_le16(t);
+		ep->create_date = cpu_to_le16(d);
+		break;
+	case TM_MODIFY:
+		ep->modify_time = cpu_to_le16(t);
+		ep->modify_date = cpu_to_le16(d);
+		break;
+	}
+} /* end of fat_set_entry_time */
+
+/* Fill a fresh DOS dentry: type, start cluster, zero size, and current
+ * time as both create and modify timestamps. The 8.3 name is set
+ * separately by the caller.
+ */
+static void __init_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, u32 type, u32 start_clu)
+{
+	TIMESTAMP_T tm, *tp;
+
+	fat_set_entry_type((DENTRY_T *) ep, type);
+	ep->start_clu_lo = cpu_to_le16(CLUSTER_16(start_clu));
+	ep->start_clu_hi = cpu_to_le16(CLUSTER_16(start_clu >> 16));
+	ep->size = 0;
+
+	tp = tm_now(SDFAT_SB(sb), &tm);
+	fat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+	fat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+	ep->access_date = 0;
+	ep->create_time_ms = 0;
+} /* end of __init_dos_entry */
+
+/* Fill one LFN (extended) dentry with 13 UTF-16 units of the long name,
+ * split across the entry's three name regions (5 + 6 + 2 chars).
+ * After the terminating NUL, remaining slots are padded with 0xFFFF as
+ * the on-disk format requires.
+ */
+static void __init_ext_entry(EXT_DENTRY_T *ep, s32 order, u8 chksum, u16 *uniname)
+{
+	s32 i;
+	u8 end = false;
+
+	fat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+	ep->order = (u8) order;
+	ep->sysid = 0;
+	ep->checksum = chksum;
+	ep->start_clu = 0;
+
+	/* unaligned name : unicode_0_4 is a byte array, use put_unaligned */
+	for (i = 0; i < 5; i++) {
+		if (!end) {
+			put_unaligned_le16(*uniname, &(ep->unicode_0_4[i<<1]));
+			if (*uniname == 0x0)
+				end = true;
+			else
+				uniname++;
+		} else {
+			put_unaligned_le16(0xFFFF, &(ep->unicode_0_4[i<<1]));
+		}
+	}
+
+	/* aligned name */
+	for (i = 0; i < 6; i++) {
+		if (!end) {
+			ep->unicode_5_10[i] = cpu_to_le16(*uniname);
+			if (*uniname == 0x0)
+				end = true;
+			else
+				uniname++;
+		} else {
+			ep->unicode_5_10[i] = cpu_to_le16(0xFFFF);
+		}
+	}
+
+	/* aligned name */
+	for (i = 0; i < 2; i++) {
+		if (!end) {
+			ep->unicode_11_12[i] = cpu_to_le16(*uniname);
+			if (*uniname == 0x0)
+				end = true;
+			else
+				uniname++;
+		} else {
+			ep->unicode_11_12[i] = cpu_to_le16(0xFFFF);
+		}
+	}
+} /* end of __init_ext_entry */
+
+/* Initialize the DOS dentry at 'entry' in p_dir and mark its sector
+ * dirty. Returns 0 on success or -EIO when the dentry cannot be read.
+ */
+static s32 fat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type,
+		u32 start_clu, u64 size)
+{
+	u64 sector;
+	DOS_DENTRY_T *dos_ep;
+
+	dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector);
+	if (!dos_ep)
+		return -EIO;
+
+	__init_dos_entry(sb, dos_ep, type, start_clu);
+	dcache_modify(sb, sector);
+
+	return 0;
+} /* end of fat_init_dir_entry */
+
+/* Write a complete dentry set: the short-name dentry at @entry plus its
+ * LFN slots at entry-1 .. entry-(num_entries-1).  Each LFN slot is tied
+ * to the short name via the 1-byte checksum of the 8.3 name, and the
+ * last (highest-order) slot carries the MSDOS_LAST_LFN flag in its
+ * order field.  Returns 0 on success, -EIO on dentry read or cache
+ * write failure.
+ */
+static s32 fat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+		UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+	s32 i;
+	u64 sector;
+	u8 chksum;
+	u16 *uniname = p_uniname->name;
+	DOS_DENTRY_T *dos_ep;
+	EXT_DENTRY_T *ext_ep;
+
+	dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry, &sector);
+	if (!dos_ep)
+		return -EIO;
+
+	dos_ep->lcase = p_dosname->name_case;
+	memcpy(dos_ep->name, p_dosname->name, DOS_NAME_LENGTH);
+	if (dcache_modify(sb, sector))
+		return -EIO;
+
+	if ((--num_entries) > 0) {
+		/* checksum over the 8.3 name binds the LFN slots to the SFN */
+		chksum = calc_chksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0);
+
+		/* LFN slots live at decreasing indices before the SFN;
+		 * each stores 13 UTF-16 code units of the name
+		 */
+		for (i = 1; i < num_entries; i++) {
+			ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector);
+			if (!ext_ep)
+				return -EIO;
+
+			__init_ext_entry(ext_ep, i, chksum, uniname);
+			if (dcache_modify(sb, sector))
+				return -EIO;
+			uniname += 13;
+		}
+
+		/* last slot (i == num_entries here) gets the terminator flag */
+		ext_ep = (EXT_DENTRY_T *) get_dentry_in_dir(sb, p_dir, entry-i, &sector);
+		if (!ext_ep)
+			return -EIO;
+
+		__init_ext_entry(ext_ep, i+MSDOS_LAST_LFN, chksum, uniname);
+		if (dcache_modify(sb, sector))
+			return -EIO;
+	}
+
+	return 0;
+} /* end of fat_init_ext_entry */
+
+/* Mark the dentry set of @entry as deleted, walking from the LFN slot
+ * farthest from the SFN down to @order.  Returns 0 on success, -EIO on
+ * dentry read or cache write failure.
+ */
+static s32 fat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+	s32 off;
+	u64 sec;
+	DENTRY_T *dp;
+
+	for (off = num_entries - 1; off >= order; off--) {
+		dp = get_dentry_in_dir(sb, p_dir, entry - off, &sec);
+		if (!dp)
+			return -EIO;
+
+		fat_set_entry_type(dp, TYPE_DELETED);
+		if (dcache_modify(sb, sec))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/* return values of fat_find_dir_entry()
+ * >= 0 : return dir entry position with the name in dir
+ * -EEXIST : (root dir, ".") it is the root dir itself
+ * -ENOENT : entry with the name does not exist
+ * -EIO : I/O error
+ */
+/* Dentry capacity of one directory cluster; a free cluster number here
+ * denotes the fixed-size FAT12/16 root directory.
+ */
+static inline s32 __get_dentries_per_clu(FS_INFO_T *fsi, s32 clu)
+{
+	return IS_CLUS_FREE(clu) ?
+		fsi->dentries_in_root : fsi->dentries_per_clu;
+}
+
+/* Look up the name <@p_uniname, @p_dosname> in directory @p_dir.
+ * Scanning starts at the cached position in fid->hint_stat and, if the
+ * end of the directory is reached without a match, rewinds once to
+ * cover the range before the hint.  As a side effect, the first run of
+ * empty dentries large enough to hold @num_entries is recorded in
+ * fid->hint_femp for a subsequent create.  Return values are listed in
+ * the comment block above this function.
+ */
+static s32 fat_find_dir_entry(struct super_block *sb, FILE_ID_T *fid,
+	CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type)
+{
+	s32 i, rewind = 0, dentry = 0, end_eidx = 0;
+	s32 chksum = 0, lfn_ord = 0, lfn_len = 0;
+	s32 dentries_per_clu, num_empty = 0;
+	u32 entry_type;
+	u16 entry_uniname[14], *uniname = NULL;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	HINT_T *hint_stat = &fid->hint_stat;
+	HINT_FEMP_T candi_empty;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	/*
+	 * REMARK:
+	 * DOT and DOTDOT are handled by VFS layer
+	 */
+
+	dentries_per_clu = __get_dentries_per_clu(fsi, p_dir->dir);
+	clu.dir = p_dir->dir;
+	clu.flags = p_dir->flags;
+
+	/* resume from the cached hint position, if any */
+	if (hint_stat->eidx) {
+		clu.dir = hint_stat->clu;
+		dentry = hint_stat->eidx;
+		end_eidx = dentry;
+	}
+
+	candi_empty.eidx = -1;
+
+	MMSG("lookup dir= %s\n", p_dosname->name);
+rewind:
+	while (!IS_CLUS_EOF(clu.dir)) {
+		i = dentry % dentries_per_clu;
+		for (; i < dentries_per_clu; i++, dentry++) {
+			if (rewind && (dentry == end_eidx))
+				goto not_found;
+
+			ep = get_dentry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -EIO;
+
+			entry_type = fat_get_entry_type(ep);
+
+			/*
+			 * Most directory entries have long name,
+			 * So, we check extend directory entry first.
+			 */
+			if (entry_type == TYPE_EXTEND) {
+				EXT_DENTRY_T *ext_ep = (EXT_DENTRY_T *)ep;
+				u32 cur_ord = (u32)ext_ep->order;
+				u32 cur_chksum = (s32)ext_ep->checksum;
+				s32 len = 13;
+				u16 unichar;
+
+				num_empty = 0;
+				candi_empty.eidx = -1;
+
+				/* check whether new lfn or not */
+				if (cur_ord & MSDOS_LAST_LFN) {
+					cur_ord &= ~(MSDOS_LAST_LFN);
+					chksum = cur_chksum;
+					len = (13 * (cur_ord-1));
+					uniname = (p_uniname->name + len);
+					lfn_ord = cur_ord + 1;
+					lfn_len = 0;
+
+					/* check minimum name length */
+					if (cur_ord &&
+						(len > p_uniname->name_len)) {
+						/* MISMATCHED NAME LENGTH */
+						lfn_len = -1;
+					}
+					len = 0;
+				}
+
+				/* invalid lfn order */
+				if (!cur_ord || (cur_ord > MAX_LFN_ORDER) ||
+					((cur_ord + 1) != lfn_ord))
+					goto reset_dentry_set;
+
+				/* check checksum of directory entry set */
+				if (cur_chksum != chksum)
+					goto reset_dentry_set;
+
+				/* update order for next dentry */
+				lfn_ord = cur_ord;
+
+				/* check whether mismatched lfn or not */
+				if (lfn_len == -1) {
+					/* MISMATCHED LFN DENTRY SET */
+					continue;
+				}
+
+				if (!uniname) {
+					sdfat_fs_error(sb,
+						"%s : abnormal dentry "
+						"(start_clu[%u], "
+						"idx[%u])", __func__,
+						p_dir->dir, dentry);
+					sdfat_debug_bug_on(1);
+					return -EIO;
+				}
+
+				/* update position of name buffer */
+				uniname -= len;
+
+				/* get utf16 characters saved on this entry */
+				len = __extract_uni_name_from_ext_entry(ext_ep, entry_uniname, lfn_ord);
+
+				/* replace last char to null */
+				unichar = *(uniname+len);
+				*(uniname+len) = (u16)0x0;
+
+				/* uniname ext_dentry unit compare repeatedly */
+				if (nls_cmp_uniname(sb, uniname, entry_uniname)) {
+					/* DO HANDLE WRONG NAME */
+					lfn_len = -1;
+				} else {
+					/* add matched chars length */
+					lfn_len += len;
+				}
+
+				/* restore previous character */
+				*(uniname+len) = unichar;
+
+				/* jump to check next dentry */
+				continue;
+
+			} else if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+				DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *)ep;
+				u32 cur_chksum = (s32)calc_chksum_1byte(
+						(void *) dos_ep->name,
+						DOS_NAME_LENGTH, 0);
+
+				num_empty = 0;
+				candi_empty.eidx = -1;
+
+				MMSG("checking dir= %c%c%c%c%c%c%c%c%c%c%c\n",
+					dos_ep->name[0], dos_ep->name[1],
+					dos_ep->name[2], dos_ep->name[3],
+					dos_ep->name[4], dos_ep->name[5],
+					dos_ep->name[6], dos_ep->name[7],
+					dos_ep->name[8], dos_ep->name[9],
+					dos_ep->name[10]);
+
+				/*
+				 * if there is no valid long filename,
+				 * we should check short filename.
+				 */
+				if (!lfn_len || (cur_chksum != chksum)) {
+					/* check shortname */
+					if ((p_dosname->name[0] != '\0') &&
+						!nls_cmp_sfn(sb,
+							p_dosname->name,
+							dos_ep->name)) {
+						goto found;
+					}
+				/* check name length */
+				} else if ((lfn_len > 0) &&
+						((s32)p_uniname->name_len ==
+						lfn_len)) {
+					goto found;
+				}
+
+				/* DO HANDLE MISMATCHED SFN, FALL THROUGH */
+			} else if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) {
+				/* track runs of empty slots for a later create */
+				num_empty++;
+				if (candi_empty.eidx == -1) {
+					if (num_empty == 1) {
+						candi_empty.cur.dir = clu.dir;
+						candi_empty.cur.size = clu.size;
+						candi_empty.cur.flags = clu.flags;
+					}
+
+					if (num_empty >= num_entries) {
+						candi_empty.eidx = dentry - (num_empty - 1);
+						ASSERT(0 <= candi_empty.eidx);
+						candi_empty.count = num_empty;
+
+						if ((fid->hint_femp.eidx == -1) ||
+							(candi_empty.eidx <= fid->hint_femp.eidx)) {
+							memcpy(&fid->hint_femp,
+								&candi_empty,
+								sizeof(HINT_FEMP_T));
+						}
+					}
+				}
+
+				/* an UNUSED entry marks the end of the directory */
+				if (entry_type == TYPE_UNUSED)
+					goto not_found;
+				/* FALL THROUGH */
+			}
+reset_dentry_set:
+			/* TYPE_DELETED, TYPE_VOLUME OR MISMATCHED SFN */
+			lfn_ord = 0;
+			lfn_len = 0;
+			chksum = 0;
+		}
+
+		if (IS_CLUS_FREE(p_dir->dir))
+			break; /* FAT16 root_dir */
+
+		if (get_next_clus_safe(sb, &clu.dir))
+			return -EIO;
+	}
+
+not_found:
+	/* we started at not 0 index,so we should try to find target
+	 * from 0 index to the index we started at.
+	 */
+	if (!rewind && end_eidx) {
+		rewind = 1;
+		dentry = 0;
+		clu.dir = p_dir->dir;
+		/* reset dentry set */
+		lfn_ord = 0;
+		lfn_len = 0;
+		chksum = 0;
+		/* reset empty hint_*/
+		num_empty = 0;
+		candi_empty.eidx = -1;
+		goto rewind;
+	}
+
+	/* initialized hint_stat */
+	hint_stat->clu = p_dir->dir;
+	hint_stat->eidx = 0;
+	return -ENOENT;
+
+found:
+	/* next dentry we'll find is out of this cluster */
+	if (!((dentry + 1) % dentries_per_clu)) {
+		int ret = 0;
+		/* FAT16 root_dir */
+		if (IS_CLUS_FREE(p_dir->dir))
+			clu.dir = CLUS_EOF;
+		else
+			ret = get_next_clus_safe(sb, &clu.dir);
+
+		if (ret || IS_CLUS_EOF(clu.dir)) {
+			/* just initialized hint_stat */
+			hint_stat->clu = p_dir->dir;
+			hint_stat->eidx = 0;
+			return dentry;
+		}
+	}
+
+	hint_stat->clu = clu.dir;
+	hint_stat->eidx = dentry + 1;
+	return dentry;
+} /* end of fat_find_dir_entry */
+
+/* Count the LFN slots that belong to the short-name dentry @p_entry at
+ * index @entry (matched by checksum).  Returns the slot count, or -EIO
+ * if a dentry cannot be read.
+ */
+static s32 fat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+	s32 n = 0;
+	s32 idx;
+	u8 want_chksum;
+	EXT_DENTRY_T *lfn_ep;
+	DOS_DENTRY_T *sfn_ep = (DOS_DENTRY_T *) p_entry;
+
+	want_chksum = calc_chksum_1byte((void *) sfn_ep->name, DOS_NAME_LENGTH, 0);
+
+	/* walk backwards over the LFN slots preceding the short-name entry */
+	for (idx = entry - 1; idx >= 0; idx--) {
+		lfn_ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, idx, NULL);
+		if (!lfn_ep)
+			return -EIO;
+
+		if (fat_get_entry_type((DENTRY_T *)lfn_ep) != TYPE_EXTEND)
+			break;
+		if (lfn_ep->checksum != want_chksum)
+			break;
+
+		n++;
+		/* the slot carrying MSDOS_LAST_LFN terminates the set */
+		if (lfn_ep->order > MSDOS_LAST_LFN)
+			break;
+	}
+
+	return n;
+}
+
+
+/*
+ * Name Conversion Functions
+ */
+/* Copy the up-to-13 UTF-16 code units stored in one LFN dentry into
+ * @uniname, stopping at a NUL.  Returns the number of characters copied
+ * (terminator excluded).  For @order >= 20 the copy is cut short after
+ * 4 characters of unicode_5_10 -- presumably to cap the total name at
+ * the maximum supported length; TODO confirm against MAX_NAME_LENGTH.
+ */
+static s32 __extract_uni_name_from_ext_entry(EXT_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+	s32 i, len = 0;
+
+	/* unicode_0_4 is a byte array; read with get_unaligned */
+	for (i = 0; i < 5; i++) {
+		*uniname = get_unaligned_le16(&(ep->unicode_0_4[i<<1]));
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	if (order < 20) {
+		for (i = 0; i < 6; i++) {
+			/* FIXME : unaligned? */
+			*uniname = le16_to_cpu(ep->unicode_5_10[i]);
+			if (*uniname == 0x0)
+				return len;
+			uniname++;
+			len++;
+		}
+	} else {
+		/* highest-order slot: truncate and force-terminate the name */
+		for (i = 0; i < 4; i++) {
+			/* FIXME : unaligned? */
+			*uniname = le16_to_cpu(ep->unicode_5_10[i]);
+			if (*uniname == 0x0)
+				return len;
+			uniname++;
+			len++;
+		}
+		*uniname = 0x0; /* uniname[MAX_NAME_LENGTH] */
+		return len;
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* FIXME : unaligned? */
+		*uniname = le16_to_cpu(ep->unicode_11_12[i]);
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	*uniname = 0x0;
+	return len;
+
+} /* end of __extract_uni_name_from_ext_entry */
+
+/* Reassemble the long name of the dentry at @entry into @uniname by
+ * walking its LFN slots backwards (entry-1, entry-2, ...).  Each slot
+ * must be TYPE_EXTEND, match the SFN checksum, and carry the expected
+ * order; any violation aborts and yields an empty name.
+ */
+static void fat_get_uniname_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+	u32 i;
+	u16 *name = uniname;
+	u32 chksum;
+
+	DOS_DENTRY_T *dos_ep =
+		(DOS_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL);
+
+	if (unlikely(!dos_ep))
+		goto invalid_lfn;
+
+	chksum = (u32)calc_chksum_1byte(
+				(void *) dos_ep->name,
+				DOS_NAME_LENGTH, 0);
+
+	for (entry--, i = 1; entry >= 0; entry--, i++) {
+		EXT_DENTRY_T *ep;
+
+		ep = (EXT_DENTRY_T *)get_dentry_in_dir(sb, p_dir, entry, NULL);
+		if (!ep)
+			goto invalid_lfn;
+
+		if (fat_get_entry_type((DENTRY_T *) ep) != TYPE_EXTEND)
+			goto invalid_lfn;
+
+		if (chksum != (u32)ep->checksum)
+			goto invalid_lfn;
+
+		/* slots must appear in strictly increasing order 1, 2, ... */
+		if (i != (u32)(ep->order & ~(MSDOS_LAST_LFN)))
+			goto invalid_lfn;
+
+		__extract_uni_name_from_ext_entry(ep, name, (s32)i);
+		/* the MSDOS_LAST_LFN slot completes the name */
+		if (ep->order & MSDOS_LAST_LFN)
+			return;
+
+		name += 13;
+	}
+invalid_lfn:
+	*uniname = (u16)0x0;
+} /* end of fat_get_uniname_from_ext_entry */
+
+/* Scan @p_dir for a dentry whose 8.3 name equals @p_dosname.
+ * Returns 0 (and the dentry index via @offset, if non-NULL) on a match,
+ * -ENOENT if the name is absent, -EIO on a read failure.
+ */
+static s32 __fat_find_shortname_entry(struct super_block *sb, CHAIN_T *p_dir,
+		u8 *p_dosname, s32 *offset, __attribute__((unused))int n_entry_needed)
+{
+	u32 type;
+	s32 i, dentry = 0;
+	s32 per_clu;
+	DENTRY_T *ep;
+	CHAIN_T clu = *p_dir;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (offset)
+		*offset = -1;
+
+	/* a free cluster number denotes the fixed FAT12/16 root directory */
+	per_clu = IS_CLUS_FREE(clu.dir) ?
+		fsi->dentries_in_root : fsi->dentries_per_clu;
+
+	while (!IS_CLUS_EOF(clu.dir)) {
+		for (i = 0; i < per_clu; i++, dentry++) {
+			ep = get_dentry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -EIO;
+
+			type = fat_get_entry_type(ep);
+			if ((type != TYPE_FILE) && (type != TYPE_DIR))
+				continue;
+
+			if (nls_cmp_sfn(sb, p_dosname, ((DOS_DENTRY_T *)ep)->name))
+				continue;
+
+			/* short name matched */
+			if (offset)
+				*offset = dentry;
+			return 0;
+		}
+
+		/* fat12/16 root dir has no FAT chain to follow */
+		if (IS_CLUS_FREE(clu.dir))
+			break;
+
+		if (get_next_clus_safe(sb, &clu.dir))
+			return -EIO;
+	}
+	return -ENOENT;
+}
+
+#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+/* Overwrite the tail of the 8-byte base name with a "~<count>" suffix.
+ * The walk below stops at the first padding space; bytes with the high
+ * bit set appear to be treated as DBCS lead bytes and are skipped in
+ * pairs -- TODO confirm against the NLS codepage in use.
+ */
+static void __fat_attach_count_to_dos_name(u8 *dosname, s32 count)
+{
+	s32 i, j, length;
+	s8 str_count[6];
+
+	snprintf(str_count, sizeof(str_count), "~%d", count);
+	length = strlen(str_count);
+
+	/* find the insertion point within the 8-byte base field */
+	i = j = 0;
+	while (j <= (8 - length)) {
+		i = j;
+		if (dosname[j] == ' ')
+			break;
+		if (dosname[j] & 0x80)
+			j += 2;
+		else
+			j++;
+	}
+
+	/* write "~N" at the chosen position */
+	for (j = 0; j < length; i++, j++)
+		dosname[i] = (u8) str_count[j];
+
+	/* keep the base field space-padded if the suffix ends at byte 7 */
+	if (i == 7)
+		dosname[7] = ' ';
+
+} /* end of __fat_attach_count_to_dos_name */
+#endif
+
+/* Generate a unique 8.3 alias for @p_dosname in directory @p_dir.
+ * With CONFIG_SDFAT_FAT32_SHORTNAME_SEQ, first try "BASE~1".."BASE~9";
+ * if all are taken (or the option is off), fall back to a numeric tail
+ * derived from jiffies: a 4-hex-digit prefix plus "~N", perturbed until
+ * no collision remains.  Returns 0 on success or a negative errno from
+ * the directory scan.
+ */
+s32 fat_generate_dos_name_new(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname, s32 n_entry_needed)
+{
+	s32 i;
+	s32 baselen, err;
+	u8 work[DOS_NAME_LENGTH], buf[5];
+	u8 tail;
+
+	baselen = 8;
+	memset(work, ' ', DOS_NAME_LENGTH);
+	memcpy(work, p_dosname->name, DOS_NAME_LENGTH);
+
+	/* trim trailing spaces to find the base-name length */
+	while (baselen && (work[--baselen] == ' ')) {
+		/* DO NOTHING, JUST FOR CHECK_PATCH */
+	}
+
+	/* leave room for the "~N" suffix */
+	if (baselen > 6)
+		baselen = 6;
+
+	BUG_ON(baselen < 0);
+
+#ifdef CONFIG_SDFAT_FAT32_SHORTNAME_SEQ
+	/* example) namei_exfat.c -> NAMEI_~1 - NAMEI_~9 */
+	work[baselen] = '~';
+	for (i = 1; i < 10; i++) {
+		// '0' + i = 1 ~ 9 ASCII
+		work[baselen + 1] = '0' + i;
+		err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed);
+		if (err == -ENOENT) {
+			/* void return */
+			__fat_attach_count_to_dos_name(p_dosname->name, i);
+			return 0;
+		}
+
+		/* any other error */
+		if (err)
+			return err;
+	}
+#endif
+
+	/* fall back to a jiffies-based hex tail: "XXXX~N" */
+	i = jiffies;
+	tail = (jiffies >> 16) & 0x7;
+
+	if (baselen > 2)
+		baselen = 2;
+
+	BUG_ON(baselen < 0);
+
+	work[baselen + 4] = '~';
+	// 1 ~ 8 ASCII
+	work[baselen + 5] = '1' + tail;
+	while (1) {
+		snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
+		memcpy(&work[baselen], buf, 4);
+		err = __fat_find_shortname_entry(sb, p_dir, work, NULL, n_entry_needed);
+		if (err == -ENOENT) {
+			memcpy(p_dosname->name, work, DOS_NAME_LENGTH);
+			break;
+		}
+
+		/* any other error */
+		if (err)
+			return err;
+
+		/* collision: perturb the hex prefix and retry */
+		i -= 11;
+	}
+	return 0;
+} /* end of generate_dos_name_new */
+
+/* Number of dentries needed for @p_uniname: one 8.3 entry plus one LFN
+ * slot per 13 UTF-16 characters.  Returns 0 for an empty name.
+ */
+static s32 fat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+	s32 len = p_uniname->name_len;
+
+	if (!len)
+		return 0;
+
+	/* 1 dos name entry + ceil(len / 13) extended entries */
+	return ((len - 1) / 13) + 2;
+
+} /* end of calc_num_enties */
+
+/* Refuse growth beyond the FAT limit of 65536 dentries per directory. */
+static s32 fat_check_max_dentries(FILE_ID_T *fid)
+{
+	if ((fid->size >> DENTRY_SIZE_BITS) < MAX_FAT_DENTRIES)
+		return 0;
+
+	/* FAT spec allows a dir to grow upto 65536 dentries */
+	return -ENOSPC;
+} /* end of check_max_dentries */
+
+
+/*
+ * File Operation Functions
+ */
+/* FAT12/16/32 operation dispatch table, installed as fsi->fs_func. */
+static FS_FUNC_T fat_fs_func = {
+	.alloc_cluster = fat_alloc_cluster,
+	.free_cluster = fat_free_cluster,
+	.count_used_clusters = fat_count_used_clusters,
+
+	.init_dir_entry = fat_init_dir_entry,
+	.init_ext_entry = fat_init_ext_entry,
+	.find_dir_entry = fat_find_dir_entry,
+	.delete_dir_entry = fat_delete_dir_entry,
+	.get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry,
+	.count_ext_entries = fat_count_ext_entries,
+	.calc_num_entries = fat_calc_num_entries,
+	.check_max_dentries = fat_check_max_dentries,
+
+	.get_entry_type = fat_get_entry_type,
+	.set_entry_type = fat_set_entry_type,
+	.get_entry_attr = fat_get_entry_attr,
+	.set_entry_attr = fat_set_entry_attr,
+	.get_entry_flag = fat_get_entry_flag,
+	.set_entry_flag = fat_set_entry_flag,
+	.get_entry_clu0 = fat_get_entry_clu0,
+	.set_entry_clu0 = fat_set_entry_clu0,
+	.get_entry_size = fat_get_entry_size,
+	.set_entry_size = fat_set_entry_size,
+	.get_entry_time = fat_get_entry_time,
+	.set_entry_time = fat_set_entry_time,
+};
+
+/* Dispatch table used when smart (AU-aware) allocation is enabled:
+ * identical to fat_fs_func except for amap_fat_alloc_cluster and the
+ * additional get_au_stat hook.
+ */
+static FS_FUNC_T amap_fat_fs_func = {
+	.alloc_cluster = amap_fat_alloc_cluster,
+	.free_cluster = fat_free_cluster,
+	.count_used_clusters = fat_count_used_clusters,
+
+	.init_dir_entry = fat_init_dir_entry,
+	.init_ext_entry = fat_init_ext_entry,
+	.find_dir_entry = fat_find_dir_entry,
+	.delete_dir_entry = fat_delete_dir_entry,
+	.get_uniname_from_ext_entry = fat_get_uniname_from_ext_entry,
+	.count_ext_entries = fat_count_ext_entries,
+	.calc_num_entries = fat_calc_num_entries,
+	.check_max_dentries = fat_check_max_dentries,
+
+	.get_entry_type = fat_get_entry_type,
+	.set_entry_type = fat_set_entry_type,
+	.get_entry_attr = fat_get_entry_attr,
+	.set_entry_attr = fat_set_entry_attr,
+	.get_entry_flag = fat_get_entry_flag,
+	.set_entry_flag = fat_set_entry_flag,
+	.get_entry_clu0 = fat_get_entry_clu0,
+	.set_entry_clu0 = fat_set_entry_clu0,
+	.get_entry_size = fat_get_entry_size,
+	.set_entry_size = fat_set_entry_size,
+	.get_entry_time = fat_get_entry_time,
+	.set_entry_time = fat_set_entry_time,
+
+	.get_au_stat = amap_get_au_stat,
+};
+
+/* Parse a FAT12/16 BPB and fill the in-memory FS_INFO_T geometry.
+ * Returns 0 on success, -EINVAL if the BPB contains bogus values.
+ */
+s32 mount_fat16(struct super_block *sb, pbr_t *p_pbr)
+{
+	s32 num_root_sectors;
+	bpb16_t *p_bpb = &(p_pbr->bpb.f16);
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (!p_bpb->num_fats) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+		return -EINVAL;
+	}
+
+	/* round the fixed root directory up to whole blocks */
+	num_root_sectors = get_unaligned_le16(p_bpb->num_root_entries) << DENTRY_SIZE_BITS;
+	num_root_sectors = ((num_root_sectors-1) >> sb->s_blocksize_bits) + 1;
+
+	fsi->sect_per_clus = p_bpb->sect_per_clus;
+	fsi->sect_per_clus_bits = ilog2(p_bpb->sect_per_clus);
+	fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+	fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+	fsi->num_FAT_sectors = le16_to_cpu(p_bpb->num_fat_sectors);
+
+	fsi->FAT1_start_sector = le16_to_cpu(p_bpb->num_reserved);
+	if (p_bpb->num_fats == 1)
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+	else
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+	fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors;
+	fsi->data_start_sector = fsi->root_start_sector + num_root_sectors;
+
+	/* the 16-bit count is 0 when the 32-bit huge count is in use */
+	fsi->num_sectors = get_unaligned_le16(p_bpb->num_sectors);
+	if (!fsi->num_sectors)
+		fsi->num_sectors = le32_to_cpu(p_bpb->num_huge_sectors);
+
+	if (!fsi->num_sectors) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of total sector count");
+		return -EINVAL;
+	}
+
+	fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE;
+	/* because the cluster index starts with 2 */
+
+	/* per spec, FAT12 vs FAT16 is decided purely by the cluster count */
+	fsi->vol_type = FAT16;
+	if (fsi->num_clusters < FAT12_THRESHOLD)
+		fsi->vol_type = FAT12;
+
+	fsi->vol_id = get_unaligned_le32(p_bpb->vol_serial);
+
+	fsi->root_dir = 0;
+	fsi->dentries_in_root = get_unaligned_le16(p_bpb->num_root_entries);
+	if (!fsi->dentries_in_root) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of max dentry count "
+				"of the root directory");
+		return -EINVAL;
+	}
+
+	fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	fsi->vol_flag = VOL_CLEAN;
+	fsi->clu_srch_ptr = 2;
+	fsi->used_clusters = (u32) ~0;
+
+	fsi->fs_func = &fat_fs_func;
+	fat_ent_ops_init(sb);
+
+	if (p_bpb->state & FAT_VOL_DIRTY) {
+		fsi->vol_flag |= VOL_DIRTY;
+		sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+			"unmounted. Some data may be corrupt. "
+			"Please run fsck.");
+	}
+
+	return 0;
+} /* end of mount_fat16 */
+
+/* Number of sectors preceding this partition, expressed in filesystem
+ * blocks (bd_part->start_sect is in 512-byte units).  Returns 0 for a
+ * whole-disk device or when no block device is attached.
+ */
+static sector_t __calc_hidden_sect(struct super_block *sb)
+{
+	struct block_device *bdev = sb->s_bdev;
+	sector_t hidden = 0;
+
+	if (!bdev)
+		goto out;
+
+	hidden = bdev->bd_part->start_sect;
+	/* a disk device, not a partition */
+	if (!hidden) {
+		if (bdev != bdev->bd_contains)
+			sdfat_log_msg(sb, KERN_WARNING,
+				"hidden(0), but disk has a partition table");
+		goto out;
+	}
+
+	/* convert 512-byte sectors to filesystem blocks */
+	if (sb->s_blocksize_bits != 9) {
+		ASSERT(sb->s_blocksize_bits > 9);
+		hidden >>= (sb->s_blocksize_bits - 9);
+	}
+
+out:
+	sdfat_log_msg(sb, KERN_INFO, "start_sect of part(%d) : %lld",
+		bdev ? bdev->bd_part->partno : -1, (s64)hidden);
+	return hidden;
+
+}
+
+/* Parse a FAT32 BPB, fill the in-memory FS_INFO_T geometry, and set up
+ * the AU allocation map when smart allocation is requested.
+ * Returns 0 on success, -EINVAL if the BPB contains bogus values.
+ */
+s32 mount_fat32(struct super_block *sb, pbr_t *p_pbr)
+{
+	pbr32_t *p_bpb = (pbr32_t *)p_pbr;
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (!p_bpb->bpb.num_fats) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+		return -EINVAL;
+	}
+
+	fsi->sect_per_clus = p_bpb->bpb.sect_per_clus;
+	fsi->sect_per_clus_bits = ilog2(p_bpb->bpb.sect_per_clus);
+	fsi->cluster_size_bits = fsi->sect_per_clus_bits + sb->s_blocksize_bits;
+	fsi->cluster_size = 1 << fsi->cluster_size_bits;
+
+	fsi->num_FAT_sectors = le32_to_cpu(p_bpb->bpb.num_fat32_sectors);
+
+	fsi->FAT1_start_sector = le16_to_cpu(p_bpb->bpb.num_reserved);
+	if (p_bpb->bpb.num_fats == 1)
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector;
+	else
+		fsi->FAT2_start_sector = fsi->FAT1_start_sector + fsi->num_FAT_sectors;
+
+	/* FAT32 has no fixed root area; data begins right after the FATs */
+	fsi->root_start_sector = fsi->FAT2_start_sector + fsi->num_FAT_sectors;
+	fsi->data_start_sector = fsi->root_start_sector;
+
+	/* SPEC violation for compatibility */
+	fsi->num_sectors = get_unaligned_le16(p_bpb->bpb.num_sectors);
+	if (!fsi->num_sectors)
+		fsi->num_sectors = le32_to_cpu(p_bpb->bpb.num_huge_sectors);
+
+	/* 2nd check */
+	if (!fsi->num_sectors) {
+		sdfat_msg(sb, KERN_ERR, "bogus number of total sector count");
+		return -EINVAL;
+	}
+
+	fsi->num_clusters = (u32)((fsi->num_sectors - fsi->data_start_sector) >> fsi->sect_per_clus_bits) + CLUS_BASE;
+	/* because the cluster index starts with 2 */
+
+	fsi->vol_type = FAT32;
+	fsi->vol_id = get_unaligned_le32(p_bpb->bsx.vol_serial);
+
+	fsi->root_dir = le32_to_cpu(p_bpb->bpb.root_cluster);
+	fsi->dentries_in_root = 0;
+	fsi->dentries_per_clu = 1 << (fsi->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	fsi->vol_flag = VOL_CLEAN;
+	fsi->clu_srch_ptr = 2;
+	fsi->used_clusters = (u32) ~0;
+
+	fsi->fs_func = &fat_fs_func;
+
+	/* Delayed / smart allocation related init */
+	fsi->reserved_clusters = 0;
+
+	/* Should be initialized before calling amap_create() */
+	fat_ent_ops_init(sb);
+
+	/* AU Map Creation */
+	if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART) {
+		u32 hidden_sectors = le32_to_cpu(p_bpb->bpb.num_hid_sectors);
+		u32 calc_hid_sect = 0;
+		int ret;
+
+
+		/* calculate hidden sector size */
+		calc_hid_sect = __calc_hidden_sect(sb);
+		if (calc_hid_sect != hidden_sectors) {
+			sdfat_log_msg(sb, KERN_WARNING, "abnormal hidden "
+				"sector   : bpb(%u) != ondisk(%u)",
+				hidden_sectors, calc_hid_sect);
+			if (SDFAT_SB(sb)->options.adj_hidsect) {
+				sdfat_log_msg(sb, KERN_INFO,
+					"adjustment hidden sector : "
+					"bpb(%u) -> ondisk(%u)",
+					hidden_sectors, calc_hid_sect);
+				hidden_sectors = calc_hid_sect;
+			}
+		}
+
+		SDFAT_SB(sb)->options.amap_opt.misaligned_sect = hidden_sectors;
+
+		/* calculate AU size if it's not set */
+		if (!SDFAT_SB(sb)->options.amap_opt.sect_per_au) {
+			SDFAT_SB(sb)->options.amap_opt.sect_per_au =
+				__calc_default_au_size(sb);
+		}
+
+		ret = amap_create(sb,
+				SDFAT_SB(sb)->options.amap_opt.pack_ratio,
+				SDFAT_SB(sb)->options.amap_opt.sect_per_au,
+				SDFAT_SB(sb)->options.amap_opt.misaligned_sect);
+		if (ret) {
+			/* fall back to plain FAT allocation on failure */
+			sdfat_log_msg(sb, KERN_WARNING, "failed to create AMAP."
+				" disabling smart allocation. (err:%d)", ret);
+			SDFAT_SB(sb)->options.improved_allocation &= ~(SDFAT_ALLOC_SMART);
+		} else {
+			fsi->fs_func = &amap_fat_fs_func;
+		}
+	}
+
+	/* Check dependency of mount options */
+	if (SDFAT_SB(sb)->options.improved_allocation !=
+			(SDFAT_ALLOC_DELAY | SDFAT_ALLOC_SMART)) {
+		sdfat_log_msg(sb, KERN_INFO, "disabling defragmentation because"
+				" smart, delay options are disabled");
+		SDFAT_SB(sb)->options.defrag = 0;
+	}
+
+	if (p_bpb->bsx.state & FAT_VOL_DIRTY) {
+		fsi->vol_flag |= VOL_DIRTY;
+		sdfat_log_msg(sb, KERN_WARNING, "Volume was not properly "
+			"unmounted. Some data may be corrupt. "
+			"Please run fsck.");
+	}
+
+	return 0;
+} /* end of mount_fat32 */
+
+/* end of core_fat.c */
diff --git a/fs/sdfat/dfr.c b/fs/sdfat/dfr.c
new file mode 100644
index 000000000000..abb4710b4340
--- /dev/null
+++ b/fs/sdfat/dfr.c
@@ -0,0 +1,1372 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* @PROJECT : exFAT & FAT12/16/32 File System */
+/* @FILE : dfr.c */
+/* @PURPOSE : Defragmentation support for SDFAT32 */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+
+#include "sdfat.h"
+#include "core.h"
+#include "amap_smart.h"
+
+#ifdef CONFIG_SDFAT_DFR
+/**
+ * @fn defrag_get_info
+ * @brief get HW params for defrag daemon
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param arg defrag info arguments
+ * @remark protected by super_block
+ */
+int
+defrag_get_info(
+	IN struct super_block *sb,
+	OUT struct defrag_info_arg *arg)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	AMAP_T *amap = fsi->amap;
+
+	if (!arg)
+		return -EINVAL;
+
+	/* volume geometry */
+	arg->sec_sz = sb->s_blocksize;
+	arg->clus_sz = fsi->cluster_size;
+	arg->total_sec = fsi->num_sectors;
+
+	/* FAT layout */
+	arg->fat_offset_sec = fsi->FAT1_start_sector;
+	arg->fat_sz_sec = fsi->num_FAT_sectors;
+	if (fsi->FAT1_start_sector == fsi->FAT2_start_sector)
+		arg->n_fat = 1;
+	else
+		arg->n_fat = 2;
+
+	/* AU geometry from the allocation map */
+	arg->sec_per_au = amap->option.au_size;
+	arg->hidden_sectors = amap->option.au_align_factor % amap->option.au_size;
+
+	return 0;
+}
+
+
+/* Classify one short-name dentry for the defrag traversal and fill
+ * @arg with its type, start cluster, i_pos and VFS name.
+ * Returns 0 for a regular file or directory, -ENOENT at the end of the
+ * directory (TYPE_UNUSED), -EINVAL on bad parameters, and the initial
+ * -EPERM for entries to be skipped ("."/".."/other types).
+ */
+static int
+__defrag_scan_dir(
+	IN struct super_block *sb,
+	IN DOS_DENTRY_T *dos_ep,
+	IN loff_t i_pos,
+	OUT struct defrag_trav_arg *arg)
+{
+	FS_INFO_T *fsi = NULL;
+	UNI_NAME_T uniname;
+	unsigned int type = 0, start_clus = 0;
+	int err = -EPERM;
+
+	/* Check params */
+	ERR_HANDLE2((!sb || !dos_ep || !i_pos || !arg), err, -EINVAL);
+	fsi = &(SDFAT_SB(sb)->fsi);
+
+	/* Get given entry's type */
+	type = fsi->fs_func->get_entry_type((DENTRY_T *) dos_ep);
+
+	/* Check dos_ep: "." and ".." are skipped (err stays -EPERM) */
+	if (!strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+		;
+	} else if (!strncmp(dos_ep->name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+		;
+	} else if ((type == TYPE_DIR) || (type == TYPE_FILE)) {
+
+		/* Set start_clus from the two on-disk 16-bit halves */
+		SET32_HI(start_clus, le16_to_cpu(dos_ep->start_clu_hi));
+		SET32_LO(start_clus, le16_to_cpu(dos_ep->start_clu_lo));
+		arg->start_clus = start_clus;
+
+		/* Set type & i_pos */
+		if (type == TYPE_DIR)
+			arg->type = DFR_TRAV_TYPE_DIR;
+		else
+			arg->type = DFR_TRAV_TYPE_FILE;
+
+		arg->i_pos = i_pos;
+
+		/* Set name */
+		memset(&uniname, 0, sizeof(UNI_NAME_T));
+		get_uniname_from_dos_entry(sb, dos_ep, &uniname, 0x1);
+		/* FIXME :
+		 * we should think that whether the size of arg->name
+		 * is enough or not
+		 */
+		nls_uni16s_to_vfsname(sb, &uniname,
+			arg->name, sizeof(arg->name));
+
+		err = 0;
+	/* End case */
+	} else if (type == TYPE_UNUSED) {
+		err = -ENOENT;
+	} else {
+		;
+	}
+
+error:
+	return err;
+}
+
+
+/**
+ * @fn defrag_scan_dir
+ * @brief scan given directory
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param args traverse args
+ * @remark protected by inode_lock, super_block and volume lock
+ */
+int
+defrag_scan_dir(
+	IN struct super_block *sb,
+	INOUT struct defrag_trav_arg *args)
+{
+	struct sdfat_sb_info *sbi = NULL;
+	FS_INFO_T *fsi = NULL;
+	struct defrag_trav_header *header = NULL;
+	DOS_DENTRY_T *dos_ep;
+	CHAIN_T chain;
+	int dot_found = 0, args_idx = DFR_TRAV_HEADER_IDX + 1, clus = 0, index = 0;
+	int err = 0, j = 0;
+
+	/* Check params */
+	ERR_HANDLE2((!sb || !args), err, -EINVAL);
+	sbi = SDFAT_SB(sb);
+	fsi = &(sbi->fsi);
+	/* args[0] doubles as the traversal header */
+	header = (struct defrag_trav_header *) args;
+
+	/* Exceptional case for ROOT: it has no "." entry to verify */
+	if (header->i_pos == DFR_TRAV_ROOT_IPOS) {
+		header->start_clus = fsi->root_dir;
+		dfr_debug("IOC_DFR_TRAV for ROOT: start_clus %08x", header->start_clus);
+		dot_found = 1;
+	}
+
+	chain.dir = header->start_clus;
+	chain.size = 0;
+	chain.flags = 0;
+
+	/* Check if this is directory */
+	if (!dot_found) {
+		FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+		ERR_HANDLE(err);
+		dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, 0, NULL);
+		ERR_HANDLE2(!dos_ep, err, -EIO);
+
+		if (strncmp(dos_ep->name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+			err = -EINVAL;
+			dfr_err("Scan: Not a directory, err %d", err);
+			goto error;
+		}
+	}
+
+	/* For more-scan case: resume where the previous batch stopped */
+	if ((header->stat == DFR_TRAV_STAT_MORE) &&
+		(header->start_clus == sbi->dfr_hint_clus) &&
+		(sbi->dfr_hint_idx > 0)) {
+
+		index = sbi->dfr_hint_idx;
+		for (j = 0; j < (sbi->dfr_hint_idx / fsi->dentries_per_clu); j++) {
+			/* Follow FAT-chain */
+			FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+			ERR_HANDLE(err);
+			err = fat_ent_get(sb, chain.dir, &(chain.dir));
+			ERR_HANDLE(err);
+
+			if (!IS_CLUS_EOF(chain.dir)) {
+				clus++;
+				index -= fsi->dentries_per_clu;
+			} else {
+				/**
+				 * This directory modified. Stop scanning.
+				 */
+				err = -EINVAL;
+				dfr_err("Scan: SCAN_MORE failed, err %d", err);
+				goto error;
+			}
+		}
+
+	/* For first-scan case */
+	} else {
+		clus = 0;
+		index = 0;
+	}
+
+scan_fat_chain:
+	/* Scan given directory and get info of children */
+	for ( ; index < fsi->dentries_per_clu; index++) {
+		DOS_DENTRY_T *dos_ep = NULL;
+		loff_t i_pos = 0;
+
+		/* Get dos_ep */
+		FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+		ERR_HANDLE(err);
+		dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &chain, index, NULL);
+		ERR_HANDLE2(!dos_ep, err, -EIO);
+
+		/* Make i_pos for this entry: dir start cluster | entry index */
+		SET64_HI(i_pos, header->start_clus);
+		SET64_LO(i_pos, clus * fsi->dentries_per_clu + index);
+
+		err = __defrag_scan_dir(sb, dos_ep, i_pos, &args[args_idx]);
+		if (!err) {
+			/* More-scan case: output page full, remember position */
+			if (++args_idx >= (PAGE_SIZE / sizeof(struct defrag_trav_arg))) {
+				sbi->dfr_hint_clus = header->start_clus;
+				sbi->dfr_hint_idx = clus * fsi->dentries_per_clu + index + 1;
+
+				header->stat = DFR_TRAV_STAT_MORE;
+				header->nr_entries = args_idx;
+				goto error;
+			}
+		/* Error case */
+		} else if (err == -EINVAL) {
+			sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0;
+			dfr_err("Scan: err %d", err);
+			goto error;
+		/* End case */
+		} else if (err == -ENOENT) {
+			sbi->dfr_hint_clus = sbi->dfr_hint_idx = 0;
+			err = 0;
+			goto done;
+		} else {
+			/* DO NOTHING */
+		}
+		err = 0;
+	}
+
+	/* Follow FAT-chain */
+	FAT32_CHECK_CLUSTER(fsi, chain.dir, err);
+	ERR_HANDLE(err);
+	err = fat_ent_get(sb, chain.dir, &(chain.dir));
+	ERR_HANDLE(err);
+
+	if (!IS_CLUS_EOF(chain.dir)) {
+		index = 0;
+		clus++;
+		goto scan_fat_chain;
+	}
+
+done:
+	/* Update header */
+	header->stat = DFR_TRAV_STAT_DONE;
+	header->nr_entries = args_idx;
+
+error:
+	return err;
+}
+
+
+/* Check that @chunk->d_clus is reachable from its logical predecessor:
+ * for the first chunk of a file (prev_clus == 0) compare against the
+ * dentry's start cluster, otherwise against the FAT entry of prev_clus.
+ * Returns 0 on success, -ENXIO on mismatch, other -errno on failure.
+ */
+static int
+__defrag_validate_cluster_prev(
+	IN struct super_block *sb,
+	IN struct defrag_chunk_info *chunk)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	CHAIN_T dir;
+	DENTRY_T *ep = NULL;
+	unsigned int entry = 0, clus = 0;
+	int err = 0;
+
+	if (chunk->prev_clus == 0) {
+		/* For the first cluster of a file:
+		 * i_pos encodes <dir start cluster, entry index>
+		 */
+		dir.dir = GET64_HI(chunk->i_pos);
+		dir.flags = 0x1;	// Assume non-continuous
+
+		entry = GET64_LO(chunk->i_pos);
+
+		FAT32_CHECK_CLUSTER(fsi, dir.dir, err);
+		ERR_HANDLE(err);
+		ep = get_dentry_in_dir(sb, &dir, entry, NULL);
+		if (!ep) {
+			err = -EPERM;
+			goto error;
+		}
+
+		/* should call fat_get_entry_clu0(ep) */
+		clus = fsi->fs_func->get_entry_clu0(ep);
+		if (clus != chunk->d_clus) {
+			err = -ENXIO;
+			goto error;
+		}
+	} else {
+		/* Normal case */
+		FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+		ERR_HANDLE(err);
+		err = fat_ent_get(sb, chunk->prev_clus, &clus);
+		if (err)
+			goto error;
+		if (chunk->d_clus != clus)
+			err = -ENXIO;
+	}
+
+error:
+	return err;
+}
+
+
+/* Verify that the FAT entry of the chunk's last cluster matches the
+ * next_clus recorded in the chunk.  Returns 0 on success, -ENXIO on
+ * mismatch, other -errno on FAT access failure.
+ */
+static int
+__defrag_validate_cluster_next(
+	IN struct super_block *sb,
+	IN struct defrag_chunk_info *chunk)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	unsigned int last_clus, next = 0;
+	int err = 0;
+
+	/* the last physical cluster covered by this chunk */
+	last_clus = chunk->d_clus + chunk->nr_clus - 1;
+
+	FAT32_CHECK_CLUSTER(fsi, last_clus, err);
+	ERR_HANDLE(err);
+
+	err = fat_ent_get(sb, last_clus, &next);
+	if (err)
+		goto error;
+
+	if (chunk->next_clus != (next & FAT32_EOF))
+		err = -ENXIO;
+
+error:
+	return err;
+}
+
+
+/**
+ * @fn __defrag_check_au
+ * @brief check if this AU is in use
+ * @return 0 if idle, 1 if busy
+ * @param sb super block
+ * @param clus physical cluster num
+ * @param limit # of used clusters from daemon
+ */
+static int
+__defrag_check_au(
+	struct super_block *sb,
+	u32 clus,
+	u32 limit)
+{
+	/* Free-cluster count of the AU that contains "clus" */
+	unsigned int nr_free = amap_get_freeclus(sb, clus);
+
+#if defined(CONFIG_SDFAT_DFR_DEBUG) && defined(CONFIG_SDFAT_DBG_MSG)
+	if (nr_free < limit) {
+		AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+		AU_INFO_T *au = GET_AU(amap, i_AU_of_CLU(amap, clus));
+
+		dfr_debug("AU[%d] nr_free %d, limit %d", au->idx, nr_free, limit);
+	}
+#endif
+	/* Busy (1) when fewer free clusters remain than the daemon expected */
+	return ((nr_free < limit) ? 1 : 0);
+}
+
+
+/**
+ * @fn defrag_validate_cluster
+ * @brief validate cluster info of given chunk
+ * @return 0 on success, -errno otherwise
+ * @param inode inode of given chunk
+ * @param chunk given chunk
+ * @param skip_prev flag to skip checking previous cluster info
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_validate_cluster(
+	IN struct inode *inode,
+	IN struct defrag_chunk_info *chunk,
+	IN int skip_prev)
+{
+	struct super_block *sb = inode->i_sb;
+	FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+	unsigned int clus = 0;
+	int err = 0, i = 0;
+
+	/* If this inode is unlink-ed, skip it */
+	if (fid->dir.dir == DIR_DELETED)
+		return -ENOENT;
+
+	/* Skip working-AU */
+	err = amap_check_working(sb, chunk->d_clus);
+	if (err)
+		return -EBUSY;
+
+	/* Check # of free_clus of belonged AU */
+	err = __defrag_check_au(inode->i_sb, chunk->d_clus, CLUS_PER_AU(sb) - chunk->au_clus);
+	if (err)
+		return -EINVAL;
+
+	/* Check chunk's clusters: each logical cluster f_clus+i must still
+	 * map to the physical cluster d_clus+i recorded by the daemon.
+	 * NOTE(review): presumably ALLOC_NOWHERE means "look up only, never
+	 * allocate" — confirm against fsapi_map_clus(). */
+	for (i = 0; i < chunk->nr_clus; i++) {
+		err = fsapi_map_clus(inode, chunk->f_clus + i, &clus, ALLOC_NOWHERE);
+		if (err || (chunk->d_clus + i != clus)) {
+			if (!err)
+				err = -ENXIO;
+			goto error;
+		}
+	}
+
+	/* Check next_clus */
+	err = __defrag_validate_cluster_next(sb, chunk);
+	ERR_HANDLE(err);
+
+	if (!skip_prev) {
+		/* Check prev_clus */
+		err = __defrag_validate_cluster_prev(sb, chunk);
+		ERR_HANDLE(err);
+	}
+
+error:
+	return err;
+}
+
+
+/**
+ * @fn defrag_reserve_clusters
+ * @brief reserve clusters for defrag
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param nr_clus # of clusters to reserve
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_reserve_clusters(
+	INOUT struct super_block *sb,
+	IN int nr_clus)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+
+	if (!(sbi->options.improved_allocation & SDFAT_ALLOC_DELAY))
+		/* Nothing to do */
+		return 0;
+
+	/* Check error case.
+	 * NOTE(review): the "- 2" presumably accounts for the two reserved
+	 * FAT entries (clusters 0 and 1) — confirm against fsi->num_clusters
+	 * semantics. nr_clus may be negative to release a reservation. */
+	if (fsi->used_clusters + fsi->reserved_clusters + nr_clus >= fsi->num_clusters - 2) {
+		return -ENOSPC;
+	} else if (fsi->reserved_clusters + nr_clus < 0) {
+		/* A release larger than the outstanding reservation is a bug */
+		dfr_err("Reserve count: reserved_clusters %d, nr_clus %d",
+				fsi->reserved_clusters, nr_clus);
+		BUG_ON(fsi->reserved_clusters + nr_clus < 0);
+	}
+
+	sbi->dfr_reserved_clus += nr_clus;
+	fsi->reserved_clusters += nr_clus;
+
+	return 0;
+}
+
+
+/**
+ * @fn defrag_mark_ignore
+ * @brief mark corresponding AU to be ignored
+ * @return 0 on success, -errno otherwise
+ * @param sb super block
+ * @param clus given cluster num
+ * @remark protected by super_block
+ */
+int
+defrag_mark_ignore(
+	INOUT struct super_block *sb,
+	IN unsigned int clus)
+{
+	int err = 0;
+
+	/* AU-aware allocation is required for ignore-marking to exist;
+	 * otherwise this is a silent no-op returning 0. */
+	if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART)
+		err = amap_mark_ignore(sb, clus);
+
+	if (err)
+		dfr_debug("err %d", err);
+	return err;
+}
+
+
+/**
+ * @fn defrag_unmark_ignore_all
+ * @brief unmark all ignored AUs
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block
+ */
+void
+defrag_unmark_ignore_all(struct super_block *sb)
+{
+	/* No-op unless AU-aware (SMART) allocation is enabled */
+	if (SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART)
+		amap_unmark_ignore_all(sb);
+}
+
+
+/**
+ * @fn defrag_map_cluster
+ * @brief get_block function for defrag dests
+ * @return 0 on success, -errno otherwise
+ * @param inode inode
+ * @param clu_offset logical cluster offset
+ * @param clu mapped cluster (physical)
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_map_cluster(
+	struct inode *inode,
+	unsigned int clu_offset,
+	unsigned int *clu)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+#ifdef CONFIG_SDFAT_DFR_PACKING
+	AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+#endif
+	FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+	struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+	struct defrag_chunk_info *chunk = NULL;
+	CHAIN_T new_clu;
+	int i = 0, nr_new = 0, err = 0;
+
+	/* Get corresponding chunk */
+	for (i = 0; i < ino_dfr->nr_chunks; i++) {
+		chunk = &(ino_dfr->chunks[i]);
+
+		if ((chunk->f_clus <= clu_offset) && (clu_offset < chunk->f_clus + chunk->nr_clus)) {
+			/* For already allocated new_clus */
+			if (sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus]) {
+				*clu = sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus];
+				return 0;
+			}
+			break;
+		}
+	}
+	/* NOTE(review): this only trips when nr_chunks == 0; if no chunk
+	 * matched, "chunk" still points at the LAST entry and is used below.
+	 * Presumably callers guarantee clu_offset lies in some chunk —
+	 * confirm. */
+	BUG_ON(!chunk);
+
+	fscore_set_vol_flags(sb, VOL_DIRTY, 0);
+
+	new_clu.dir = CLUS_EOF;
+	new_clu.size = 0;
+	new_clu.flags = fid->flags;
+
+	/* Allocate new cluster */
+#ifdef CONFIG_SDFAT_DFR_PACKING
+	/* Low clean-AU ratio: prefer packing cold data into partial AUs */
+	if (amap->n_clean_au * DFR_FULL_RATIO <= amap->n_au * DFR_DEFAULT_PACKING_RATIO)
+		err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_PACKING);
+	else
+		err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED);
+#else
+	err = fsi->fs_func->alloc_cluster(sb, 1, &new_clu, ALLOC_COLD_ALIGNED);
+#endif
+
+	if (err) {
+		/* NOTE(review): logs the constant 0 — "err" would be the
+		 * useful value here */
+		dfr_err("Map: 1 %d", 0);
+		return err;
+	}
+
+	/* Decrease reserved cluster count */
+	defrag_reserve_clusters(sb, -1);
+
+	/* Add new_clus info in ino_dfr */
+	sbi->dfr_new_clus[chunk->new_idx + clu_offset - chunk->f_clus] = new_clu.dir;
+
+	/* Make FAT-chain for new_clus: count the leading run of clusters
+	 * already allocated for this chunk */
+	for (i = 0; i < chunk->nr_clus; i++) {
+#if 0
+		if (sbi->dfr_new_clus[chunk->new_idx + i])
+			nr_new++;
+		else
+			break;
+#else
+		if (!sbi->dfr_new_clus[chunk->new_idx + i])
+			break;
+		nr_new++;
+#endif
+	}
+	/* Only once the whole chunk is allocated, link its new clusters */
+	if (nr_new == chunk->nr_clus) {
+		for (i = 0; i < chunk->nr_clus - 1; i++) {
+			FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err);
+			BUG_ON(err);
+			if (fat_ent_set(sb,
+				sbi->dfr_new_clus[chunk->new_idx + i],
+				sbi->dfr_new_clus[chunk->new_idx + i + 1]))
+				return -EIO;
+		}
+	}
+
+	*clu = new_clu.dir;
+	return 0;
+}
+
+
+/**
+ * @fn defrag_writepage_end_io
+ * @brief check WB status of requested page
+ * @return void
+ * @param page page
+ */
+void
+defrag_writepage_end_io(
+	INOUT struct page *page)
+{
+	struct super_block *sb = page->mapping->host->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+	unsigned int clus_start = 0, clus_end = 0;
+	int i = 0;
+
+	/* Check if this inode is on defrag */
+	if (atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ)
+		return;
+
+	/* Logical cluster covered by this page (a page never spans clusters,
+	 * hence the single-cluster [clus_start, clus_start+1) range) */
+	clus_start = page->index / PAGES_PER_CLUS(sb);
+	clus_end = clus_start + 1;
+
+	/* Check each chunk in given inode */
+	for (i = 0; i < ino_dfr->nr_chunks; i++) {
+		struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]);
+		unsigned int chunk_start = 0, chunk_end = 0;
+
+		chunk_start = chunk->f_clus;
+		chunk_end = chunk->f_clus + chunk->nr_clus;
+
+		if ((clus_start >= chunk_start) && (clus_end <= chunk_end)) {
+			int off = clus_start - chunk_start;
+
+			/* Clear this page's WB bit in the per-cluster bitmap;
+			 * __defrag_check_wb() polls these bits for zero */
+			clear_bit((page->index & (PAGES_PER_CLUS(sb) - 1)),
+				(volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + off]));
+		}
+	}
+}
+
+
+/**
+ * @fn __defrag_check_wb
+ * @brief check if WB for given chunk completed
+ * @return 0 on success, -errno otherwise
+ * @param sbi super block info
+ * @param chunk given chunk
+ */
+static int
+__defrag_check_wb(
+	IN struct sdfat_sb_info *sbi,
+	IN struct defrag_chunk_info *chunk)
+{
+	int err = 0, wb_i = 0, i = 0, nr_new = 0;
+
+	if (!sbi || !chunk)
+		return -EINVAL;
+
+	/* Check WB complete status first: any set bit left by
+	 * defrag_writepage_end_io() means writeback is still pending */
+	for (wb_i = 0; wb_i < chunk->nr_clus; wb_i++) {
+		if (atomic_read((atomic_t *)&(sbi->dfr_page_wb[chunk->new_idx + wb_i]))) {
+			err = -EBUSY;
+			break;
+		}
+	}
+
+	/**
+	 * Check NEW_CLUS status.
+	 * writepage_end_io cannot check whole WB complete status,
+	 * so we need to check NEW_CLUS status.
+	 */
+	for (i = 0; i < chunk->nr_clus; i++)
+		if (sbi->dfr_new_clus[chunk->new_idx + i])
+			nr_new++;
+
+	if (nr_new == chunk->nr_clus) {
+		/* All destination clusters allocated: treat the chunk as
+		 * written back even if a trailing WB bit was observed */
+		err = 0;
+		if ((wb_i != chunk->nr_clus) && (wb_i != chunk->nr_clus - 1))
+			dfr_debug("submit_fullpage_bio() called on a page (nr_clus %d, wb_i %d)",
+					chunk->nr_clus, wb_i);
+
+		BUG_ON(nr_new > chunk->nr_clus);
+	} else {
+		dfr_debug("nr_new %d, nr_clus %d", nr_new, chunk->nr_clus);
+		err = -EBUSY;
+	}
+
+	/* Update chunk's state */
+	if (!err)
+		chunk->stat |= DFR_CHUNK_STAT_WB;
+
+	return err;
+}
+
+
+/* Sanity-walk the ORIGINAL FAT chain of "inode" after a failed defrag:
+ * every cluster up to the on-disk size must still be chained (neither
+ * EOF nor FREE before the last index), else BUG. Debug-only helper. */
+static void
+__defrag_check_fat_old(
+	IN struct super_block *sb,
+	IN struct inode *inode,
+	IN struct defrag_chunk_info *chunk)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	unsigned int clus = 0;
+	int err = 0, idx = 0, max_idx = 0;
+
+	/* Get start_clus */
+	clus = SDFAT_I(inode)->fid.start_clu;
+
+	/* Follow FAT-chain.
+	 * NOTE(review): macro argument "val" is used unparenthesized in
+	 * "val - 1"; safe for the single plain-variable call below, but
+	 * fragile if reused with an expression. */
+	#define num_clusters(val) ((val) ? (s32)((val - 1) >> fsi->cluster_size_bits) + 1 : 0)
+	max_idx = num_clusters(SDFAT_I(inode)->i_size_ondisk);
+	for (idx = 0; idx < max_idx; idx++) {
+
+		FAT32_CHECK_CLUSTER(fsi, clus, err);
+		ERR_HANDLE(err);
+		err = fat_ent_get(sb, clus, &clus);
+		ERR_HANDLE(err);
+
+		/* Chain must not terminate before the last cluster */
+		if ((idx < max_idx - 1) && (IS_CLUS_EOF(clus) || IS_CLUS_FREE(clus))) {
+			dfr_err("FAT: inode %p, max_idx %d, idx %d, clus %08x, "
+				"f_clus %d, nr_clus %d", inode, max_idx,
+				idx, clus, chunk->f_clus, chunk->nr_clus);
+			BUG_ON(idx < max_idx - 1);
+			goto error;
+		}
+	}
+
+error:
+	return;
+}
+
+
+static void
+__defrag_check_fat_new(
+ IN struct super_block *sb,
+ IN struct inode *inode,
+ IN struct defrag_chunk_info *chunk)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ unsigned int clus = 0;
+ int i = 0, err = 0;
+
+ /* Check start of FAT-chain */
+ if (chunk->prev_clus) {
+ FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, chunk->prev_clus, &clus);
+ BUG_ON(err);
+ } else {
+ clus = SDFAT_I(inode)->fid.start_clu;
+ }
+ if (sbi->dfr_new_clus[chunk->new_idx] != clus) {
+ dfr_err("FAT: inode %p, start_clus %08x, read_clus %08x",
+ inode, sbi->dfr_new_clus[chunk->new_idx], clus);
+ err = EIO;
+ goto error;
+ }
+
+ /* Check inside of FAT-chain */
+ if (chunk->nr_clus > 1) {
+ for (i = 0; i < chunk->nr_clus - 1; i++) {
+ FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + i], err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + i], &clus);
+ BUG_ON(err);
+ if (sbi->dfr_new_clus[chunk->new_idx + i + 1] != clus) {
+ dfr_err("FAT: inode %p, new_clus %08x, read_clus %08x",
+ inode, sbi->dfr_new_clus[chunk->new_idx], clus);
+ err = EIO;
+ goto error;
+ }
+ }
+ clus = 0;
+ }
+
+ /* Check end of FAT-chain */
+ FAT32_CHECK_CLUSTER(fsi, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err);
+ BUG_ON(err);
+ err = fat_ent_get(sb, sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], &clus);
+ BUG_ON(err);
+ if ((chunk->next_clus & 0x0FFFFFFF) != (clus & 0x0FFFFFFF)) {
+ dfr_err("FAT: inode %p, next_clus %08x, read_clus %08x", inode, chunk->next_clus, clus);
+ err = EIO;
+ }
+
+error:
+ BUG_ON(err);
+}
+
+
+/**
+ * @fn __defrag_update_dirent
+ * @brief update DIR entry for defrag req
+ * @return void
+ * @param sb super block
+ * @param chunk given chunk
+ */
+static void
+__defrag_update_dirent(
+	struct super_block *sb,
+	struct defrag_chunk_info *chunk)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+	CHAIN_T dir;
+	DOS_DENTRY_T *dos_ep;
+	unsigned int entry = 0;
+	unsigned long long sector = 0;
+	unsigned short hi = 0, lo = 0;
+	int err = 0;
+
+	/* i_pos packs parent-dir cluster (hi 32 bits) / entry index (lo) */
+	dir.dir = GET64_HI(chunk->i_pos);
+	dir.flags = 0x1; // Assume non-continuous
+
+	entry = GET64_LO(chunk->i_pos);
+
+	FAT32_CHECK_CLUSTER(fsi, dir.dir, err);
+	BUG_ON(err);
+	/* NOTE(review): unlike __defrag_validate_cluster_prev(), the return
+	 * of get_dentry_in_dir() is dereferenced without a NULL check —
+	 * confirm it cannot fail on this (already validated) path. */
+	dos_ep = (DOS_DENTRY_T *) get_dentry_in_dir(sb, &dir, entry, &sector);
+
+	/* Point the dentry's start cluster at the chunk's new location */
+	hi = GET32_HI(sbi->dfr_new_clus[chunk->new_idx]);
+	lo = GET32_LO(sbi->dfr_new_clus[chunk->new_idx]);
+
+	dos_ep->start_clu_hi = cpu_to_le16(hi);
+	dos_ep->start_clu_lo = cpu_to_le16(lo);
+
+	/* Mark the containing dentry-cache sector dirty for write-out */
+	dcache_modify(sb, sector);
+}
+
+
+/**
+ * @fn defrag_update_fat_prev
+ * @brief update FAT chain for defrag requests
+ * @return void
+ * @param sb super block
+ * @param force flag to force FAT update
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_update_fat_prev(
+ struct super_block *sb,
+ int force)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL;
+ int skip = 0, done = 0;
+
+ /* Check if FS_ERROR occurred */
+ if (sb->s_flags & MS_RDONLY) {
+ dfr_err("RDONLY partition (err %d)", -EPERM);
+ goto out;
+ }
+
+ list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) {
+ struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode);
+ struct sdfat_inode_info *ino_info = SDFAT_I(inode);
+ struct defrag_chunk_info *chunk_prev = NULL;
+ int i = 0, j = 0;
+
+ mutex_lock(&ino_dfr->lock);
+ BUG_ON(atomic_read(&ino_dfr->stat) != DFR_INO_STAT_REQ);
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = NULL;
+ int err = 0;
+
+ chunk = &(ino_dfr->chunks[i]);
+ BUG_ON(!chunk);
+
+ /* Do nothing for already passed chunk */
+ if (chunk->stat == DFR_CHUNK_STAT_PASS) {
+ done++;
+ continue;
+ }
+
+ /* Handle error case */
+ if (chunk->stat == DFR_CHUNK_STAT_ERR) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* Double-check clusters */
+ if (chunk_prev &&
+ (chunk->f_clus == chunk_prev->f_clus + chunk_prev->nr_clus) &&
+ (chunk_prev->stat == DFR_CHUNK_STAT_PASS)) {
+
+ err = defrag_validate_cluster(inode, chunk, 1);
+
+ /* Handle continuous chunks in a file */
+ if (!err) {
+ chunk->prev_clus =
+ sbi->dfr_new_clus[chunk_prev->new_idx + chunk_prev->nr_clus - 1];
+ dfr_debug("prev->f_clus %d, prev->nr_clus %d, chunk->f_clus %d",
+ chunk_prev->f_clus, chunk_prev->nr_clus, chunk->f_clus);
+ }
+ } else {
+ err = defrag_validate_cluster(inode, chunk, 0);
+ }
+
+ if (err) {
+ dfr_err("Cluster validation: inode %p, chunk->f_clus %d, err %d",
+ inode, chunk->f_clus, err);
+ goto error;
+ }
+
+ /**
+ * Skip update_fat_prev if WB or update_fat_next not completed.
+ * Go to error case if FORCE set.
+ */
+ if (__defrag_check_wb(sbi, chunk) || (chunk->stat != DFR_CHUNK_STAT_PREP)) {
+ if (force) {
+ err = -EPERM;
+ dfr_err("Skip case: inode %p, stat %x, f_clus %d, err %d",
+ inode, chunk->stat, chunk->f_clus, err);
+ goto error;
+ }
+ skip++;
+ continue;
+ }
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ defrag_spo_test(sb, DFR_SPO_RANDOM, __func__);
+#endif
+
+ /* Update chunk's previous cluster */
+ if (chunk->prev_clus == 0) {
+ /* For the first cluster of a file */
+ /* Update ino_info->fid.start_clu */
+ ino_info->fid.start_clu = sbi->dfr_new_clus[chunk->new_idx];
+ __defrag_update_dirent(sb, chunk);
+ } else {
+ FAT32_CHECK_CLUSTER(fsi, chunk->prev_clus, err);
+ BUG_ON(err);
+ if (fat_ent_set(sb,
+ chunk->prev_clus,
+ sbi->dfr_new_clus[chunk->new_idx])) {
+ err = -EIO;
+ goto error;
+ }
+ }
+
+ /* Clear extent cache */
+ extent_cache_inval_inode(inode);
+
+ /* Update FID info */
+ ino_info->fid.hint_bmap.off = CLUS_EOF;
+ ino_info->fid.hint_bmap.clu = 0;
+
+ /* Clear old FAT-chain */
+ for (j = 0; j < chunk->nr_clus; j++)
+ defrag_free_cluster(sb, chunk->d_clus + j);
+
+ /* Mark this chunk PASS */
+ chunk->stat = DFR_CHUNK_STAT_PASS;
+ __defrag_check_fat_new(sb, inode, chunk);
+
+ done++;
+
+error:
+ if (err) {
+ /**
+ * chunk->new_idx != 0 means this chunk needs to be cleaned up
+ */
+ if (chunk->new_idx) {
+ /* Free already allocated clusters */
+ for (j = 0; j < chunk->nr_clus; j++) {
+ if (sbi->dfr_new_clus[chunk->new_idx + j]) {
+ defrag_free_cluster(sb, sbi->dfr_new_clus[chunk->new_idx + j]);
+ sbi->dfr_new_clus[chunk->new_idx + j] = 0;
+ }
+ }
+
+ __defrag_check_fat_old(sb, inode, chunk);
+ }
+
+ /**
+ * chunk->new_idx == 0 means this chunk already cleaned up
+ */
+ chunk->new_idx = 0;
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ }
+
+ chunk_prev = chunk;
+ }
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+ }
+
+out:
+ if (skip) {
+ dfr_debug("%s skipped (nr_reqs %d, done %d, skip %d)",
+ __func__, sb_dfr->nr_chunks - 1, done, skip);
+ } else {
+ /* Make dfr_reserved_clus zero */
+ if (sbi->dfr_reserved_clus > 0) {
+ if (fsi->reserved_clusters < sbi->dfr_reserved_clus) {
+ dfr_err("Reserved count: reserved_clus %d, dfr_reserved_clus %d",
+ fsi->reserved_clusters, sbi->dfr_reserved_clus);
+ BUG_ON(fsi->reserved_clusters < sbi->dfr_reserved_clus);
+ }
+
+ defrag_reserve_clusters(sb, 0 - sbi->dfr_reserved_clus);
+ }
+
+ dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done);
+ }
+}
+
+
+/**
+ * @fn defrag_update_fat_next
+ * @brief update FAT chain for defrag requests
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_update_fat_next(
+ struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct defrag_info *sb_dfr = &sbi->dfr_info, *ino_dfr = NULL;
+ struct defrag_chunk_info *chunk = NULL;
+ int done = 0, i = 0, j = 0, err = 0;
+
+ /* Check if FS_ERROR occurred */
+ if (sb->s_flags & MS_RDONLY) {
+ dfr_err("RDONLY partition (err %d)", -EROFS);
+ goto out;
+ }
+
+ list_for_each_entry(ino_dfr, &sb_dfr->entry, entry) {
+
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ int skip = 0;
+
+ chunk = &(ino_dfr->chunks[i]);
+
+ /* Do nothing if error occurred or update_fat_next already passed */
+ if (chunk->stat == DFR_CHUNK_STAT_ERR)
+ continue;
+ if (chunk->stat & DFR_CHUNK_STAT_FAT) {
+ done++;
+ continue;
+ }
+
+			/* Skip this chunk if get_block not passed for this chunk */
+ for (j = 0; j < chunk->nr_clus; j++) {
+ if (sbi->dfr_new_clus[chunk->new_idx + j] == 0) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip)
+ continue;
+
+ /* Update chunk's next cluster */
+ FAT32_CHECK_CLUSTER(fsi,
+ sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1], err);
+ BUG_ON(err);
+ if (fat_ent_set(sb,
+ sbi->dfr_new_clus[chunk->new_idx + chunk->nr_clus - 1],
+ chunk->next_clus))
+ goto out;
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+ /* SPO test */
+ defrag_spo_test(sb, DFR_SPO_RANDOM, __func__);
+#endif
+
+ /* Update chunk's state */
+ chunk->stat |= DFR_CHUNK_STAT_FAT;
+ done++;
+ }
+ }
+
+out:
+ dfr_debug("%s done (nr_reqs %d, done %d)", __func__, sb_dfr->nr_chunks - 1, done);
+}
+
+
+/**
+ * @fn defrag_check_discard
+ * @brief check if we can send discard for this AU, if so, send discard
+ * @return void
+ * @param sb super block
+ * @remark protected by super_block and volume lock
+ */
+void
+defrag_check_discard(
+ IN struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap = SDFAT_SB(sb)->fsi.amap;
+ AU_INFO_T *au = NULL;
+ struct defrag_info *sb_dfr = &(SDFAT_SB(sb)->dfr_info);
+ unsigned int tmp[DFR_MAX_AU_MOVED];
+ int i = 0, j = 0;
+
+ BUG_ON(!amap);
+
+ if (!(SDFAT_SB(sb)->options.discard) ||
+ !(SDFAT_SB(sb)->options.improved_allocation & SDFAT_ALLOC_SMART))
+ return;
+
+ memset(tmp, 0, sizeof(int) * DFR_MAX_AU_MOVED);
+
+ for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = &(sb_dfr->chunks[i]);
+ int skip = 0;
+
+ au = GET_AU(amap, i_AU_of_CLU(amap, chunk->d_clus));
+
+ /* Send DISCARD for free AU */
+ if ((IS_AU_IGNORED(au, amap)) &&
+ (amap_get_freeclus(sb, chunk->d_clus) == CLUS_PER_AU(sb))) {
+ sector_t blk = 0, nr_blks = 0;
+ unsigned int au_align_factor = amap->option.au_align_factor % amap->option.au_size;
+
+ BUG_ON(au->idx == 0);
+
+ /* Avoid multiple DISCARD */
+ for (j = 0; j < DFR_MAX_AU_MOVED; j++) {
+ if (tmp[j] == au->idx) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip == 1)
+ continue;
+
+ /* Send DISCARD cmd */
+ blk = (sector_t) (((au->idx * CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits)
+ - au_align_factor);
+ nr_blks = ((sector_t)CLUS_PER_AU(sb)) << fsi->sect_per_clus_bits;
+
+ dfr_debug("Send DISCARD for AU[%d] (blk %08zx)", au->idx, blk);
+ sb_issue_discard(sb, blk, nr_blks, GFP_NOFS, 0);
+
+ /* Save previous AU's index */
+ for (j = 0; j < DFR_MAX_AU_MOVED; j++) {
+ if (!tmp[j]) {
+ tmp[j] = au->idx;
+ break;
+ }
+ }
+ }
+ }
+}
+
+
+/**
+ * @fn defrag_free_cluster
+ * @brief free unnecessary cluster
+ * @return 0 on success, -EIO otherwise
+ * @param sb super block
+ * @param clus physical cluster num
+ * @remark protected by super_block and volume lock
+ */
+int
+defrag_free_cluster(
+	struct super_block *sb,
+	unsigned int clus)
+{
+	FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+	unsigned int val = 0;
+	s32 err = 0;
+
+	FAT32_CHECK_CLUSTER(fsi, clus, err);
+	BUG_ON(err);
+	if (fat_ent_get(sb, clus, &val))
+		return -EIO;
+	if (val) {
+		/* Mark the FAT entry free */
+		if (fat_ent_set(sb, clus, 0))
+			return -EIO;
+	} else {
+		/* Double free indicates corrupted defrag bookkeeping */
+		dfr_err("Free: Already freed, clus %08x, val %08x", clus, val);
+		BUG_ON(!val);
+	}
+
+	set_sb_dirty(sb);
+	fsi->used_clusters--;
+	/* Keep the AU allocation map in sync when present */
+	if (fsi->amap)
+		amap_release_cluster(sb, clus);
+
+	return 0;
+}
+
+
+/**
+ * @fn defrag_check_defrag_required
+ * @brief check if defrag required
+ * @return 1 if required, 0 otherwise, -EIO if the used-cluster count cannot be read
+ * @param sb super block
+ * @param totalau # of total AUs
+ * @param cleanau # of clean AUs
+ * @param fullau # of full AUs
+ * @remark protected by super_block
+ */
+int
+defrag_check_defrag_required(
+ IN struct super_block *sb,
+ OUT int *totalau,
+ OUT int *cleanau,
+ OUT int *fullau)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ AMAP_T *amap = NULL;
+ int clean_ratio = 0, frag_ratio = 0;
+ int ret = 0;
+
+ if (!sb || !(SDFAT_SB(sb)->options.defrag))
+ return 0;
+
+ /* Check DFR_DEFAULT_STOP_RATIO first */
+ fsi = &(SDFAT_SB(sb)->fsi);
+ if (fsi->used_clusters == (unsigned int)(~0)) {
+ if (fsi->fs_func->count_used_clusters(sb, &fsi->used_clusters))
+ return -EIO;
+ }
+ if (fsi->used_clusters * DFR_FULL_RATIO >= fsi->num_clusters * DFR_DEFAULT_STOP_RATIO) {
+ dfr_debug("used_clusters %d, num_clusters %d", fsi->used_clusters, fsi->num_clusters);
+ return 0;
+ }
+
+ /* Check clean/frag ratio */
+ amap = SDFAT_SB(sb)->fsi.amap;
+ BUG_ON(!amap);
+
+ clean_ratio = (amap->n_clean_au * 100) / amap->n_au;
+ if (amap->n_full_au)
+ frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) / amap->n_full_au;
+ else
+ frag_ratio = ((amap->n_au - amap->n_clean_au) * 100) /
+ (fsi->used_clusters / CLUS_PER_AU(sb) + 1);
+
+ /*
+ * Wake-up defrag_daemon:
+ * when # of clean AUs too small, or frag_ratio exceeds the limit
+ */
+ if ((clean_ratio < DFR_DEFAULT_WAKEUP_RATIO) ||
+ ((clean_ratio < DFR_DEFAULT_CLEAN_RATIO) && (frag_ratio >= DFR_DEFAULT_FRAG_RATIO))) {
+
+ if (totalau)
+ *totalau = amap->n_au;
+ if (cleanau)
+ *cleanau = amap->n_clean_au;
+ if (fullau)
+ *fullau = amap->n_full_au;
+ ret = 1;
+ }
+
+ return ret;
+}
+
+
+/**
+ * @fn defrag_check_defrag_on
+ * @brief check defrag status on inode
+ * @return 1 if defrag in on, 0 otherwise
+ * @param inode inode
+ * @param start logical start addr
+ * @param end logical end addr
+ * @param cancel flag to cancel defrag
+ * @param caller caller info
+ */
+int
+defrag_check_defrag_on(
+ INOUT struct inode *inode,
+ IN loff_t start,
+ IN loff_t end,
+ IN int cancel,
+ IN const char *caller)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+ unsigned int clus_start = 0, clus_end = 0;
+ int ret = 0, i = 0;
+
+ if (!inode || (start == end))
+ return 0;
+
+ mutex_lock(&ino_dfr->lock);
+ /* Check if this inode is on defrag */
+ if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) {
+
+ clus_start = start >> (fsi->cluster_size_bits);
+ clus_end = (end >> (fsi->cluster_size_bits)) +
+ ((end & (fsi->cluster_size - 1)) ? 1 : 0);
+
+ if (!ino_dfr->chunks)
+ goto error;
+
+ /* Check each chunk in given inode */
+ for (i = 0; i < ino_dfr->nr_chunks; i++) {
+ struct defrag_chunk_info *chunk = &(ino_dfr->chunks[i]);
+ unsigned int chunk_start = 0, chunk_end = 0;
+
+ /* Skip this chunk when error occurred or it already passed defrag process */
+ if ((chunk->stat == DFR_CHUNK_STAT_ERR) || (chunk->stat == DFR_CHUNK_STAT_PASS))
+ continue;
+
+ chunk_start = chunk->f_clus;
+ chunk_end = chunk->f_clus + chunk->nr_clus;
+
+ if (((clus_start >= chunk_start) && (clus_start < chunk_end)) ||
+ ((clus_end > chunk_start) && (clus_end <= chunk_end)) ||
+ ((clus_start < chunk_start) && (clus_end > chunk_end))) {
+ ret = 1;
+ if (cancel) {
+ chunk->stat = DFR_CHUNK_STAT_ERR;
+ dfr_debug("Defrag canceled: inode %p, start %08x, end %08x, caller %s",
+ inode, clus_start, clus_end, caller);
+ }
+ }
+ }
+ }
+
+error:
+ BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+ mutex_unlock(&ino_dfr->lock);
+ return ret;
+}
+
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+/**
+ * @fn defrag_spo_test
+ * @brief test SPO while defrag running
+ * @return void
+ * @param sb super block
+ * @param flag SPO debug flag
+ * @param caller caller info
+ */
+void
+defrag_spo_test(
+	struct super_block *sb,
+	int flag,
+	const char *caller)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+	if (!sb || !(SDFAT_SB(sb)->options.defrag))
+		return;
+
+	/* Deliberately crash the kernel at the injected point to simulate
+	 * sudden power-off (SPO); debug builds only. */
+	if (flag == sbi->dfr_spo_flag) {
+		dfr_err("Defrag SPO test (flag %d, caller %s)", flag, caller);
+		panic("Defrag SPO test");
+	}
+}
+
+
+#endif /* CONFIG_SDFAT_DFR */
diff --git a/fs/sdfat/dfr.h b/fs/sdfat/dfr.h
new file mode 100644
index 000000000000..da98605020d3
--- /dev/null
+++ b/fs/sdfat/dfr.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_DEFRAG_H
+#define _SDFAT_DEFRAG_H
+
+#ifdef CONFIG_SDFAT_DFR
+
+/* Tuning parameters */
+#define DFR_MIN_TIMEOUT (1 * HZ) // Minimum timeout for forced-sync
+#define DFR_DEFAULT_TIMEOUT (10 * HZ) // Default timeout for forced-sync
+
+#define DFR_DEFAULT_CLEAN_RATIO (50) // Wake-up daemon when clean AU ratio under 50%
+#define DFR_DEFAULT_WAKEUP_RATIO (10) // Wake-up daemon when clean AU ratio under 10%, regardless of frag_ratio
+
+#define DFR_DEFAULT_FRAG_RATIO (130) // Wake-up daemon when frag_ratio over 130%
+
+#define DFR_DEFAULT_PACKING_RATIO (10) // Call allocator with PACKING flag, when clean AU ratio under 10%
+
+#define DFR_DEFAULT_STOP_RATIO (98) // Stop defrag_daemon when disk used ratio over 98%
+#define DFR_FULL_RATIO (100)
+
+#define DFR_MAX_AU_MOVED (16) // Maximum # of AUs for a request
+
+
+/* Debugging support */
+/*
+ * NOTE(review): the variadic parameter is expanded as "args" without
+ * "##args", so every call site must pass at least one argument after the
+ * format string (all callers in dfr.c do) — confirm before adding
+ * zero-argument calls.
+ */
+#define dfr_err(fmt, args...) pr_err("DFR: " fmt "\n", args)
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+#define dfr_debug(fmt, args...) pr_debug("DFR: " fmt "\n", args)
+#else
+/* Compiled out when DFR debugging is disabled */
+#define dfr_debug(fmt, args...)
+#endif
+
+
+/* Error handling */
+#define ERR_HANDLE(err) { \
+ if (err) { \
+ dfr_debug("err %d", err); \
+ goto error; \
+ } \
+}
+
+#define ERR_HANDLE2(cond, err, val) { \
+ if (cond) { \
+ err = val; \
+ dfr_debug("err %d", err); \
+ goto error; \
+ } \
+}
+
+
+/* Arguments IN-OUT */
+#define IN
+#define OUT
+#define INOUT
+
+
+/* Macros */
+#define GET64_HI(var64) ((unsigned int)((var64) >> 32))
+#define GET64_LO(var64) ((unsigned int)(((var64) << 32) >> 32))
+#define SET64_HI(dst64, var32) { (dst64) = ((loff_t)(var32) << 32) | ((dst64) & 0x00000000ffffffffLL); }
+#define SET64_LO(dst64, var32) { (dst64) = ((dst64) & 0xffffffff00000000LL) | ((var32) & 0x00000000ffffffffLL); }
+
+#define GET32_HI(var32) ((unsigned short)((var32) >> 16))
+#define GET32_LO(var32) ((unsigned short)(((var32) << 16) >> 16))
+#define SET32_HI(dst32, var16) { (dst32) = ((unsigned int)(var16) << 16) | ((dst32) & 0x0000ffff); }
+#define SET32_LO(dst32, var16) { (dst32) = ((dst32) & 0xffff0000) | ((unsigned int)(var16) & 0x0000ffff); }
+
+
+/* FAT32 related */
+#define FAT32_EOF (0x0fffffff)
+#define FAT32_RESERVED (0x0ffffff7)
+#define FAT32_UNUSED_CLUS (2)
+
+#define CLUS_PER_AU(sb) ( \
+ (SDFAT_SB(sb)->options.amap_opt.sect_per_au) >> (SDFAT_SB(sb)->fsi.sect_per_clus_bits) \
+)
+#define PAGES_PER_AU(sb) ( \
+ ((SDFAT_SB(sb)->options.amap_opt.sect_per_au) << ((sb)->s_blocksize_bits)) \
+ >> PAGE_SHIFT \
+)
+#define PAGES_PER_CLUS(sb) ((SDFAT_SB(sb)->fsi.cluster_size) >> PAGE_SHIFT)
+
+#define FAT32_CHECK_CLUSTER(fsi, clus, err) \
+ { \
+ if (((clus) < FAT32_UNUSED_CLUS) || \
+ ((clus) > (fsi)->num_clusters) || \
+ ((clus) >= FAT32_RESERVED)) { \
+ dfr_err("clus %08x, fsi->num_clusters %08x", (clus), (fsi)->num_clusters); \
+ err = -EINVAL; \
+ } else { \
+ err = 0; \
+ } \
+ }
+
+
+/* IOCTL_DFR_INFO */
+struct defrag_info_arg {
+ /* PBS info */
+ unsigned int sec_sz;
+ unsigned int clus_sz;
+ unsigned long long total_sec;
+ unsigned long long fat_offset_sec;
+ unsigned int fat_sz_sec;
+ unsigned int n_fat;
+ unsigned int hidden_sectors;
+
+ /* AU info */
+ unsigned int sec_per_au;
+};
+
+
+/* IOC_DFR_TRAV */
+#define DFR_TRAV_HEADER_IDX (0)
+
+#define DFR_TRAV_TYPE_HEADER (0x0000000F)
+#define DFR_TRAV_TYPE_DIR (1)
+#define DFR_TRAV_TYPE_FILE (2)
+#define DFR_TRAV_TYPE_TEST (DFR_TRAV_TYPE_HEADER | 0x10000000)
+
+#define DFR_TRAV_ROOT_IPOS (0xFFFFFFFFFFFFFFFFLL)
+
+struct defrag_trav_arg {
+ int type;
+ unsigned int start_clus;
+ loff_t i_pos;
+ char name[MAX_DOSNAME_BUF_SIZE];
+ char dummy1;
+ int dummy2;
+};
+
+#define DFR_TRAV_STAT_DONE (0x1)
+#define DFR_TRAV_STAT_MORE (0x2)
+#define DFR_TRAV_STAT_ERR (0xFF)
+
+struct defrag_trav_header {
+ int type;
+ unsigned int start_clus;
+ loff_t i_pos;
+ char name[MAX_DOSNAME_BUF_SIZE];
+ char stat;
+ unsigned int nr_entries;
+};
+
+
+/* IOC_DFR_REQ */
+#define REQ_HEADER_IDX (0)
+
+#define DFR_CHUNK_STAT_ERR (0xFFFFFFFF)
+#define DFR_CHUNK_STAT_REQ (0x1)
+#define DFR_CHUNK_STAT_WB (0x2)
+#define DFR_CHUNK_STAT_FAT (0x4)
+#define DFR_CHUNK_STAT_PREP (DFR_CHUNK_STAT_REQ | DFR_CHUNK_STAT_WB | DFR_CHUNK_STAT_FAT)
+#define DFR_CHUNK_STAT_PASS (0x0000000F)
+
+struct defrag_chunk_header {
+ int mode;
+ unsigned int nr_chunks;
+ loff_t dummy1;
+ int dummy2[4];
+ union {
+ int *dummy3;
+ int dummy4;
+ };
+ int dummy5;
+};
+
+struct defrag_chunk_info {
+ int stat;
+ /* File related */
+ unsigned int f_clus;
+ loff_t i_pos;
+ /* Cluster related */
+ unsigned int d_clus;
+ unsigned int nr_clus;
+ unsigned int prev_clus;
+ unsigned int next_clus;
+ union {
+ void *dummy;
+ /* req status */
+ unsigned int new_idx;
+ };
+ /* AU related */
+ unsigned int au_clus;
+};
+
+
+/* Global info */
+#define DFR_MODE_BACKGROUND (0x1)
+#define DFR_MODE_FOREGROUND (0x2)
+#define DFR_MODE_ONESHOT (0x4)
+#define DFR_MODE_BATCHED (0x8)
+#define DFR_MODE_TEST (DFR_MODE_BACKGROUND | 0x10000000)
+
+#define DFR_SB_STAT_IDLE (0)
+#define DFR_SB_STAT_REQ (1)
+#define DFR_SB_STAT_VALID (2)
+
+#define DFR_INO_STAT_IDLE (0)
+#define DFR_INO_STAT_REQ (1)
+struct defrag_info {
+ struct mutex lock;
+ atomic_t stat;
+ struct defrag_chunk_info *chunks;
+ unsigned int nr_chunks;
+ struct list_head entry;
+};
+
+
+/* SPO test flags */
+#define DFR_SPO_NONE (0)
+#define DFR_SPO_NORMAL (1)
+#define DFR_SPO_DISCARD (2)
+#define DFR_SPO_FAT_NEXT (3)
+#define DFR_SPO_RANDOM (4)
+
+
+/* Extern functions */
+int defrag_get_info(struct super_block *sb, struct defrag_info_arg *arg);
+
+int defrag_scan_dir(struct super_block *sb, struct defrag_trav_arg *arg);
+
+int defrag_validate_cluster(struct inode *inode, struct defrag_chunk_info *chunk, int skip_prev);
+int defrag_reserve_clusters(struct super_block *sb, int nr_clus);
+int defrag_mark_ignore(struct super_block *sb, unsigned int clus);
+void defrag_unmark_ignore_all(struct super_block *sb);
+
+int defrag_map_cluster(struct inode *inode, unsigned int clu_offset, unsigned int *clu);
+void defrag_writepage_end_io(struct page *page);
+
+void defrag_update_fat_prev(struct super_block *sb, int force);
+void defrag_update_fat_next(struct super_block *sb);
+void defrag_check_discard(struct super_block *sb);
+int defrag_free_cluster(struct super_block *sb, unsigned int clus);
+
+int defrag_check_defrag_required(struct super_block *sb, int *totalau, int *cleanau, int *fullau);
+int defrag_check_defrag_on(struct inode *inode, loff_t start, loff_t end, int cancel, const char *caller);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+void defrag_spo_test(struct super_block *sb, int flag, const char *caller);
+#endif
+
+#endif /* CONFIG_SDFAT_DFR */
+
+#endif /* _SDFAT_DEFRAG_H */
+
diff --git a/fs/sdfat/extent.c b/fs/sdfat/extent.c
new file mode 100644
index 000000000000..59e096530dda
--- /dev/null
+++ b/fs/sdfat/extent.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * linux/fs/fat/cache.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ *
+ * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead
+ * of inode number.
+ * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : extent.c */
+/* PURPOSE : Improve the performance of traversing fat chain. */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/slab.h>
+#include "sdfat.h"
+#include "core.h"
+
+#define EXTENT_CACHE_VALID 0
+/* this must be > 0. */
+#define EXTENT_MAX_CACHE 16
+
+struct extent_cache {
+ struct list_head cache_list;
+ u32 nr_contig; /* number of contiguous clusters */
+ u32 fcluster; /* cluster number in the file. */
+ u32 dcluster; /* cluster number on disk. */
+};
+
+struct extent_cache_id {
+ u32 id;
+ u32 nr_contig;
+ u32 fcluster;
+ u32 dcluster;
+};
+
+static struct kmem_cache *extent_cache_cachep;
+
+/* Slab constructor: runs once per object when its slab page is allocated;
+ * leaves each extent_cache with an empty, self-linked list node. */
+static void init_once(void *c)
+{
+ struct extent_cache *cache = (struct extent_cache *)c;
+
+ INIT_LIST_HEAD(&cache->cache_list);
+}
+
+/* Create the global slab cache for extent_cache objects.
+ * Returns 0 on success, -ENOMEM on failure. */
+s32 extent_cache_init(void)
+{
+ extent_cache_cachep = kmem_cache_create("sdfat_extent_cache",
+ sizeof(struct extent_cache),
+ 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ init_once);
+ if (!extent_cache_cachep)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Destroy the slab cache; safe to call if extent_cache_init() failed. */
+void extent_cache_shutdown(void)
+{
+ if (!extent_cache_cachep)
+ return;
+ kmem_cache_destroy(extent_cache_cachep);
+}
+
+/* Initialize the per-inode extent cache (empty LRU, fresh validity id).
+ * The initial id skips the reserved EXTENT_CACHE_VALID sentinel. */
+void extent_cache_init_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ spin_lock_init(&extent->cache_lru_lock);
+ extent->nr_caches = 0;
+ extent->cache_valid_id = EXTENT_CACHE_VALID + 1;
+ INIT_LIST_HEAD(&extent->cache_lru);
+}
+
+/* GFP_NOFS: may be called on writeback paths; must not recurse into FS. */
+static inline struct extent_cache *extent_cache_alloc(void)
+{
+ return kmem_cache_alloc(extent_cache_cachep, GFP_NOFS);
+}
+
+/* Object must already be unlinked from any LRU list (see BUG_ON). */
+static inline void extent_cache_free(struct extent_cache *cache)
+{
+ BUG_ON(!list_empty(&cache->cache_list));
+ kmem_cache_free(extent_cache_cachep, cache);
+}
+
+/* Move 'cache' to the front of the inode's LRU list (most recently used).
+ * Caller must hold extent->cache_lru_lock. */
+static inline void extent_cache_update_lru(struct inode *inode,
+ struct extent_cache *cache)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ if (extent->cache_lru.next != &cache->cache_list)
+ list_move(&cache->cache_list, &extent->cache_lru);
+}
+
+/* Look up the cached extent covering (or closest below) file cluster
+ * 'fclus'.  On a hit, snapshots the extent into *cid (stamped with the
+ * current validity id so a later extent_cache_add() can detect
+ * invalidation), sets *cached_fclus/*cached_dclus to the nearest cached
+ * position, and returns the offset of that position within the extent.
+ * Returns CLUS_EOF when nothing at or below 'fclus' is cached. */
+static u32 extent_cache_lookup(struct inode *inode, u32 fclus,
+ struct extent_cache_id *cid,
+ u32 *cached_fclus, u32 *cached_dclus)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ /* fcluster == 0 sentinel: any real extent compares greater */
+ static struct extent_cache nohit = { .fcluster = 0, };
+
+ struct extent_cache *hit = &nohit, *p;
+ u32 offset = CLUS_EOF;
+
+ spin_lock(&extent->cache_lru_lock);
+ list_for_each_entry(p, &extent->cache_lru, cache_list) {
+ /* Find the cache of "fclus" or nearest cache. */
+ if (p->fcluster <= fclus && hit->fcluster < p->fcluster) {
+ hit = p;
+ if ((hit->fcluster + hit->nr_contig) < fclus) {
+ /* extent ends before fclus: best so far, keep scanning */
+ offset = hit->nr_contig;
+ } else {
+ /* fclus falls inside this extent: exact hit */
+ offset = fclus - hit->fcluster;
+ break;
+ }
+ }
+ }
+ if (hit != &nohit) {
+ extent_cache_update_lru(inode, hit);
+
+ cid->id = extent->cache_valid_id;
+ cid->nr_contig = hit->nr_contig;
+ cid->fcluster = hit->fcluster;
+ cid->dcluster = hit->dcluster;
+ *cached_fclus = cid->fcluster + offset;
+ *cached_dclus = cid->dcluster + offset;
+ }
+ spin_unlock(&extent->cache_lru_lock);
+
+ return offset;
+}
+
+/* Merge 'new' into an existing cache entry with the same starting file
+ * cluster, extending its length if 'new' is longer.  Returns the merged
+ * entry, or NULL if no entry matches.  Caller holds cache_lru_lock. */
+static struct extent_cache *extent_cache_merge(struct inode *inode,
+ struct extent_cache_id *new)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ struct extent_cache *p;
+
+ list_for_each_entry(p, &extent->cache_lru, cache_list) {
+ /* Find the same part as "new" in cluster-chain. */
+ if (p->fcluster == new->fcluster) {
+ ASSERT(p->dcluster == new->dcluster);
+ if (new->nr_contig > p->nr_contig)
+ p->nr_contig = new->nr_contig;
+ return p;
+ }
+ }
+ return NULL;
+}
+
+/* Insert the extent described by *cid into the inode's cache, unless the
+ * cache was invalidated since the id was snapshotted.  Allocation happens
+ * with the lock dropped (extent_cache_alloc may sleep), so the merge is
+ * retried afterwards to catch a racing insert of the same extent.  When
+ * the cache is full, the LRU tail entry is recycled in place. */
+static void extent_cache_add(struct inode *inode, struct extent_cache_id *new)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ struct extent_cache *cache, *tmp;
+
+ if (new->fcluster == -1) /* dummy cache */
+ return;
+
+ spin_lock(&extent->cache_lru_lock);
+ if (new->id != EXTENT_CACHE_VALID &&
+ new->id != extent->cache_valid_id)
+ goto out; /* this cache was invalidated */
+
+ cache = extent_cache_merge(inode, new);
+ if (cache == NULL) {
+ if (extent->nr_caches < EXTENT_MAX_CACHE) {
+ /* reserve a slot before dropping the lock */
+ extent->nr_caches++;
+ spin_unlock(&extent->cache_lru_lock);
+
+ tmp = extent_cache_alloc();
+ if (!tmp) {
+ spin_lock(&extent->cache_lru_lock);
+ extent->nr_caches--;
+ spin_unlock(&extent->cache_lru_lock);
+ return;
+ }
+
+ spin_lock(&extent->cache_lru_lock);
+ /* re-check: someone may have inserted it meanwhile */
+ cache = extent_cache_merge(inode, new);
+ if (cache != NULL) {
+ extent->nr_caches--;
+ extent_cache_free(tmp);
+ goto out_update_lru;
+ }
+ cache = tmp;
+ } else {
+ /* cache full: evict/reuse the least recently used entry */
+ struct list_head *p = extent->cache_lru.prev;
+ cache = list_entry(p, struct extent_cache, cache_list);
+ }
+ cache->fcluster = new->fcluster;
+ cache->dcluster = new->dcluster;
+ cache->nr_contig = new->nr_contig;
+ }
+out_update_lru:
+ extent_cache_update_lru(inode, cache);
+out:
+ spin_unlock(&extent->cache_lru_lock);
+}
+
+/*
+ * Cache invalidation occurs rarely, thus the LRU chain is not updated. It
+ * fixes itself after a while.
+ */
+static void __extent_cache_inval_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+ struct extent_cache *cache;
+
+ /* free every cached extent of this inode */
+ while (!list_empty(&extent->cache_lru)) {
+ cache = list_entry(extent->cache_lru.next,
+ struct extent_cache, cache_list);
+ list_del_init(&cache->cache_list);
+ extent->nr_caches--;
+ extent_cache_free(cache);
+ }
+ /* Update. The copy of caches before this id is discarded. */
+ extent->cache_valid_id++;
+ if (extent->cache_valid_id == EXTENT_CACHE_VALID)
+ extent->cache_valid_id++; /* skip the reserved sentinel */
+}
+
+/* Locked wrapper: drop all cached extents and bump the validity id so
+ * in-flight extent_cache_id snapshots are rejected by extent_cache_add(). */
+void extent_cache_inval_inode(struct inode *inode)
+{
+ EXTENT_T *extent = &(SDFAT_I(inode)->fid.extent);
+
+ spin_lock(&extent->cache_lru_lock);
+ __extent_cache_inval_inode(inode);
+ spin_unlock(&extent->cache_lru_lock);
+}
+
+/* Extend the run by one and report whether 'dclus' continues it on disk.
+ * Note the increment happens before the test (pre-increment semantics). */
+static inline s32 cache_contiguous(struct extent_cache_id *cid, u32 dclus)
+{
+ cid->nr_contig++;
+ return ((cid->dcluster + cid->nr_contig) == dclus);
+}
+
+/* Start a new candidate extent at (fclus -> dclus) with length 0. */
+static inline void cache_init(struct extent_cache_id *cid, u32 fclus, u32 dclus)
+{
+ cid->id = EXTENT_CACHE_VALID;
+ cid->fcluster = fclus;
+ cid->dcluster = dclus;
+ cid->nr_contig = 0;
+}
+
+/* Map file-relative cluster index 'cluster' to its on-disk cluster.
+ * Starts from the nearest extent-cache hit (or the file's start cluster)
+ * and walks the FAT chain the rest of the way, caching the contiguous run
+ * it discovers.  On return *fclus is the file cluster index actually
+ * reached, *dclus the corresponding disk cluster and *last_dclus the one
+ * before it.  If 'allow_eof' is set, hitting end-of-chain early is not an
+ * error (the walk just stops).  Returns 0 on success, -EIO on a corrupt
+ * chain or FAT access failure. */
+s32 extent_get_clus(struct inode *inode, u32 cluster, u32 *fclus,
+ u32 *dclus, u32 *last_dclus, s32 allow_eof)
+{
+ struct super_block *sb = inode->i_sb;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ u32 limit = fsi->num_clusters;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ struct extent_cache_id cid;
+ u32 content;
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (IS_CLUS_FREE(fid->start_clu)) {
+ sdfat_fs_error(sb, "invalid access to "
+ "extent cache (entry 0x%08x)", fid->start_clu);
+ ASSERT(0);
+ return -EIO;
+ }
+
+ *fclus = 0;
+ *dclus = fid->start_clu;
+ *last_dclus = *dclus;
+
+ /*
+ * Don`t use extent_cache if zero offset or non-cluster allocation
+ */
+ if ((cluster == 0) || IS_CLUS_EOF(*dclus))
+ return 0;
+
+ cache_init(&cid, CLUS_EOF, CLUS_EOF);
+
+ if (extent_cache_lookup(inode, cluster, &cid, fclus, dclus) == CLUS_EOF) {
+ /*
+ * dummy, always not contiguous
+ * This is reinitialized by cache_init(), later.
+ */
+ ASSERT((cid.id == EXTENT_CACHE_VALID)
+ && (cid.fcluster == CLUS_EOF)
+ && (cid.dcluster == CLUS_EOF)
+ && (cid.nr_contig == 0));
+ }
+
+ if (*fclus == cluster)
+ return 0; /* exact cache hit, no FAT walk needed */
+
+ while (*fclus < cluster) {
+ /* prevent the infinite loop of cluster chain */
+ if (*fclus > limit) {
+ sdfat_fs_error(sb,
+ "%s: detected the cluster chain loop"
+ " (i_pos %u)", __func__,
+ (*fclus));
+ return -EIO;
+ }
+
+ if (fat_ent_get_safe(sb, *dclus, &content))
+ return -EIO;
+
+ *last_dclus = *dclus;
+ *dclus = content;
+ (*fclus)++;
+
+ if (IS_CLUS_EOF(content)) {
+ if (!allow_eof) {
+ sdfat_fs_error(sb,
+ "%s: invalid cluster chain (i_pos %u,"
+ "last_clus 0x%08x is EOF)",
+ __func__, *fclus, (*last_dclus));
+ return -EIO;
+ }
+
+ break;
+ }
+
+ /* chain broke contiguity: start caching a new extent */
+ if (!cache_contiguous(&cid, *dclus))
+ cache_init(&cid, *fclus, *dclus);
+ }
+
+ /* remember the last contiguous run we walked */
+ extent_cache_add(inode, &cid);
+ return 0;
+}
diff --git a/fs/sdfat/fatent.c b/fs/sdfat/fatent.c
new file mode 100644
index 000000000000..fca32a50d336
--- /dev/null
+++ b/fs/sdfat/fatent.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : fatent.c */
+/* PURPOSE : sdFAT FAT entry manager */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <asm/unaligned.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+/* All buffer structures are protected w/ fsi->v_sem */
+
+/*----------------------------------------------------------------------*/
+/* Static functions */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/* FAT Read/Write Functions */
+/*======================================================================*/
+/* in : sb, loc (cluster index)
+ * out: content (next cluster in the chain, CLUS_EOF for end-of-chain)
+ * returns 0 on success, -EIO on buffer-cache failure
+ */
+static s32 exfat_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* fsi->vol_type == EXFAT */
+ /* 4-byte entries: blocksize_bits-2 entries per sector */
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = le32_to_cpu(*(__le32 *)(&fat_sector[off]));
+
+ /* remap reserved clusters to simplify code */
+ if (_content >= CLUSTER_32(0xFFFFFFF8))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+/* Write exFAT FAT entry 'loc' <- 'content' and mark the sector dirty.
+ * Returns 0 on success, -EIO on buffer-cache failure. */
+static s32 exfat_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le32 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le32 *)&(fat_sector[off]);
+ *fat_entry = cpu_to_le32(content);
+
+ return fcache_modify(sb, sec);
+}
+
+/* FAT32 entries are 32-bit but only the low 28 bits are the cluster
+ * number; the top 4 bits are reserved and must be preserved on write. */
+#define FATENT_FAT32_VALID_MASK (0x0FFFFFFFU)
+#define FATENT_FAT32_IGNORE_MASK (0xF0000000U)
+/* Read FAT32 entry 'loc'; bad/EOF markers are canonicalized to
+ * CLUS_BAD/CLUS_EOF.  Returns 0 on success, -EIO on failure. */
+static s32 fat32_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = le32_to_cpu(*(__le32 *)(&fat_sector[off]));
+ _content &= FATENT_FAT32_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_32(0x0FFFFFF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_32(0x0FFFFFF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+/* Write FAT32 entry 'loc' <- 'content', keeping the reserved top nibble
+ * untouched.  Returns 0 on success, -EIO on failure. */
+static s32 fat32_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le32 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT32_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-2));
+ off = (loc << 2) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le32 *)&(fat_sector[off]);
+ /* preserve the reserved high bits on disk */
+ content |= (le32_to_cpu(*fat_entry) & FATENT_FAT32_IGNORE_MASK);
+ *fat_entry = cpu_to_le32(content);
+
+ return fcache_modify(sb, sec);
+}
+
+#define FATENT_FAT16_VALID_MASK (0x0000FFFFU)
+/* Read FAT16 entry 'loc' (2 bytes each); bad/EOF markers are
+ * canonicalized to CLUS_BAD/CLUS_EOF.  Returns 0 or -EIO. */
+static s32 fat16_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ /* 2-byte entries: blocksize_bits-1 entries per sector */
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1));
+ off = (loc << 1) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content = (u32)le16_to_cpu(*(__le16 *)(&fat_sector[off]));
+ _content &= FATENT_FAT16_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_16(0xFFF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_16(0xFFF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+/* Write FAT16 entry 'loc' <- low 16 bits of 'content'.
+ * Returns 0 on success, -EIO on failure. */
+static s32 fat16_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector;
+ __le16 *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT16_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + (loc >> (sb->s_blocksize_bits-1));
+ off = (loc << 1) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_entry = (__le16 *)&(fat_sector[off]);
+ *fat_entry = cpu_to_le16(content);
+
+ return fcache_modify(sb, sec);
+}
+
+#define FATENT_FAT12_VALID_MASK (0x00000FFFU)
+/* Read FAT12 entry 'loc'.  FAT12 entries are 1.5 bytes (byte offset
+ * loc + loc/2): even entries occupy the low 12 bits of the 16-bit pair,
+ * odd entries the high 12 bits, and an entry may straddle a sector
+ * boundary.  Returns 0 on success, -EIO on failure. */
+static s32 fat12_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ u32 off, _content;
+ u64 sec;
+ u8 *fat_sector;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits);
+ off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ if (off == (u32)(sb->s_blocksize - 1)) {
+ /* entry straddles two sectors: low byte here, high byte next */
+ _content = (u32) fat_sector[off];
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ _content |= (u32) fat_sector[0] << 8;
+ } else {
+ _content = get_unaligned_le16(&fat_sector[off]);
+ }
+
+ /* odd entries live in the high 12 bits of the pair */
+ if (loc & 1)
+ _content >>= 4;
+
+ _content &= FATENT_FAT12_VALID_MASK;
+
+ /* remap reserved clusters to simplify code */
+ if (_content == CLUSTER_16(0x0FF7U))
+ _content = CLUS_BAD;
+ else if (_content >= CLUSTER_16(0x0FF8U))
+ _content = CLUS_EOF;
+
+ *content = CLUSTER_32(_content);
+ return 0;
+}
+
+/* Write FAT12 entry 'loc' <- low 12 bits of 'content'.
+ * Even entries occupy the low 12 bits of a 16-bit little-endian pair,
+ * odd entries the high 12 bits, so the neighbouring entry's nibble must
+ * be preserved.  An entry may straddle a sector boundary, in which case
+ * both sectors are written and marked dirty.
+ * Returns 0 on success, -EIO on buffer-cache failure. */
+static s32 fat12_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ u32 off;
+ u64 sec;
+ u8 *fat_sector, *fat_entry;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ content &= FATENT_FAT12_VALID_MASK;
+
+ sec = fsi->FAT1_start_sector + ((loc + (loc >> 1)) >> sb->s_blocksize_bits);
+ off = (loc + (loc >> 1)) & (u32)(sb->s_blocksize - 1);
+
+ fat_sector = fcache_getblk(sb, sec);
+ if (!fat_sector)
+ return -EIO;
+
+ if (loc & 1) { /* odd: entry in the high 12 bits */
+
+ content <<= 4;
+
+ if (off == (u32)(sb->s_blocksize-1)) {
+ /* straddles two sectors: keep neighbour's low nibble */
+ fat_sector[off] = (u8)(content | (fat_sector[off] & 0x0F));
+ if (fcache_modify(sb, sec))
+ return -EIO;
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ fat_sector[0] = (u8)(content >> 8);
+ } else {
+ fat_entry = &(fat_sector[off]);
+ content |= 0x000F & get_unaligned_le16(fat_entry);
+ put_unaligned_le16(content, fat_entry);
+ }
+ } else { /* even: entry in the low 12 bits */
+ fat_sector[off] = (u8)(content);
+
+ if (off == (u32)(sb->s_blocksize-1)) {
+ /* straddles two sectors; low byte already stored above */
+ if (fcache_modify(sb, sec))
+ return -EIO;
+
+ fat_sector = fcache_getblk(sb, ++sec);
+ if (!fat_sector)
+ return -EIO;
+
+ /* keep neighbour's high nibble in the next sector */
+ fat_sector[0] = (u8)((fat_sector[0] & 0xF0) | (content >> 8));
+ } else {
+ fat_entry = &(fat_sector[off]);
+ content |= 0xF000 & get_unaligned_le16(fat_entry);
+ put_unaligned_le16(content, fat_entry);
+ }
+ }
+ return fcache_modify(sb, sec);
+}
+
+
+/* Per-variant FAT entry accessors: { ent_get, ent_set }. */
+static FATENT_OPS_T fat12_ent_ops = {
+ fat12_ent_get,
+ fat12_ent_set
+};
+
+static FATENT_OPS_T fat16_ent_ops = {
+ fat16_ent_get,
+ fat16_ent_set
+};
+
+static FATENT_OPS_T fat32_ent_ops = {
+ fat32_ent_get,
+ fat32_ent_set
+};
+
+static FATENT_OPS_T exfat_ent_ops = {
+ exfat_ent_get,
+ exfat_ent_set
+};
+
+/* Select the FAT entry accessor table matching the mounted volume type.
+ * Returns 0 on success, -ENOTSUPP for an unknown volume type. */
+s32 fat_ent_ops_init(struct super_block *sb)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ switch (fsi->vol_type) {
+ case EXFAT:
+ fsi->fatent_ops = &exfat_ent_ops;
+ break;
+ case FAT32:
+ fsi->fatent_ops = &fat32_ent_ops;
+ break;
+ case FAT16:
+ fsi->fatent_ops = &fat16_ent_ops;
+ break;
+ case FAT12:
+ fsi->fatent_ops = &fat12_ent_ops;
+ break;
+ default:
+ fsi->fatent_ops = NULL;
+ EMSG("Unknown volume type : %d", (int)fsi->vol_type);
+ return -ENOTSUPP;
+ }
+
+ return 0;
+}
+
+/* True for the canonical non-data cluster values (free/EOF/bad). */
+static inline bool is_reserved_clus(u32 clus)
+{
+ if (IS_CLUS_FREE(clus))
+ return true;
+ if (IS_CLUS_EOF(clus))
+ return true;
+ if (IS_CLUS_BAD(clus))
+ return true;
+ return false;
+}
+
+/* True when 'clus' addresses a data cluster inside the volume. */
+static inline bool is_valid_clus(FS_INFO_T *fsi, u32 clus)
+{
+ if (clus < CLUS_BASE || fsi->num_clusters <= clus)
+ return false;
+ return true;
+}
+
+/* Validated FAT read: rejects out-of-range 'loc' before the access and
+ * bogus chain values after it, reporting each via sdfat_fs_error().
+ * Returns 0 on success, -EIO (or the accessor's error) otherwise. */
+s32 fat_ent_get(struct super_block *sb, u32 loc, u32 *content)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ s32 err;
+
+ if (!is_valid_clus(fsi, loc)) {
+ sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ err = fsi->fatent_ops->ent_get(sb, loc, content);
+ if (err) {
+ sdfat_fs_error(sb, "failed to access to FAT "
+ "(entry 0x%08x, err:%d)", loc, err);
+ return err;
+ }
+
+ if (!is_reserved_clus(*content) && !is_valid_clus(fsi, *content)) {
+ sdfat_fs_error(sb, "invalid access to FAT (entry 0x%08x) "
+ "bogus content (0x%08x)", loc, *content);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Thin dispatch to the volume-specific FAT entry writer (no validation). */
+s32 fat_ent_set(struct super_block *sb, u32 loc, u32 content)
+{
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ return fsi->fatent_ops->ent_set(sb, loc, content);
+}
+
+/* Like fat_ent_get() but additionally treats free and bad clusters in
+ * the middle of a chain as corruption.  Returns 0 or -EIO. */
+s32 fat_ent_get_safe(struct super_block *sb, u32 loc, u32 *content)
+{
+ s32 err = fat_ent_get(sb, loc, content);
+
+ if (err)
+ return err;
+
+ if (IS_CLUS_FREE(*content)) {
+ sdfat_fs_error(sb, "invalid access to FAT free cluster "
+ "(entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ if (IS_CLUS_BAD(*content)) {
+ sdfat_fs_error(sb, "invalid access to FAT bad cluster "
+ "(entry 0x%08x)", loc);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* end of fatent.c */
diff --git a/fs/sdfat/misc.c b/fs/sdfat/misc.c
new file mode 100644
index 000000000000..a006e898816f
--- /dev/null
+++ b/fs/sdfat/misc.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * linux/fs/fat/misc.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ * 22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980
+ * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : misc.c */
+/* PURPOSE : Helper function for checksum and handing sdFAT error */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/time.h>
+#include "sdfat.h"
+#include "version.h"
+
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+#ifdef CONFIG_PROC_FSLOG
+#include <linux/fslog.h>
+#else
+#include <linux/stlog.h>
+#endif
+#else
+#define ST_LOG(fmt, ...)
+#endif
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
+#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC)
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */
+ /* EMPTY */
+#endif
+
+
+#ifdef CONFIG_SDFAT_UEVENT
+static struct kobject sdfat_uevent_kobj;
+
+/* Register the "uevent" kobject under the sdfat kset so userspace can
+ * receive filesystem events.  Returns 0 or kobject_init_and_add()'s error. */
+int sdfat_uevent_init(struct kset *sdfat_kset)
+{
+ int err;
+ struct kobj_type *ktype = get_ktype(&sdfat_kset->kobj);
+
+ sdfat_uevent_kobj.kset = sdfat_kset;
+ err = kobject_init_and_add(&sdfat_uevent_kobj, ktype, NULL, "uevent");
+ if (err)
+ pr_err("[SDFAT] Unable to create sdfat uevent kobj\n");
+
+ return err;
+}
+
+/* Unregister and reset the uevent kobject (allows re-init on reload). */
+void sdfat_uevent_uninit(void)
+{
+ kobject_del(&sdfat_uevent_kobj);
+ memset(&sdfat_uevent_kobj, 0, sizeof(struct kobject));
+}
+
+/* Broadcast a KOBJ_CHANGE uevent identifying the device (MAJOR/MINOR)
+ * when the filesystem is forced read-only after an error. */
+void sdfat_uevent_ro_remount(struct super_block *sb)
+{
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ char major[16], minor[16];
+ char *envp[] = { major, minor, NULL };
+
+ snprintf(major, sizeof(major), "MAJOR=%d", MAJOR(bd_dev));
+ snprintf(minor, sizeof(minor), "MINOR=%d", MINOR(bd_dev));
+
+ kobject_uevent_env(&sdfat_uevent_kobj, KOBJ_CHANGE, envp);
+
+ ST_LOG("[SDFAT](%s[%d:%d]): Uevent triggered\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+}
+#endif
+
+/*
+ * sdfat_fs_error reports a file system problem that might indicate a data
+ * corruption/inconsistency. Depending on the 'errors' mount option either
+ * panic() is called, or the error message is printed only and nothing else
+ * is done, or the filesystem is remounted read-only (default behavior).
+ * In case the file system is remounted read-only, it can be made writable
+ * again by remounting it.
+ */
+void __sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+{
+ struct sdfat_mount_options *opts = &SDFAT_SB(sb)->options;
+ va_list args;
+ struct va_format vaf;
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ if (report) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_err("[SDFAT](%s[%d:%d]):ERR: %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
+ ST_LOG("[SDFAT](%s[%d:%d]):ERR: %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+ }
+#endif
+ va_end(args);
+ }
+
+ if (opts->errors == SDFAT_ERRORS_PANIC) {
+ panic("[SDFAT](%s[%d:%d]): fs panic from previous error\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+ } else if (opts->errors == SDFAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
+ sb->s_flags |= MS_RDONLY;
+ sdfat_statistics_set_mnt_ro();
+ pr_err("[SDFAT](%s[%d:%d]): Filesystem has been set "
+ "read-only\n", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ ST_LOG("[SDFAT](%s[%d:%d]): Filesystem has been set read-only\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+#endif
+ sdfat_uevent_ro_remount(sb);
+ }
+}
+EXPORT_SYMBOL(__sdfat_fs_error);
+
+/**
+ * __sdfat_msg() - print preformatted SDFAT specific messages.
+ * All logs except what uses sdfat_fs_error() should be written by __sdfat_msg()
+ * If 'st' is set, the log is propagated to ST_LOG.
+ */
+void __sdfat_msg(struct super_block *sb, const char *level, int st, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ struct block_device *bdev = sb->s_bdev;
+ dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ /* level means KERN_ facility level */
+ printk("%s[SDFAT](%s[%d:%d]): %pV\n", level,
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ if (st) {
+ ST_LOG("[SDFAT](%s[%d:%d]): %pV\n",
+ sb->s_id, MAJOR(bd_dev), MINOR(bd_dev), &vaf);
+ }
+#endif
+ va_end(args);
+}
+EXPORT_SYMBOL(__sdfat_msg);
+
+/* Log the driver version at mount/module load. */
+void sdfat_log_version(void)
+{
+ pr_info("[SDFAT] Filesystem version %s\n", SDFAT_VERSION);
+#ifdef CONFIG_SDFAT_SUPPORT_STLOG
+ ST_LOG("[SDFAT] Filesystem version %s\n", SDFAT_VERSION);
+#endif
+}
+EXPORT_SYMBOL(sdfat_log_version);
+
+/* <linux/time.h> externs sys_tz
+ * extern struct timezone sys_tz;
+ */
+#define UNIX_SECS_1980 315532800L
+
+#if BITS_PER_LONG == 64
+#define UNIX_SECS_2108 4354819200L
+#endif
+
+/* days between 1970/01/01 and 1980/01/01 (2 leap days) */
+#define DAYS_DELTA_DECADE (365 * 10 + 2)
+/* 120 (2100 - 1980) isn't leap year */
+#define NO_LEAP_YEAR_2100 (120)
+#define IS_LEAP_YEAR(y) (!((y) & 0x3) && (y) != NO_LEAP_YEAR_2100)
+
+#define SECS_PER_MIN (60)
+#define SECS_PER_HOUR (60 * SECS_PER_MIN)
+#define SECS_PER_DAY (24 * SECS_PER_HOUR)
+
+#define MAKE_LEAP_YEAR(leap_year, year) \
+ do { \
+ /* 2100 isn't leap year */ \
+ if (unlikely(year > NO_LEAP_YEAR_2100)) \
+ leap_year = ((year + 3) / 4) - 1; \
+ else \
+ leap_year = ((year + 3) / 4); \
+ } while (0)
+
+/* Linear day numbers of the respective 1sts in non-leap years. */
+static time_t accum_days_in_year[] = {
+ /* Month : N 01 02 03 04 05 06 07 08 09 10 11 12 */
+ 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
+};
+
+/* FAT timezone field is stored in 15-minute units. */
+#define TIMEZONE_SEC(x) ((x) * 15 * SECS_PER_MIN)
+/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70).
+ * Without a stored timezone the value is interpreted as local time
+ * (unless the tz_utc mount option is set); with one it is adjusted to
+ * UTC using the 7-bit signed 15-minute offset. */
+void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp)
+{
+ time_t year = tp->Year;
+ time_t ld; /* leap day */
+
+ MAKE_LEAP_YEAR(ld, year);
+
+ /* after February, the current leap year's extra day counts too */
+ if (IS_LEAP_YEAR(year) && (tp->Month) > 2)
+ ld++;
+
+ ts->tv_sec = tp->Second + tp->Minute * SECS_PER_MIN
+ + tp->Hour * SECS_PER_HOUR
+ + (year * 365 + ld + accum_days_in_year[tp->Month]
+ + (tp->Day - 1) + DAYS_DELTA_DECADE) * SECS_PER_DAY;
+
+ ts->tv_nsec = 0;
+
+ /* Treat as local time */
+ if (!sbi->options.tz_utc && !tp->Timezone.valid) {
+ ts->tv_sec += sys_tz.tz_minuteswest * SECS_PER_MIN;
+ return;
+ }
+
+ /* Treat as UTC time */
+ if (!tp->Timezone.valid)
+ return;
+
+ /* Treat as UTC time, but need to adjust timezone to UTC0 */
+ if (tp->Timezone.off <= 0x3F)
+ ts->tv_sec -= TIMEZONE_SEC(tp->Timezone.off);
+ else /* 0x40 <= (tp->Timezone & 0x7F) <=0x7F */
+ ts->tv_sec += TIMEZONE_SEC(0x80 - tp->Timezone.off);
+}
+
+/* Current system offset from UTC, in FAT 15-minute units (7-bit field). */
+#define TIMEZONE_CUR_OFFSET() ((sys_tz.tz_minuteswest / (-15)) & 0x7F)
+/* Convert linear UNIX date to a FAT time/date pair.
+ * Out-of-range inputs are clamped to the FAT epoch (1980-01-01) or the
+ * maximum representable date (year field 127 = 2107). */
+void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp)
+{
+ /* only exFAT on-disk format carries a timezone field */
+ bool tz_valid = (sbi->fsi.vol_type == EXFAT) ? true : false;
+ time_t second = ts->tv_sec;
+ time_t day, month, year;
+ time_t ld; /* leap day */
+
+ tp->Timezone.value = 0x00;
+
+ /* Treats as local time with proper time */
+ if (tz_valid || !sbi->options.tz_utc) {
+ second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
+ if (tz_valid) {
+ tp->Timezone.valid = 1;
+ tp->Timezone.off = TIMEZONE_CUR_OFFSET();
+ }
+ }
+
+ /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
+ if (second < UNIX_SECS_1980) {
+ /* clamp to the FAT epoch */
+ tp->Second = 0;
+ tp->Minute = 0;
+ tp->Hour = 0;
+ tp->Day = 1;
+ tp->Month = 1;
+ tp->Year = 0;
+ return;
+ }
+#if (BITS_PER_LONG == 64)
+ if (second >= UNIX_SECS_2108) {
+ /* clamp to the last representable FAT timestamp */
+ tp->Second = 59;
+ tp->Minute = 59;
+ tp->Hour = 23;
+ tp->Day = 31;
+ tp->Month = 12;
+ tp->Year = 127;
+ return;
+ }
+#endif
+
+ day = second / SECS_PER_DAY - DAYS_DELTA_DECADE;
+ year = day / 365;
+
+ /* first estimate may overshoot; correct for leap days */
+ MAKE_LEAP_YEAR(ld, year);
+ if (year * 365 + ld > day)
+ year--;
+
+ MAKE_LEAP_YEAR(ld, year);
+ day -= year * 365 + ld;
+
+ /* Feb 29 needs special casing against the non-leap month table */
+ if (IS_LEAP_YEAR(year) && day == accum_days_in_year[3]) {
+ month = 2;
+ } else {
+ if (IS_LEAP_YEAR(year) && day > accum_days_in_year[3])
+ day--;
+ for (month = 1; month < 12; month++) {
+ if (accum_days_in_year[month + 1] > day)
+ break;
+ }
+ }
+ day -= accum_days_in_year[month];
+
+ tp->Second = second % SECS_PER_MIN;
+ tp->Minute = (second / SECS_PER_MIN) % 60;
+ tp->Hour = (second / SECS_PER_HOUR) % 24;
+ tp->Day = day + 1;
+ tp->Month = month;
+ tp->Year = year;
+}
+
+/* Fill *tp with the current time in FAT timestamp form (second
+ * granularity) and return it, for stamping directory entries. */
+TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tp)
+{
+ sdfat_timespec_t ts = CURRENT_TIME_SEC;
+ DATE_TIME_T dt;
+
+ sdfat_time_unix2fat(sbi, &ts, &dt);
+
+ tp->year = dt.Year;
+ tp->mon = dt.Month;
+ tp->day = dt.Day;
+ tp->hour = dt.Hour;
+ tp->min = dt.Minute;
+ tp->sec = dt.Second;
+ tp->tz.value = dt.Timezone.value;
+
+ return tp;
+}
+
+/* 8-bit rotate-right-then-add checksum over 'len' bytes, continuing from
+ * a previous 'chksum' value (pass 0 to start fresh). */
+u8 calc_chksum_1byte(void *data, s32 len, u8 chksum)
+{
+ s32 i;
+ u8 *c = (u8 *) data;
+
+ for (i = 0; i < len; i++, c++)
+ chksum = (((chksum & 1) << 7) | ((chksum & 0xFE) >> 1)) + *c;
+
+ return chksum;
+}
+
+/* 16-bit rotate-right-then-add checksum, continuing from 'chksum'.
+ * For CS_DIR_ENTRY, bytes 2-3 are skipped — presumably the location of
+ * the on-disk checksum field itself (confirm against the exFAT spec). */
+u16 calc_chksum_2byte(void *data, s32 len, u16 chksum, s32 type)
+{
+ s32 i;
+ u8 *c = (u8 *) data;
+
+ for (i = 0; i < len; i++, c++) {
+ if (((i == 2) || (i == 3)) && (type == CS_DIR_ENTRY))
+ continue;
+ chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + (u16) *c;
+ }
+ return chksum;
+}
+
+#ifdef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+struct timeval __t1, __t2;
+/* Store the current wall time in *tv and return it folded to a 32-bit
+ * microsecond count (wraps; used only for elapsed-time tracing deltas). */
+u32 sdfat_time_current_usec(struct timeval *tv)
+{
+ do_gettimeofday(tv);
+ return (u32)(tv->tv_sec*1000000 + tv->tv_usec);
+}
+#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+
+#ifdef CONFIG_SDFAT_DBG_CAREFUL
+/* Check the consistency of i_size_ondisk (FAT32, or flags 0x01 only):
+ * walk the FAT chain backing i_size_ondisk and BUG if it ends early or
+ * contains free clusters.  Contiguous (flags 0x03) files have no FAT
+ * chain and are skipped.  The volatile ring buffer keeps the last 50
+ * visited clusters available for post-mortem debugger inspection. */
+void sdfat_debug_check_clusters(struct inode *inode)
+{
+ unsigned int num_clusters;
+ volatile uint32_t tmp_fat_chain[50];
+ volatile int tmp_i = 0;
+ volatile unsigned int num_clusters_org;
+ CHAIN_T clu;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ FS_INFO_T *fsi = &(SDFAT_SB(inode->i_sb)->fsi);
+
+ if (SDFAT_I(inode)->i_size_ondisk == 0)
+ num_clusters = 0;
+ else
+ num_clusters = ((SDFAT_I(inode)->i_size_ondisk-1) >> fsi->cluster_size_bits) + 1;
+
+ clu.dir = fid->start_clu;
+ clu.size = num_clusters;
+ clu.flags = fid->flags;
+
+ /* retained only for debugger inspection */
+ num_clusters_org = num_clusters;
+
+ if (clu.flags == 0x03)
+ return;
+
+ while (num_clusters > 0) {
+ /* FAT chain logging */
+ tmp_fat_chain[tmp_i] = clu.dir;
+ tmp_i++;
+ if (tmp_i >= 50)
+ tmp_i = 0;
+
+ BUG_ON(IS_CLUS_EOF(clu.dir) || IS_CLUS_FREE(clu.dir));
+
+ if (get_next_clus_safe(inode->i_sb, &(clu.dir)))
+ EMSG("%s: failed to access to FAT\n", __func__);
+
+ num_clusters--;
+ }
+
+ BUG_ON(!IS_CLUS_EOF(clu.dir));
+}
+
+#endif /* CONFIG_SDFAT_DBG_CAREFUL */
+
+#ifdef CONFIG_SDFAT_DBG_MSG
+/* Debug-message sink: prints only when 'level' is at or below
+ * SDFAT_MSG_LEVEL; optionally prefixes the caller's PID. */
+void __sdfat_dmsg(int level, const char *fmt, ...)
+{
+#ifdef CONFIG_SDFAT_DBG_SHOW_PID
+ struct va_format vaf;
+ va_list args;
+
+ /* should check type */
+ if (level > SDFAT_MSG_LEVEL)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ /* fmt already includes KERN_ facility level */
+ printk("[%u] %pV", current->pid, &vaf);
+ va_end(args);
+#else
+ va_list args;
+
+ /* should check type */
+ if (level > SDFAT_MSG_LEVEL)
+ return;
+
+ va_start(args, fmt);
+ /* fmt already includes KERN_ facility level */
+ vprintk(fmt, args);
+ va_end(args);
+#endif
+}
+#endif
+
diff --git a/fs/sdfat/mpage.c b/fs/sdfat/mpage.c
new file mode 100644
index 000000000000..f550fbb2204a
--- /dev/null
+++ b/fs/sdfat/mpage.c
@@ -0,0 +1,635 @@
+/*
+ * fs/mpage.c
+ *
+ * Copyright (C) 2002, Linus Torvalds.
+ *
+ * Contains functions related to preparing and submitting BIOs which contain
+ * multiple pagecache pages.
+ *
+ * 15May2002 Andrew Morton
+ * Initial version
+ * 27Jun2002 axboe@suse.de
+ * use bio_add_page() to build bio's just the right size
+ */
+
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : mpage.c                                                      */
+/* PURPOSE : sdFAT glue layer for supporting VFS */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/mount.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
+#include <linux/backing-dev.h>
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <asm/current.h>
+#include <asm/unaligned.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+#include <linux/aio.h>
+#endif
+
+#include "sdfat.h"
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+
+/*************************************************************************
+ * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+static void __mpage_write_end_io(struct bio *bio, int err);
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+/* bio_set_dev() backport: provided by the kernel itself since v4.14 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
+	/* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+	bio->bi_bdev = bdev;
+}
+#endif
+
+/* single-block alias cleanup: clean_bdev_aliases() replaced
+ * unmap_underlying_metadata() in v4.10
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+	clean_bdev_aliases(bdev, block, 1);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block)
+{
+	unmap_underlying_metadata(bdev, block);
+}
+#endif
+
+/* submit a WRITE bio; v4.8 moved the op/flags into the bio itself */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio)
+{
+	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
+	submit_bio(bio);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) */
+static inline void __sdfat_submit_bio_write2(int flags, struct bio *bio)
+{
+	submit_bio(WRITE | flags, bio);
+}
+#endif
+
+/* bio_get_nr_vecs() was removed in v4.1; BIO_MAX_PAGES is a safe cap */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static inline int bio_get_nr_vecs(struct block_device *bdev)
+{
+	return BIO_MAX_PAGES;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) */
+	/* EMPTY */
+#endif
+
+/* bio sector/size accessors: these fields moved into bio->bi_iter in v3.14 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+	return bio->bi_iter.bi_sector;
+}
+
+static inline void __sdfat_set_bio_sector(struct bio *bio, sector_t sector)
+{
+	bio->bi_iter.bi_sector = sector;
+}
+
+static inline unsigned int __sdfat_bio_size(struct bio *bio)
+{
+	return bio->bi_iter.bi_size;
+}
+
+static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size)
+{
+	bio->bi_iter.bi_size = size;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+static inline sector_t __sdfat_bio_sector(struct bio *bio)
+{
+	return bio->bi_sector;
+}
+
+static inline void __sdfat_set_bio_sector(struct bio *bio, sector_t sector)
+{
+	bio->bi_sector = sector;
+}
+
+static inline unsigned int __sdfat_bio_size(struct bio *bio)
+{
+	return bio->bi_size;
+}
+
+static inline void __sdfat_set_bio_size(struct bio *bio, unsigned int size)
+{
+	bio->bi_size = size;
+}
+#endif
+
+/*************************************************************************
+ * MORE FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+/* 4.13+: completion status is carried as blk_status_t in bio->bi_status */
+static void mpage_write_end_io(struct bio *bio)
+{
+	__mpage_write_end_io(bio, blk_status_to_errno(bio->bi_status));
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+/* 4.3 .. 4.12: a plain errno lives in bio->bi_error */
+static void mpage_write_end_io(struct bio *bio)
+{
+	__mpage_write_end_io(bio, bio->bi_error);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) */
+/* pre-4.3: err is passed explicitly; BIO_UPTODATE overrides it */
+static void mpage_write_end_io(struct bio *bio, int err)
+{
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		err = 0;
+	__mpage_write_end_io(bio, err);
+}
+#endif
+
+/* __check_dfr_on() and __dfr_writepage_end_io() functions
+ * are copied from sdfat.c
+ * Each function should be same perfectly
+ */
+/*
+ * Return 1 when the byte range [start, end) of @inode is currently under
+ * defragmentation (per its dfr_info state), 0 otherwise.  Always 0
+ * without CONFIG_SDFAT_DFR.  Intentionally kept byte-identical to the
+ * copy in sdfat.c (see the note above).
+ */
+static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+
+	if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) &&
+		fsapi_dfr_check_dfr_on(inode, start, end, 0, fname))
+		return 1;
+#endif
+	return 0;
+}
+
+/*
+ * Notify the defrag engine that writeback of @page completed; a no-op
+ * without CONFIG_SDFAT_DFR or when no defrag request is pending.
+ * Intentionally kept byte-identical to the copy in sdfat.c.
+ */
+static inline int __dfr_writepage_end_io(struct page *page)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+
+	if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+		fsapi_dfr_writepage_endio(page);
+#endif
+	return 0;
+}
+
+
+/*
+ * Derive the request-queue write alignment (in sectors) for @sb's block
+ * device.  Returns max_sectors when it is an exact power of two, else 0
+ * (alignment-based bio splitting disabled).
+ */
+static inline unsigned int __calc_size_to_align(struct super_block *sb)
+{
+	struct gendisk *disk = sb->s_bdev->bd_disk;
+	struct request_queue *queue;
+	unsigned int max_sectors;
+	unsigned int aligned;
+
+	if (!disk)
+		return 0;
+
+	queue = disk->queue;
+	if (!queue)
+		return 0;
+
+	max_sectors = queue->limits.max_sectors;
+	aligned = 1 << ilog2(max_sectors);
+
+	/* only an exact power-of-two limit is usable as an alignment */
+	if (aligned && (max_sectors & (aligned - 1)))
+		aligned = 0;
+
+	return aligned;
+}
+
+/* per-writepages state threaded through sdfat_mpage_writepage() */
+struct mpage_data {
+	struct bio *bio;		/* bio under construction, or NULL */
+	sector_t last_block_in_bio;	/* last block appended to *bio */
+	get_block_t *get_block;		/* fs block-mapping callback */
+	unsigned int use_writepage;	/* fall back to ->writepage when confused */
+	unsigned int size_to_align;	/* pow-2 alignment in sectors, 0 = off */
+};
+
+/*
+ * I/O completion handler for multipage BIOs.
+ *
+ * The mpage code never puts partial pages into a BIO (except for end-of-file).
+ * If a page does not map to a contiguous run of blocks then it simply falls
+ * back to block_read_full_page().
+ *
+ * Why is this? If a page's completion depends on a number of different BIOs
+ * which can complete in any order (or at the same time) then determining the
+ * status of that page is hard. See end_buffer_async_read() for the details.
+ * There is no point in duplicating all that complexity.
+ */
+static void __mpage_write_end_io(struct bio *bio, int err)
+{
+	/* start at the last filled bvec and walk backwards to the first */
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+	ASSERT(bio_data_dir(bio) == WRITE); /* only write */
+
+	do {
+		struct page *page = bvec->bv_page;
+
+		/* prefetch the next page's flags word before we touch this one */
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+		if (err) {
+			SetPageError(page);
+			if (page->mapping)
+				mapping_set_error(page->mapping, err);
+		}
+
+		/* let the defrag engine observe completion (no-op without DFR) */
+		__dfr_writepage_end_io(page);
+
+		end_page_writeback(page);
+	} while (bvec >= bio->bi_io_vec);
+	bio_put(bio);
+}
+
+/*
+ * Attach the write completion handler and submit @bio.  Returns NULL so
+ * callers can clear their cached bio pointer in a single statement.
+ */
+static struct bio *mpage_bio_submit_write(int flags, struct bio *bio)
+{
+	bio->bi_end_io = mpage_write_end_io;
+	__sdfat_submit_bio_write2(flags, bio);
+	return NULL;
+}
+
+/*
+ * Allocate a bio for @nr_vecs pages aimed at @first_sector of @bdev.
+ * Memory-reclaim tasks (PF_MEMALLOC) retry with progressively halved
+ * vector counts rather than failing outright.  Returns NULL on failure.
+ */
+static struct bio *
+mpage_alloc(struct block_device *bdev,
+		sector_t first_sector, int nr_vecs,
+		gfp_t gfp_flags)
+{
+	struct bio *bio = bio_alloc(gfp_flags, nr_vecs);
+
+	while (!bio && (current->flags & PF_MEMALLOC)) {
+		nr_vecs /= 2;
+		if (!nr_vecs)
+			break;
+		bio = bio_alloc(gfp_flags, nr_vecs);
+	}
+
+	if (!bio)
+		return NULL;
+
+	bio_set_dev(bio, bdev);
+	__sdfat_set_bio_sector(bio, first_sector);
+	return bio;
+}
+
+/*
+ * write_cache_pages() callback: write one page, coalescing physically
+ * contiguous blocks into the bio cached in mpage_data.  Any page that
+ * cannot be expressed as one contiguous run of mapped, dirty, uptodate
+ * blocks is handed to the regular ->writepage (the "confused" path).
+ */
+static int sdfat_mpage_writepage(struct page *page,
+		struct writeback_control *wbc, void *data)
+{
+	struct mpage_data *mpd = data;
+	struct bio *bio = mpd->bio;
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = page->mapping->host;
+	const unsigned int blkbits = inode->i_blkbits;
+	const unsigned int blocks_per_page = PAGE_SIZE >> blkbits;
+	sector_t last_block;
+	sector_t block_in_file;
+	sector_t blocks[MAX_BUF_PER_PAGE];
+	unsigned int page_block;
+	unsigned int first_unmapped = blocks_per_page;
+	struct block_device *bdev = NULL;
+	int boundary = 0;
+	sector_t boundary_block = 0;
+	struct block_device *boundary_bdev = NULL;
+	int length;
+	struct buffer_head map_bh;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_SHIFT;
+	int ret = 0;
+
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		/* If they're all mapped and dirty, do it */
+		page_block = 0;
+		do {
+			BUG_ON(buffer_locked(bh));
+			if (!buffer_mapped(bh)) {
+				/*
+				 * unmapped dirty buffers are created by
+				 * __set_page_dirty_buffers -> mmapped data
+				 */
+				if (buffer_dirty(bh))
+					goto confused;
+				if (first_unmapped == blocks_per_page)
+					first_unmapped = page_block;
+				continue;
+			}
+
+			if (first_unmapped != blocks_per_page)
+				goto confused;	/* hole -> non-hole */
+
+			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
+				goto confused;
+
+			/* bh should be mapped if delay is set */
+			if (buffer_delay(bh)) {
+				sector_t blk_in_file =
+					(sector_t)(page->index << (PAGE_SHIFT - blkbits)) + page_block;
+
+				BUG_ON(bh->b_size != (1 << blkbits));
+				if (page->index > end_index) {
+					MMSG("%s(inode:%p) "
+						"over end with delayed buffer"
+						"(page_idx:%u, end_idx:%u)\n",
+						__func__, inode,
+						(u32)page->index,
+						(u32)end_index);
+					goto confused;
+				}
+
+				/* resolve the delayed allocation now (create=1) */
+				ret = mpd->get_block(inode, blk_in_file, bh, 1);
+				if (ret) {
+					MMSG("%s(inode:%p) "
+						"failed to getblk(ret:%d)\n",
+						__func__, inode, ret);
+					goto confused;
+				}
+
+				BUG_ON(buffer_delay(bh));
+
+				if (buffer_new(bh)) {
+					clear_buffer_new(bh);
+					__sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr);
+				}
+			}
+
+			/* blocks on the page must be physically consecutive */
+			if (page_block) {
+				if (bh->b_blocknr != blocks[page_block-1] + 1) {
+					MMSG("%s(inode:%p) pblk(%d) "
+						"no_seq(prev:%lld, new:%lld)\n",
+						__func__, inode, page_block,
+						(u64)blocks[page_block-1],
+						(u64)bh->b_blocknr);
+					goto confused;
+				}
+			}
+			blocks[page_block++] = bh->b_blocknr;
+			boundary = buffer_boundary(bh);
+			if (boundary) {
+				boundary_block = bh->b_blocknr;
+				boundary_bdev = bh->b_bdev;
+			}
+			bdev = bh->b_bdev;
+		} while ((bh = bh->b_this_page) != head);
+
+		if (first_unmapped)
+			goto page_is_mapped;
+
+		/*
+		 * Page has buffers, but they are all unmapped. The page was
+		 * created by pagein or read over a hole which was handled by
+		 * block_read_full_page(). If this address_space is also
+		 * using mpage_readpages then this can rarely happen.
+		 */
+		goto confused;
+	}
+
+	/*
+	 * The page has no buffers: map it to disk
+	 */
+	BUG_ON(!PageUptodate(page));
+	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+	last_block = (i_size - 1) >> blkbits;
+	map_bh.b_page = page;
+	for (page_block = 0; page_block < blocks_per_page; ) {
+
+		map_bh.b_state = 0;
+		map_bh.b_size = 1 << blkbits;
+		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
+			goto confused;
+
+		if (buffer_new(&map_bh))
+			__sdfat_clean_bdev_aliases(map_bh.b_bdev, map_bh.b_blocknr);
+		if (buffer_boundary(&map_bh)) {
+			boundary_block = map_bh.b_blocknr;
+			boundary_bdev = map_bh.b_bdev;
+		}
+
+		if (page_block) {
+			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
+				goto confused;
+		}
+		blocks[page_block++] = map_bh.b_blocknr;
+		boundary = buffer_boundary(&map_bh);
+		bdev = map_bh.b_bdev;
+		if (block_in_file == last_block)
+			break;
+		block_in_file++;
+	}
+	BUG_ON(page_block == 0);
+
+	first_unmapped = page_block;
+
+page_is_mapped:
+	if (page->index >= end_index) {
+		/*
+		 * The page straddles i_size. It must be zeroed out on each
+		 * and every writepage invocation because it may be mmapped.
+		 * "A file is mapped in multiples of the page size. For a file
+		 * that is not a multiple of the page size, the remaining memory
+		 * is zeroed when mapped, and writes to that region are not
+		 * written out to the file."
+		 */
+		unsigned int offset = i_size & (PAGE_SIZE - 1);
+
+		if (page->index > end_index || !offset) {
+			MMSG("%s(inode:%p) over end "
+				"(page_idx:%u, end_idx:%u off:%u)\n",
+				__func__, inode, (u32)page->index,
+				(u32)end_index, (u32)offset);
+			goto confused;
+		}
+		zero_user_segment(page, offset, PAGE_SIZE);
+	}
+
+	/*
+	 * This page will go to BIO. Do we need to send this BIO off first?
+	 *
+	 * REMARK : added ELSE_IF for ALIGNMENT_MPAGE_WRITE of SDFAT
+	 */
+	if (bio) {
+		if (mpd->last_block_in_bio != blocks[0] - 1) {
+			/* not contiguous with the open bio: flush it */
+			bio = mpage_bio_submit_write(0, bio);
+		} else if (mpd->size_to_align) {
+			unsigned int mask = mpd->size_to_align - 1;
+			sector_t max_end_block =
+				(__sdfat_bio_sector(bio) & ~(mask)) + mask;
+
+			/* flush early when the bio reaches an alignment edge */
+			if ((__sdfat_bio_size(bio) != (1 << (mask + 1))) &&
+				(mpd->last_block_in_bio == max_end_block)) {
+				MMSG("%s(inode:%p) alignment mpage_bio_submit"
+					"(start:%u, len:%u aligned:%u)\n",
+					__func__, inode,
+					(unsigned int)__sdfat_bio_sector(bio),
+					(unsigned int)(mpd->last_block_in_bio -
+						__sdfat_bio_sector(bio) + 1),
+					(unsigned int)mpd->size_to_align);
+				bio = mpage_bio_submit_write(REQ_NOMERGE, bio);
+			}
+		}
+	}
+
+alloc_new:
+	if (!bio) {
+		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
+		if (!bio)
+			goto confused;
+	}
+
+	/*
+	 * Must try to add the page before marking the buffer clean or
+	 * the confused fail path above (OOM) will be very confused when
+	 * it finds all bh marked clean (i.e. it will not write anything)
+	 */
+	length = first_unmapped << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
+		bio = mpage_bio_submit_write(0, bio);
+		goto alloc_new;
+	}
+
+	/*
+	 * OK, we have our BIO, so we can now mark the buffers clean. Make
+	 * sure to only clean buffers which we know we'll be writing.
+	 */
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+		unsigned int buffer_counter = 0;
+
+		do {
+			if (buffer_counter++ == first_unmapped)
+				break;
+			clear_buffer_dirty(bh);
+			bh = bh->b_this_page;
+		} while (bh != head);
+
+		/*
+		 * we cannot drop the bh if the page is not uptodate
+		 * or a concurrent readpage would fail to serialize with the bh
+		 * and it would read from disk before we reach the platter.
+		 */
+		if (buffer_heads_over_limit && PageUptodate(page))
+			try_to_free_buffers(page);
+	}
+
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+
+	/*
+	 * FIXME FOR DEFRAGMENTATION : CODE REVIEW IS REQUIRED
+	 *
+	 * Turn off MAPPED flag in victim's bh if defrag on.
+	 * Another write_begin can starts after get_block for defrag victims
+	 * called.
+	 * In this case, write_begin calls get_block and get original block
+	 * number and previous defrag will be canceled.
+	 */
+	if (unlikely(__check_dfr_on(inode, (loff_t)(page->index << PAGE_SHIFT),
+			(loff_t)((page->index + 1) << PAGE_SHIFT), __func__))) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		do {
+			clear_buffer_mapped(bh);
+			bh = bh->b_this_page;
+		} while (bh != head);
+	}
+
+	unlock_page(page);
+	if (boundary || (first_unmapped != blocks_per_page)) {
+		/* partial page or boundary block: flush immediately */
+		bio = mpage_bio_submit_write(0, bio);
+		if (boundary_block) {
+			write_boundary_block(boundary_bdev,
+					boundary_block, 1 << blkbits);
+		}
+	} else {
+		/* keep the bio open for the next contiguous page */
+		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
+	}
+
+	goto out;
+
+confused:
+	if (bio)
+		bio = mpage_bio_submit_write(0, bio);
+
+	if (mpd->use_writepage) {
+		ret = mapping->a_ops->writepage(page, wbc);
+	} else {
+		ret = -EAGAIN;
+		goto out;
+	}
+	/*
+	 * The caller has a ref on the inode, so *mapping is stable
+	 */
+	mapping_set_error(mapping, ret);
+out:
+	mpd->bio = bio;
+	return ret;
+}
+
+/*
+ * Write back dirty pages of @mapping using write_cache_pages(), batching
+ * contiguous pages into large BIOs via sdfat_mpage_writepage() and
+ * submitting any BIO still open once the walk finishes.
+ */
+int sdfat_mpage_writepages(struct address_space *mapping,
+		struct writeback_control *wbc, get_block_t *get_block)
+{
+	struct blk_plug plug;
+	int ret;
+	struct mpage_data mpd = {
+		.bio = NULL,
+		.last_block_in_bio = 0,
+		.get_block = get_block,
+		.use_writepage = 1,
+		.size_to_align = __calc_size_to_align(mapping->host->i_sb),
+	};
+
+	BUG_ON(!get_block);
+	blk_start_plug(&plug);
+	ret = write_cache_pages(mapping, wbc, sdfat_mpage_writepage, &mpd);
+	if (mpd.bio)
+		mpage_bio_submit_write(0, mpd.bio);
+	blk_finish_plug(&plug);
+	return ret;
+}
+
+#endif /* CONFIG_SDFAT_ALIGNED_MPAGE_WRITE */
+
diff --git a/fs/sdfat/nls.c b/fs/sdfat/nls.c
new file mode 100644
index 000000000000..b65634454c55
--- /dev/null
+++ b/fs/sdfat/nls.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : nls.c */
+/* PURPOSE : sdFAT NLS Manager */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+#include <linux/string.h>
+#include <linux/nls.h>
+
+#include "sdfat.h"
+#include "core.h"
+
+/*----------------------------------------------------------------------*/
+/* Global Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/* Local Variable Definitions */
+/*----------------------------------------------------------------------*/
+
+/* code points disallowed in DOS 8.3 names (half- and full-width forms) */
+static u16 bad_dos_chars[] = {
+	/* + , ; = [ ] */
+	0x002B, 0x002C, 0x003B, 0x003D, 0x005B, 0x005D,
+	0xFF0B, 0xFF0C, 0xFF1B, 0xFF1D, 0xFF3B, 0xFF3D,
+	0
+};
+
+/*
+ * Allow full-width illegal characters :
+ * "MS windows 7" supports full-width-invalid-name-characters.
+ * So we should check half-width-invalid-name-characters(ASCII) only
+ * for compatibility.
+ *
+ * " * / : < > ? \ |
+ *
+ * patch 1.2.0
+ */
+static u16 bad_uni_chars[] = {
+	0x0022, 0x002A, 0x002F, 0x003A,
+	0x003C, 0x003E, 0x003F, 0x005C, 0x007C,
+#if 0 /* allow full-width characters */
+	0x201C, 0x201D, 0xFF0A, 0xFF0F, 0xFF1A,
+	0xFF1C, 0xFF1E, 0xFF1F, 0xFF3C, 0xFF5C,
+#endif
+	0
+};
+
+/*----------------------------------------------------------------------*/
+/* Local Function Declarations */
+/*----------------------------------------------------------------------*/
+static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy);
+static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy);
+
+/*
+ * Upper-case one UTF-16 unit via the volume's upcase table; returns the
+ * character unchanged on case-sensitive mounts or when the table has no
+ * entry for its column.
+ */
+static u16 nls_upper(struct super_block *sb, u16 a)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+	if (SDFAT_SB(sb)->options.casesensitive)
+		return a;
+
+	if (!(fsi->vol_utbl)[get_col_index(a)])
+		return a;
+
+	return (fsi->vol_utbl)[get_col_index(a)][get_row_index(a)];
+}
+/*======================================================================*/
+/* Global Function Definitions */
+/*======================================================================*/
+/*
+ * Scan the NUL-terminated u16 string @str for @wchar.
+ * NOTE(review): on a match this returns the position AFTER the matching
+ * unit (post-increment), and at end of string it returns 0 rather than
+ * NULL.  Every caller in this file uses the result only as a boolean,
+ * so both quirks are harmless here - do not reuse the return value as a
+ * cursor without fixing them.
+ */
+u16 *nls_wstrchr(u16 *str, u16 wchar)
+{
+	while (*str) {
+		if (*(str++) == wchar)
+			return str;
+	}
+
+	return 0;
+}
+
+/* Compare two DOS 8.3 short names byte-wise; 0 means equal (strncmp). */
+s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b)
+{
+	return strncmp((void *)a, (void *)b, DOS_NAME_LENGTH);
+}
+
+/*
+ * Case-folded comparison of two unicode names, at most MAX_NAME_LENGTH
+ * units.  Returns 0 when equal, 1 on the first mismatch.
+ */
+s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b)
+{
+	s32 idx = 0;
+
+	while (idx < MAX_NAME_LENGTH) {
+		if (nls_upper(sb, a[idx]) != nls_upper(sb, b[idx]))
+			return 1;	/* differ after upcasing */
+		if (a[idx] == 0x0)
+			return 0;	/* both terminated together */
+		idx++;
+	}
+	return 0;
+}
+
+#define CASE_LOWER_BASE (0x08) /* base is lower case */
+#define CASE_LOWER_EXT (0x10) /* extension is lower case */
+
+/*
+ * Convert a unicode name (@p_uniname) into a DOS 8.3 short name
+ * (@p_dosname) via the disk codepage.  Sets p_dosname->name_case to the
+ * CASE_LOWER_BASE/CASE_LOWER_EXT bits, or 0xFF when upper and lower
+ * case are mixed in the same half.  Reports NLS_NAME_LOSSY /
+ * NLS_NAME_OVERLEN through @p_lossy.  Returns the number of bytes
+ * stored into the short name.
+ */
+s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy)
+{
+	s32 i, j, len, lossy = NLS_NAME_NO_LOSSY;
+	u8 buf[MAX_CHARSET_SIZE];
+	u8 lower = 0, upper = 0;
+	u8 *dosname = p_dosname->name;
+	u16 *uniname = p_uniname->name;
+	u16 *p, *last_period;
+	struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
+
+	/* DOSNAME is filled with space */
+	for (i = 0; i < DOS_NAME_LENGTH; i++)
+		*(dosname+i) = ' ';
+
+	/* DOT and DOTDOT are handled by VFS layer */
+
+	/* search for the last embedded period */
+	last_period = NULL;
+	for (p = uniname; *p; p++) {
+		if (*p == (u16) '.')
+			last_period = p;
+	}
+
+	/* i indexes dosname: 0..7 base, 8..10 extension */
+	i = 0;
+	while (i < DOS_NAME_LENGTH) {
+		if (i == 8) {
+			if (last_period == NULL)
+				break;
+
+			/* jump to the extension source position */
+			if (uniname <= last_period) {
+				if (uniname < last_period)
+					lossy |= NLS_NAME_OVERLEN;
+				uniname = last_period + 1;
+			}
+		}
+
+		if (*uniname == (u16) '\0') {
+			break;
+		} else if (*uniname == (u16) ' ') {
+			lossy |= NLS_NAME_LOSSY;
+		} else if (*uniname == (u16) '.') {
+			if (uniname < last_period)
+				lossy |= NLS_NAME_LOSSY;
+			else
+				i = 8;
+		} else if (nls_wstrchr(bad_dos_chars, *uniname)) {
+			/* illegal in 8.3 names: substitute '_' */
+			lossy |= NLS_NAME_LOSSY;
+			*(dosname+i) = '_';
+			i++;
+		} else {
+			len = convert_uni_to_ch(nls, *uniname, buf, &lossy);
+
+			if (len > 1) {
+				/* multi-byte output must fit its half entirely */
+				if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH))
+					break;
+
+				if ((i < 8) && ((i+len) > 8)) {
+					i = 8;
+					continue;
+				}
+
+				lower = 0xFF;
+
+				for (j = 0; j < len; j++, i++)
+					*(dosname+i) = *(buf+j);
+			} else { /* len == 1 */
+				if ((*buf >= 'a') && (*buf <= 'z')) {
+					*(dosname+i) = *buf - ('a' - 'A');
+
+					lower |= (i < 8) ?
+						CASE_LOWER_BASE :
+						CASE_LOWER_EXT;
+				} else if ((*buf >= 'A') && (*buf <= 'Z')) {
+					*(dosname+i) = *buf;
+
+					upper |= (i < 8) ?
+						CASE_LOWER_BASE :
+						CASE_LOWER_EXT;
+				} else {
+					*(dosname+i) = *buf;
+				}
+				i++;
+			}
+		}
+
+		uniname++;
+	}
+
+	/* 0x05 is stored for a leading 0xE5 (FAT deleted-entry marker) */
+	if (*dosname == 0xE5)
+		*dosname = 0x05;
+	if (*uniname != 0x0)
+		lossy |= NLS_NAME_OVERLEN;
+
+	if (upper & lower)
+		p_dosname->name_case = 0xFF;
+	else
+		p_dosname->name_case = lower;
+
+	if (p_lossy)
+		*p_lossy = lossy;
+	return i;
+}
+
+/*
+ * Expand a DOS 8.3 short name (@p_dosname) back into a unicode name
+ * (@p_uniname): restore lower case per name_case, re-insert the '.'
+ * between base and extension, and convert through the disk codepage.
+ * Returns the resulting unicode length.
+ */
+s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname)
+{
+	s32 i = 0, j, n = 0;
+	u8 buf[MAX_DOSNAME_BUF_SIZE];
+	u8 *dosname = p_dosname->name;
+	u16 *uniname = p_uniname->name;
+	struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
+
+	/* 0x05 stands in for a leading 0xE5 on disk; restore it */
+	if (*dosname == 0x05) {
+		*buf = 0xE5;
+		i++;
+		n++;
+	}
+
+	/* base part (bytes 0..7), stopping at padding */
+	for ( ; i < 8; i++, n++) {
+		if (*(dosname+i) == ' ')
+			break;
+
+		if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
+			(p_dosname->name_case & CASE_LOWER_BASE))
+			*(buf+n) = *(dosname+i) + ('a' - 'A');
+		else
+			*(buf+n) = *(dosname+i);
+	}
+	if (*(dosname+8) != ' ') {
+		*(buf+n) = '.';
+		n++;
+	}
+
+	/* extension part (bytes 8..10) */
+	for (i = 8; i < DOS_NAME_LENGTH; i++, n++) {
+		if (*(dosname+i) == ' ')
+			break;
+
+		if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
+			(p_dosname->name_case & CASE_LOWER_EXT))
+			*(buf+n) = *(dosname+i) + ('a' - 'A');
+		else
+			*(buf+n) = *(dosname+i);
+	}
+	*(buf+n) = '\0';
+
+	/* codepage -> unicode, one character per iteration */
+	i = j = 0;
+	while (j < MAX_NAME_LENGTH) {
+		if (*(buf+i) == '\0')
+			break;
+
+		i += convert_ch_to_uni(nls, (buf+i), uniname, NULL);
+
+		uniname++;
+		j++;
+	}
+
+	*uniname = (u16) '\0';
+	return j;
+}
+
+/*
+ * Convert a unicode name to a UTF-8 VFS name (utf8 mounts).  Output is
+ * always NUL-terminated; returns the number of bytes written (>= 0).
+ */
+static s32 __nls_utf16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
+{
+	s32 len;
+	const u16 *uniname = p_uniname->name;
+
+	/* always len >= 0 (utf16s_to_utf8s() never returns an error).
+	 * fix: convert into at most buflen-1 bytes; the previous bound of
+	 * buflen allowed len == buflen, making the NUL store below write
+	 * one byte past the caller's buffer.
+	 */
+	len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN,
+			p_cstring, buflen - 1);
+	p_cstring[len] = '\0';
+	return len;
+}
+
+/*
+ * Convert a UTF-8 VFS name to unicode (utf8 mounts) and compute the
+ * upper-cased name hash.  Returns the unicode length, a negative errno
+ * from the conversion, or -ENAMETOOLONG when it exceeds
+ * MAX_NAME_LENGTH.  Lossy characters are reported via @p_lossy.
+ */
+static s32 __nls_vfsname_to_utf16s(struct super_block *sb, const u8 *p_cstring,
+		const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
+{
+	s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
+	u16 upname[MAX_NAME_LENGTH+1];
+	u16 *uniname = p_uniname->name;
+
+	BUG_ON(!len);
+
+	/* NOTE(review): the output bound of MAX_NAME_LENGTH+2 units exceeds
+	 * the MAX_NAME_LENGTH limit enforced below - confirm that
+	 * p_uniname->name really has room for MAX_NAME_LENGTH+2 entries.
+	 */
+	unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN,
+			(wchar_t *)uniname, MAX_NAME_LENGTH+2);
+	if (unilen < 0) {
+		MMSG("%s: failed to vfsname_to_utf16(err:%d) "
+			"vfsnamelen:%d", __func__, unilen, len);
+		return unilen;
+	}
+
+	if (unilen > MAX_NAME_LENGTH) {
+		MMSG("%s: failed to vfsname_to_utf16(estr:ENAMETOOLONG) "
+			"vfsnamelen:%d, unilen:%d>%d",
+			__func__, len, unilen, MAX_NAME_LENGTH);
+		return -ENAMETOOLONG;
+	}
+
+	/* build the upper-cased copy used only for the name hash.
+	 * (fix: a truncated name_len store here was dead code - name_len
+	 * is unconditionally assigned the full value below.)
+	 */
+	for (i = 0; i < unilen; i++) {
+		if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
+			lossy |= NLS_NAME_LOSSY;
+
+		*(upname+i) = nls_upper(sb, *uniname);
+		uniname++;
+	}
+
+	*uniname = (u16)'\0';
+	p_uniname->name_len = unilen;
+	p_uniname->name_hash = calc_chksum_2byte((void *) upname,
+			unilen << 1, 0, CS_DEFAULT);
+
+	if (p_lossy)
+		*p_lossy = lossy;
+
+	return unilen;
+}
+
+/*
+ * Convert a unicode name to a VFS byte-string name through the io
+ * codepage (non-utf8 mounts).  Output is NUL-terminated and limited to
+ * buflen-1 bytes; returns the number of bytes written.
+ */
+static s32 __nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
+{
+	s32 i, j, len, out_len = 0;
+	u8 buf[MAX_CHARSET_SIZE];
+	const u16 *uniname = p_uniname->name;
+	struct nls_table *nls = SDFAT_SB(sb)->nls_io;
+
+	i = 0;
+	while ((i < MAX_NAME_LENGTH) && (out_len < (buflen-1))) {
+		if (*uniname == (u16)'\0')
+			break;
+
+		len = convert_uni_to_ch(nls, *uniname, buf, NULL);
+
+		/* clamp a sequence that would overflow the buffer;
+		 * NOTE(review): this can emit a partial multi-byte character
+		 * at the truncation point
+		 */
+		if (out_len + len >= buflen)
+			len = (buflen - 1) - out_len;
+
+		out_len += len;
+
+		if (len > 1) {
+			for (j = 0; j < len; j++)
+				*p_cstring++ = (s8) *(buf+j);
+		} else { /* len == 1 */
+			*p_cstring++ = (s8) *buf;
+		}
+
+		uniname++;
+		i++;
+	}
+
+	*p_cstring = '\0';
+	return out_len;
+}
+
+/*
+ * Convert a VFS byte-string name to unicode through the io codepage
+ * (non-utf8 mounts) and compute the upper-cased name hash.  Sets
+ * NLS_NAME_LOSSY for control/illegal/unconvertible characters and
+ * NLS_NAME_OVERLEN when the input did not fit in MAX_NAME_LENGTH.
+ * Returns the resulting unicode length.
+ */
+static s32 __nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring,
+		const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
+{
+	s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
+	u16 upname[MAX_NAME_LENGTH+1];
+	u16 *uniname = p_uniname->name;
+	struct nls_table *nls = SDFAT_SB(sb)->nls_io;
+
+	BUG_ON(!len);
+
+	/* i walks input bytes, unilen counts output units */
+	i = unilen = 0;
+	while ((unilen < MAX_NAME_LENGTH) && (i < len)) {
+		i += convert_ch_to_uni(nls, (u8 *)(p_cstring+i), uniname, &lossy);
+
+		if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
+			lossy |= NLS_NAME_LOSSY;
+
+		*(upname+unilen) = nls_upper(sb, *uniname);
+
+		uniname++;
+		unilen++;
+	}
+
+	/* input remaining after the length cap means the name was cut */
+	if (*(p_cstring+i) != '\0')
+		lossy |= NLS_NAME_OVERLEN;
+
+	*uniname = (u16)'\0';
+	p_uniname->name_len = unilen;
+	p_uniname->name_hash =
+		calc_chksum_2byte((void *) upname, unilen<<1, 0, CS_DEFAULT);
+
+	if (p_lossy)
+		*p_lossy = lossy;
+
+	return unilen;
+}
+
+/* Unicode -> VFS name; the utf8 mount option picks the converter. */
+s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 buflen)
+{
+	return SDFAT_SB(sb)->options.utf8 ?
+		__nls_utf16s_to_vfsname(sb, uniname, p_cstring, buflen) :
+		__nls_uni16s_to_vfsname(sb, uniname, p_cstring, buflen);
+}
+
+/* VFS name -> unicode; the utf8 mount option picks the converter. */
+s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, const s32 len, UNI_NAME_T *uniname, s32 *p_lossy)
+{
+	return SDFAT_SB(sb)->options.utf8 ?
+		__nls_vfsname_to_utf16s(sb, p_cstring, len, uniname, p_lossy) :
+		__nls_vfsname_to_uni16s(sb, p_cstring, len, uniname, p_lossy);
+}
+
+/*======================================================================*/
+/* Local Function Definitions */
+/*======================================================================*/
+
+/*
+ * Convert one codepage character at @ch to a UTF-16 unit in *@uni.
+ * Returns the number of input bytes consumed; on conversion failure a
+ * '_' is substituted, *@lossy (if given) gets NLS_NAME_LOSSY set, and
+ * the byte count falls back to 1 for utf8 tables and 2 otherwise.
+ */
+static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy)
+{
+	int n_consumed;
+
+	*uni = 0x0;
+
+	/* plain ASCII needs no codepage lookup */
+	if (ch[0] < 0x80) {
+		*uni = (u16) ch[0];
+		return 1;
+	}
+
+	n_consumed = nls->char2uni(ch, MAX_CHARSET_SIZE, uni);
+	if (n_consumed >= 0)
+		return n_consumed;
+
+	/* conversion failed */
+	DMSG("%s: fail to use nls\n", __func__);
+	if (lossy != NULL)
+		*lossy |= NLS_NAME_LOSSY;
+	*uni = (u16) '_';
+
+	return strcmp(nls->charset, "utf8") ? 2 : 1;
+}
+
+/*
+ * Convert one UTF-16 unit @uni to codepage bytes in @ch.  Returns the
+ * number of output bytes; on conversion failure a single '_' is emitted
+ * and *@lossy (if given) gets NLS_NAME_LOSSY set.
+ */
+static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy)
+{
+	int out_len;
+
+	ch[0] = 0x0;
+
+	/* ASCII range maps 1:1 without the codepage */
+	if (uni < 0x0080) {
+		ch[0] = (u8) uni;
+		return 1;
+	}
+
+	out_len = nls->uni2char(uni, ch, MAX_CHARSET_SIZE);
+	if (out_len >= 0)
+		return out_len;
+
+	/* conversion failed */
+	DMSG("%s: fail to use nls\n", __func__);
+	if (lossy != NULL)
+		*lossy |= NLS_NAME_LOSSY;
+	ch[0] = '_';
+	return 1;
+}
+
+/* end of nls.c */
diff --git a/fs/sdfat/sdfat.c b/fs/sdfat/sdfat.c
new file mode 100644
index 000000000000..2d9955cfc993
--- /dev/null
+++ b/fs/sdfat/sdfat.c
@@ -0,0 +1,5255 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : sdfat.c */
+/* PURPOSE : sdFAT glue layer for supporting VFS */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/mpage.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/mount.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
+#include <linux/backing-dev.h>
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h> /* for mark_page_accessed() */
+#include <linux/vmalloc.h>
+#include <asm/current.h>
+#include <asm/unaligned.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#include <linux/iversion.h>
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+#include <linux/aio.h>
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#error SDFAT only supports linux kernel version 3.0 or higher
+#endif
+
+#include "sdfat.h"
+#include "version.h"
+
+/* ctx->pos value once "." and ".." have been emitted; lets us skip re-emitting dots */
+#define ITER_POS_FILLED_DOTS (2)
+
+/* filesystem-type names, indexed by the FS_TYPE_* constants declared in sdfat.h */
+const char *FS_TYPE_STR[] = {
+ "auto", /* probe the volume type */
+ "exfat",
+ "vfat"
+};
+
+static struct kset *sdfat_kset;
+static struct kmem_cache *sdfat_inode_cachep;
+
+
+static int sdfat_default_codepage = CONFIG_SDFAT_DEFAULT_CODEPAGE;
+static char sdfat_default_iocharset[] = CONFIG_SDFAT_DEFAULT_IOCHARSET;
+static const char sdfat_iocharset_with_utf8[] = "iso8859-1";
+
+#ifdef CONFIG_SDFAT_TRACE_SB_LOCK
+static unsigned long __lock_jiffies;
+#endif
+
+static void sdfat_truncate(struct inode *inode, loff_t old_size);
+static int sdfat_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos);
+static struct inode *sdfat_build_inode(struct super_block *sb, const FILE_ID_T *fid, loff_t i_pos);
+static void sdfat_detach(struct inode *inode);
+static void sdfat_attach(struct inode *inode, loff_t i_pos);
+static inline unsigned long sdfat_hash(loff_t i_pos);
+static int __sdfat_write_inode(struct inode *inode, int sync);
+static int sdfat_sync_inode(struct inode *inode);
+static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc);
+static void sdfat_write_super(struct super_block *sb);
+static void sdfat_write_failed(struct address_space *mapping, loff_t to);
+
+static void sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb);
+static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb);
+static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb);
+
+/*************************************************************************
+ * INNER FUNCTIONS FOR FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+static int __sdfat_getattr(struct inode *inode, struct kstat *stat);
+static void __sdfat_writepage_end_io(struct bio *bio, int err);
+static inline void __lock_super(struct super_block *sb);
+static inline void __unlock_super(struct super_block *sb);
+static int __sdfat_create(struct inode *dir, struct dentry *dentry);
+static int __sdfat_revalidate(struct dentry *dentry);
+static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags);
+static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync);
+static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry);
+static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry);
+static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
+static int __sdfat_show_options(struct seq_file *m, struct super_block *sb);
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs);
+static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ loff_t count, unsigned long nr_segs);
+static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr);
+static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr);
+static int __sdfat_cmp(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
+static inline void inode_set_iversion(struct inode *inode, u64 val) /* backport of the 4.16 iversion helper: direct field access */
+{
+ inode->i_version = val;
+}
+static inline u64 inode_peek_iversion(struct inode *inode) /* backport: read i_version without bumping it */
+{
+ return inode->i_version;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) */
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) /* backport of the 4.14 helper: assign bi_bdev directly */
+{
+ bio->bi_bdev = bdev;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+static int sdfat_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags) /* statx-era signature; extra args unused */
+{
+ struct inode *inode = d_backing_inode(path->dentry);
+
+ return __sdfat_getattr(inode, stat);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */
+static int sdfat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* pre-4.11 signature */
+{
+ struct inode *inode = dentry->d_inode;
+
+ return __sdfat_getattr(inode, stat);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) /* drop stale bdev page-cache aliases for one block */
+{
+ clean_bdev_aliases(bdev, block, 1);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) */
+static inline void __sdfat_clean_bdev_aliases(struct block_device *bdev, sector_t block) /* pre-4.10 name for the same operation */
+{
+ unmap_underlying_metadata(bdev, block);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags) /* flags-aware ->rename (mandatory since 4.9) */
+{
+ /*
+ * The VFS already checks for existence, so for local filesystems
+ * the RENAME_NOREPLACE implementation is equivalent to plain rename.
+ * Don't support any other flags
+ */
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+ return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
+static int sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ return __sdfat_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static int setattr_prepare(struct dentry *dentry, struct iattr *attr) /* backport of the 4.9 helper on top of inode_change_ok() */
+{
+ struct inode *inode = dentry->d_inode;
+
+ return inode_change_ok(inode, attr);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static inline void __sdfat_submit_bio_write(struct bio *bio) /* 4.8+: op is carried in the bio itself */
+{
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ submit_bio(bio);
+}
+
+static inline unsigned int __sdfat_full_name_hash(const struct dentry *dentry, const char *name, unsigned int len) /* 4.8+: hash is salted with the dentry */
+{
+ return full_name_hash(dentry, name, len);
+}
+
+static inline unsigned long __sdfat_init_name_hash(const struct dentry *dentry)
+{
+ return init_name_hash(dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) */
+static inline void __sdfat_submit_bio_write(struct bio *bio) /* pre-4.8: op passed as an argument */
+{
+ submit_bio(WRITE, bio);
+}
+
+static inline unsigned int __sdfat_full_name_hash(const struct dentry *unused, const char *name, unsigned int len)
+{
+ return full_name_hash(name, len);
+}
+
+static inline unsigned long __sdfat_init_name_hash(const struct dentry *unused)
+{
+ return init_name_hash();
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 21)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 21) */
+static inline void inode_lock(struct inode *inode) /* backport: inode_lock/unlock wrap the old i_mutex */
+{
+ mutex_lock(&inode->i_mutex);
+}
+
+static inline void inode_unlock(struct inode *inode)
+{
+ mutex_unlock(&inode->i_mutex);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static inline int sdfat_remount_syncfs(struct super_block *sb) /* flush dirty state before remount; caller is ->remount_fs */
+{
+ sync_filesystem(sb);
+ return 0;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */
+static inline int sdfat_remount_syncfs(struct super_block *sb)
+{
+ /*
+ * No need to call sync_filesystem(sb) here,
+ * because the VFS calls it before ->remount_fs on these kernels.
+ */
+ return 0;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static inline sector_t __sdfat_bio_sector(struct bio *bio) /* 3.14+: sector lives in the bvec_iter */
+{
+ return bio->bi_iter.bi_sector;
+}
+
+static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector,
+ unsigned int size, unsigned int idx, unsigned int done) /* reset a bio's iterator to the given position */
+{
+ struct bvec_iter *iter = &(bio->bi_iter);
+
+ iter->bi_sector = sector;
+ iter->bi_size = size;
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = done;
+}
+
+static void __sdfat_truncate_pagecache(struct inode *inode,
+ loff_t to, loff_t newsize) /* 3.14+ dropped the 'to' argument */
+{
+ truncate_pagecache(inode, newsize);
+}
+
+static int sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+ return __sdfat_d_hash(dentry, qstr);
+}
+
+static int sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr)
+{
+ return __sdfat_d_hashi(dentry, qstr);
+}
+
+//instead of sdfat_readdir
+static int sdfat_iterate(struct file *filp, struct dir_context *ctx) /* ->iterate: emit directory entries starting at ctx->pos */
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T de;
+ DENTRY_NAMEBUF_T *nb = &(de.NameBuf);
+ unsigned long inum;
+ loff_t cpos;
+ int err = 0, fake_offset = 0;
+
+ sdfat_init_namebuf(nb);
+ __lock_super(sb);
+
+ cpos = ctx->pos;
+ if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) { /* exFAT dirs have no on-disk "."/".." */
+ if (!dir_emit_dots(filp, ctx))
+ goto out;
+ if (ctx->pos == ITER_POS_FILLED_DOTS) {
+ cpos = 0; /* dots done; restart real entries from offset 0 */
+ fake_offset = 1;
+ }
+ }
+ if (cpos & (DENTRY_SIZE - 1)) { /* offsets must be dentry-aligned */
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* name buffer should be allocated before use */
+ err = sdfat_alloc_namebuf(nb);
+ if (err)
+ goto out;
+get_new:
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+ SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS;
+
+ if (cpos >= SDFAT_I(inode)->fid.size)
+ goto end_of_dir;
+
+ err = fsapi_readdir(inode, &de);
+ if (err) {
+ // at least we tried to read a sector
+ // move cpos to next sector position (should be aligned)
+ if (err == -EIO) {
+ cpos += 1 << (sb->s_blocksize_bits);
+ cpos &= ~((u32)sb->s_blocksize-1); /* NOTE(review): u32 mask zero-extends and also clears bits 32+ of cpos — confirm dirs stay < 4GiB */
+ }
+
+ err = -EIO;
+ goto end_of_dir;
+ }
+
+ cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS;
+
+ if (!nb->lfn[0]) /* empty name => no more entries */
+ goto end_of_dir;
+
+ if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = inode->i_ino;
+ } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = parent_ino(filp->f_path.dentry);
+ } else {
+ loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) |
+ ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff);
+ struct inode *tmp = sdfat_iget(sb, i_pos); /* reuse a cached inode's number if one exists for this position */
+
+ if (tmp) {
+ inum = tmp->i_ino;
+ iput(tmp);
+ } else {
+ inum = iunique(sb, SDFAT_ROOT_INO);
+ }
+ }
+
+ /* Before calling dir_emit(), sb_lock should be released.
+ * Because page fault can occur in dir_emit() when the size of buffer given
+ * from user is larger than one page size
+ */
+ __unlock_super(sb);
+ if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
+ (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
+ goto out_unlocked;
+ __lock_super(sb);
+
+ ctx->pos = cpos;
+ goto get_new;
+
+end_of_dir:
+ if (!cpos && fake_offset) /* keep pos past the dots so they are not re-emitted */
+ cpos = ITER_POS_FILLED_DOTS;
+ ctx->pos = cpos;
+out:
+ __unlock_super(sb);
+out_unlocked:
+ /*
+ * To improve performance, free namebuf after unlocking sb_lock.
+ * If namebuf is not allocated, this function does nothing.
+ */
+ sdfat_free_namebuf(nb);
+ return err;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+static inline sector_t __sdfat_bio_sector(struct bio *bio) /* pre-3.14: sector is a direct bio field */
+{
+ return bio->bi_sector;
+}
+
+static inline void __sdfat_set_bio_iterate(struct bio *bio, sector_t sector,
+ unsigned int size, unsigned int idx, unsigned int done) /* 'done' has no pre-3.14 equivalent and is ignored */
+{
+ bio->bi_sector = sector;
+ bio->bi_idx = idx;
+ bio->bi_size = size; //PAGE_SIZE;
+}
+
+static void __sdfat_truncate_pagecache(struct inode *inode,
+ loff_t to, loff_t newsize)
+{
+ truncate_pagecache(inode, to, newsize);
+}
+
+static int sdfat_d_hash(const struct dentry *dentry,
+ const struct inode *inode, struct qstr *qstr)
+{
+ return __sdfat_d_hash(dentry, qstr);
+}
+
+static int sdfat_d_hashi(const struct dentry *dentry,
+ const struct inode *inode, struct qstr *qstr)
+{
+ return __sdfat_d_hashi(dentry, qstr);
+}
+
+static int sdfat_readdir(struct file *filp, void *dirent, filldir_t filldir) /* pre-3.14 ->readdir; mirrors sdfat_iterate above */
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T de;
+ DENTRY_NAMEBUF_T *nb = &(de.NameBuf);
+ unsigned long inum;
+ loff_t cpos;
+ int err = 0, fake_offset = 0;
+
+ sdfat_init_namebuf(nb);
+ __lock_super(sb);
+
+ cpos = filp->f_pos;
+ /* Fake . and .. for the root directory. */
+ if ((fsi->vol_type == EXFAT) || (inode->i_ino == SDFAT_ROOT_INO)) {
+ while (cpos < ITER_POS_FILLED_DOTS) {
+ if (inode->i_ino == SDFAT_ROOT_INO)
+ inum = SDFAT_ROOT_INO;
+ else if (cpos == 0)
+ inum = inode->i_ino;
+ else /* (cpos == 1) */
+ inum = parent_ino(filp->f_path.dentry);
+
+ if (filldir(dirent, "..", cpos+1, cpos, inum, DT_DIR) < 0) /* namelen cpos+1 emits "." (len 1) then ".." (len 2) */
+ goto out;
+ cpos++;
+ filp->f_pos++;
+ }
+ if (cpos == ITER_POS_FILLED_DOTS) {
+ cpos = 0; /* dots done; restart real entries from offset 0 */
+ fake_offset = 1;
+ }
+ }
+ if (cpos & (DENTRY_SIZE - 1)) { /* offsets must be dentry-aligned */
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* name buffer should be allocated before use */
+ err = sdfat_alloc_namebuf(nb);
+ if (err)
+ goto out;
+get_new:
+ SDFAT_I(inode)->fid.size = i_size_read(inode);
+ SDFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS;
+
+ if (cpos >= SDFAT_I(inode)->fid.size)
+ goto end_of_dir;
+
+ err = fsapi_readdir(inode, &de);
+ if (err) {
+ // at least we tried to read a sector
+ // move cpos to next sector position (should be aligned)
+ if (err == -EIO) {
+ cpos += 1 << (sb->s_blocksize_bits);
+ cpos &= ~((u32)sb->s_blocksize-1); /* NOTE(review): u32 mask zero-extends and also clears bits 32+ of cpos — confirm dirs stay < 4GiB */
+ }
+
+ err = -EIO;
+ goto end_of_dir;
+ }
+
+ cpos = SDFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS;
+
+ if (!nb->lfn[0]) /* empty name => no more entries */
+ goto end_of_dir;
+
+ if (!memcmp(nb->sfn, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = inode->i_ino;
+ } else if (!memcmp(nb->sfn, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+ inum = parent_ino(filp->f_path.dentry);
+ } else {
+ loff_t i_pos = ((loff_t) SDFAT_I(inode)->fid.start_clu << 32) |
+ ((SDFAT_I(inode)->fid.rwoffset-1) & 0xffffffff);
+ struct inode *tmp = sdfat_iget(sb, i_pos); /* reuse a cached inode's number if one exists for this position */
+
+ if (tmp) {
+ inum = tmp->i_ino;
+ iput(tmp);
+ } else {
+ inum = iunique(sb, SDFAT_ROOT_INO);
+ }
+ }
+
+ /* Before calling dir_emit(), sb_lock should be released.
+ * Because page fault can occur in dir_emit() when the size of buffer given
+ * from user is larger than one page size
+ */
+ __unlock_super(sb);
+ if (filldir(dirent, nb->lfn, strlen(nb->lfn), cpos, inum,
+ (de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG) < 0)
+ goto out_unlocked;
+ __lock_super(sb);
+
+ filp->f_pos = cpos;
+ goto get_new;
+
+end_of_dir:
+ if (!cpos && fake_offset) /* keep pos past the dots so they are not re-emitted */
+ cpos = ITER_POS_FILLED_DOTS;
+ filp->f_pos = cpos;
+out:
+ __unlock_super(sb);
+out_unlocked:
+ /*
+ * To improve performance, free namebuf after unlocking sb_lock.
+ * If namebuf is not allocated, this function does nothing.
+ */
+ sdfat_free_namebuf(nb);
+ return err;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+ /* EMPTY */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */
+static inline struct inode *file_inode(const struct file *f) /* backport of the 3.9 helper */
+{
+ return f->f_dentry->d_inode;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+static inline int __is_sb_dirty(struct super_block *sb) /* 3.7+ removed sb->s_dirt; sbi keeps a shadow flag */
+{
+ return SDFAT_SB(sb)->s_dirt;
+}
+
+static inline void __set_sb_clean(struct super_block *sb)
+{
+ SDFAT_SB(sb)->s_dirt = 0;
+}
+
+/* Workqueue wrapper for sdfat_write_super () */
+static void __write_super_delayed(struct work_struct *work)
+{
+ struct sdfat_sb_info *sbi;
+ struct super_block *sb;
+
+ sbi = container_of(work, struct sdfat_sb_info, write_super_work.work);
+ sb = sbi->host_sb;
+
+ /* XXX: Is this needed? */
+ if (!sb || !down_read_trylock(&sb->s_umount)) { /* trylock: skip the flush rather than race an unmount */
+ DMSG("%s: skip delayed work(write_super).\n", __func__);
+ return;
+ }
+
+ DMSG("%s: do delayed_work(write_super).\n", __func__);
+
+ spin_lock(&sbi->work_lock);
+ sbi->write_super_queued = 0; /* allow the next flush to be queued */
+ spin_unlock(&sbi->work_lock);
+
+ sdfat_write_super(sb);
+
+ up_read(&sb->s_umount);
+}
+
+static void setup_sdfat_sync_super_wq(struct super_block *sb) /* init the delayed write_super machinery at mount time */
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_init(&sbi->s_lock);
+ spin_lock_init(&sbi->work_lock);
+ INIT_DELAYED_WORK(&sbi->write_super_work, __write_super_delayed);
+ sbi->host_sb = sb;
+}
+
+static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi)
+{
+ return cancel_delayed_work_sync(&sbi->write_super_work);
+}
+
+static inline void lock_super(struct super_block *sb) /* 3.7+ removed the VFS lock_super(); use a private mutex */
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_lock(&sbi->s_lock);
+}
+
+static inline void unlock_super(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ mutex_unlock(&sbi->s_lock);
+}
+
+static int sdfat_revalidate(struct dentry *dentry, unsigned int flags) /* ->d_revalidate; cannot sleep in RCU walk */
+{
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate(dentry);
+}
+
+static int sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags) /* case-insensitive variant */
+{
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate_ci(dentry, flags);
+}
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos) /* look up a cached inode by on-disk position; returns a held inode or NULL */
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_inode_info *info;
+ struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos);
+ struct inode *inode = NULL;
+
+ spin_lock(&sbi->inode_hash_lock);
+ hlist_for_each_entry(info, head, i_hash_fat) {
+ BUG_ON(info->vfs_inode.i_sb != sb);
+
+ if (i_pos != info->i_pos)
+ continue;
+ inode = igrab(&info->vfs_inode); /* may fail if the inode is being freed */
+ if (inode)
+ break;
+ }
+ spin_unlock(&sbi->inode_hash_lock);
+ return inode;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) */
+static inline int __is_sb_dirty(struct super_block *sb) /* pre-3.7: VFS still provides s_dirt and periodic write_super */
+{
+ return sb->s_dirt;
+}
+
+static inline void __set_sb_clean(struct super_block *sb)
+{
+ sb->s_dirt = 0;
+}
+
+static void setup_sdfat_sync_super_wq(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ sbi->host_sb = sb;
+}
+
+static inline bool __cancel_delayed_work_sync(struct sdfat_sb_info *sbi)
+{
+ /* DO NOTHING */
+ return 0;
+}
+
+static inline void clear_inode(struct inode *inode) /* backport: 3.5 renamed end_writeback() to clear_inode() */
+{
+ end_writeback(inode);
+}
+
+static int sdfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate(dentry);
+}
+
+static int sdfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+{
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ return __sdfat_revalidate_ci(dentry, nd ? nd->flags : 0);
+
+}
+
+static struct inode *sdfat_iget(struct super_block *sb, loff_t i_pos) /* pre-3.7 hlist iterator takes an extra node cursor */
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_inode_info *info;
+ struct hlist_node *node;
+ struct hlist_head *head = sbi->inode_hashtable + sdfat_hash(i_pos);
+ struct inode *inode = NULL;
+
+ spin_lock(&sbi->inode_hash_lock);
+ hlist_for_each_entry(info, node, head, i_hash_fat) {
+ BUG_ON(info->vfs_inode.i_sb != sb);
+
+ if (i_pos != info->i_pos)
+ continue;
+ inode = igrab(&info->vfs_inode); /* may fail if the inode is being freed */
+ if (inode)
+ break;
+ }
+ spin_unlock(&sbi->inode_hash_lock);
+ return inode;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags) /* 3.6+ ->lookup takes flags instead of nameidata */
+{
+ return __sdfat_lookup(dir, dentry);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 0) */
+static struct dentry *sdfat_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ return __sdfat_lookup(dir, dentry);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ /* NOTHING NOW */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */
+#define GLOBAL_ROOT_UID (0) /* pre-user-namespace kernels: kuid_t helpers are identity shims */
+#define GLOBAL_ROOT_GID (0)
+
+static inline bool uid_eq(uid_t left, uid_t right)
+{
+ return left == right;
+}
+
+static inline bool gid_eq(gid_t left, gid_t right)
+{
+ return left == right;
+}
+
+static inline uid_t from_kuid_munged(struct user_namespace *to, uid_t kuid)
+{
+ return kuid;
+}
+
+static inline gid_t from_kgid_munged(struct user_namespace *to, gid_t kgid)
+{
+ return kgid;
+}
+
+static inline uid_t make_kuid(struct user_namespace *from, uid_t uid)
+{
+ return uid;
+}
+
+static inline gid_t make_kgid(struct user_namespace *from, gid_t gid)
+{
+ return gid;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static struct dentry *__d_make_root(struct inode *root_inode) /* 3.4 renamed d_alloc_root() to d_make_root() */
+{
+ return d_make_root(root_inode);
+}
+
+static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new) /* shrink i_size then free clusters, under the private truncate_lock */
+{
+ down_write(&SDFAT_I(inode)->truncate_lock);
+ truncate_setsize(inode, new);
+ sdfat_truncate(inode, old);
+ up_write(&SDFAT_I(inode)->truncate_lock);
+}
+
+static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block) /* ->bmap; truncate_lock replaces the removed i_alloc_sem */
+{
+ sector_t blocknr;
+
+ /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. */
+ down_read(&SDFAT_I(mapping->host)->truncate_lock);
+ blocknr = generic_block_bmap(mapping, block, sdfat_get_block);
+ up_read(&SDFAT_I(mapping->host)->truncate_lock);
+ return blocknr;
+}
+
+static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) /* 3.4+ mode is umode_t */
+{
+ return __sdfat_mkdir(dir, dentry);
+}
+
+static int sdfat_show_options(struct seq_file *m, struct dentry *root) /* 3.4+ takes the root dentry */
+{
+ return __sdfat_show_options(m, root->d_sb);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) */
+static inline void set_nlink(struct inode *inode, unsigned int nlink) /* backport of the 3.2+ helper */
+{
+ inode->i_nlink = nlink;
+}
+
+static struct dentry *__d_make_root(struct inode *root_inode)
+{
+ return d_alloc_root(root_inode);
+}
+
+static void __sdfat_do_truncate(struct inode *inode, loff_t old, loff_t new)
+{
+ truncate_setsize(inode, new);
+ sdfat_truncate(inode, old);
+}
+
+static sector_t sdfat_aop_bmap(struct address_space *mapping, sector_t block) /* pre-3.4: the VFS still provides i_alloc_sem */
+{
+ sector_t blocknr;
+
+ /* sdfat_get_cluster() assumes the requested blocknr isn't truncated. */
+ down_read(&mapping->host->i_alloc_sem);
+ blocknr = generic_block_bmap(mapping, block, sdfat_get_block);
+ up_read(&mapping->host->i_alloc_sem);
+ return blocknr;
+}
+
+static int sdfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ return __sdfat_mkdir(dir, dentry);
+}
+
+static int sdfat_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ return __sdfat_show_options(m, mnt->mnt_sb);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+#define __sdfat_generic_file_fsync(filp, start, end, datasync) \
+ generic_file_fsync(filp, start, end, datasync)
+
+static int sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) /* 3.1+ ->fsync carries a byte range */
+{
+ return __sdfat_file_fsync(filp, start, end, datasync);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) */
+#define __sdfat_generic_file_fsync(filp, start, end, datasync) \
+ generic_file_fsync(filp, datasync)
+static int sdfat_file_fsync(struct file *filp, int datasync) /* pre-3.1: no range; pass a dummy 0..0 */
+{
+ return __sdfat_file_fsync(filp, 0, 0, datasync);
+}
+#endif
+
+/*************************************************************************
+ * MORE FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#define CURRENT_TIME_SEC timespec64_trunc(current_kernel_time64(), NSEC_PER_SEC) /* second-granularity "now" (CURRENT_TIME_SEC was removed in 4.12) */
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
+#define CURRENT_TIME_SEC timespec_trunc(current_kernel_time(), NSEC_PER_SEC)
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */
+ /* EMPTY */
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+static void sdfat_writepage_end_io(struct bio *bio) /* bio completion; error carried as blk_status_t */
+{
+ __sdfat_writepage_end_io(bio, blk_status_to_errno(bio->bi_status));
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static void sdfat_writepage_end_io(struct bio *bio) /* 4.3..4.12: error carried in bi_error */
+{
+ __sdfat_writepage_end_io(bio, bio->bi_error);
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) */
+static void sdfat_writepage_end_io(struct bio *bio, int err) /* pre-4.3: err argument plus BIO_UPTODATE flag */
+{
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ err = 0;
+ __sdfat_writepage_end_io(bio, err);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+static int sdfat_cmp(const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name) /* ->d_compare, case-sensitive */
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name) /* ->d_compare, case-insensitive */
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+static int sdfat_cmp(const struct dentry *parent, const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name) /* 3.14..4.7 signature (extra parent arg) */
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *parent, const struct dentry *dentry,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#else
+static int sdfat_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name) /* pre-3.14 signature (parent + inodes) */
+{
+ return __sdfat_cmp(dentry, len, str, name);
+}
+
+static int sdfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ return __sdfat_cmpi(dentry, len, str, name);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static ssize_t sdfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) /* ->direct_IO; offset comes from iocb->ki_pos since 4.7 */
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int rw = iov_iter_rw(iter);
+ loff_t offset = iocb->ki_pos;
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static ssize_t sdfat_direct_IO(struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset) /* 4.1..4.6: explicit offset, rw derived from the iter */
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int rw = iov_iter_rw(iter);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset) /* 3.16..4.0: rw passed explicitly */
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iter, offset, count, 0 /* UNUSED */);
+}
+#else
+static ssize_t sdfat_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs) /* pre-3.16: raw iovec array */
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_length(iov, nr_segs);
+
+ return __sdfat_direct_IO(rw, iocb, inode,
+ (void *)iov, offset, count, nr_segs);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t unused_1,
+ unsigned long nr_segs) /* iov_u actually holds a struct iov_iter * on these kernels */
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(iocb, inode, iter, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int unused, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(iocb, inode, iter, offset, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct iov_iter *iter = (struct iov_iter *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, iter,
+ offset, sdfat_get_block);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs) /* pre-3.16: iov_u holds a raw struct iovec array */
+{
+ const struct iovec *iov = (const struct iovec *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, iov,
+ offset, nr_segs, sdfat_get_block);
+}
+#else
+static inline ssize_t __sdfat_blkdev_direct_IO(int rw, struct kiocb *iocb,
+ struct inode *inode, void *iov_u, loff_t offset,
+ unsigned long nr_segs) /* pre-3.4: bdev and end_io callback passed explicitly */
+{
+ const struct iovec *iov = (const struct iovec *)iov_u;
+
+ return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ offset, nr_segs, sdfat_get_block, NULL);
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+static const char *sdfat_follow_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) /* ->get_link; target string lives in the inode info */
+{
+ struct sdfat_inode_info *ei = SDFAT_I(inode);
+
+ return (char *)(ei->target);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+static const char *sdfat_follow_link(struct dentry *dentry, void **cookie) /* 4.2..4.4 cookie-based API */
+{
+ struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode);
+
+ return *cookie = (char *)(ei->target);
+}
+#else
+static void *sdfat_follow_link(struct dentry *dentry, struct nameidata *nd) /* pre-4.2 nd_set_link API */
+{
+ struct sdfat_inode_info *ei = SDFAT_I(dentry->d_inode);
+
+ nd_set_link(nd, (char *)(ei->target));
+ return NULL;
+}
+#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl) /* ->create; mode/excl ignored, FAT has no POSIX modes */
+{
+ return __sdfat_create(dir, dentry);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+static int sdfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ struct nameidata *nd)
+{
+ return __sdfat_create(dir, dentry);
+}
+#else
+static int sdfat_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ return __sdfat_create(dir, dentry);
+}
+#endif
+
+
+/*************************************************************************
+ * WRAP FUNCTIONS FOR DEBUGGING
+ *************************************************************************/
+#ifdef CONFIG_SDFAT_TRACE_SB_LOCK
+/*
+ * Traced variants: record the acquisition time so long hold times of the
+ * superblock lock can be reported on release.
+ */
+static inline void __lock_super(struct super_block *sb)
+{
+	lock_super(sb);
+	__lock_jiffies = jiffies;
+}
+
+static inline void __unlock_super(struct super_block *sb)
+{
+	int time = ((jiffies - __lock_jiffies) * 1000 / HZ);
+	/* FIXME : error message should be modified */
+	/* NOTE(review): __func__ here expands to "__unlock_super", not the
+	 * real caller — exactly what the FIXME above refers to. */
+	if (time > 10)
+		EMSG("lock_super in %s (%d ms)\n", __func__, time);
+
+	unlock_super(sb);
+}
+#else /* CONFIG_SDFAT_TRACE_SB_LOCK */
+/* Untraced variants: plain pass-throughs to lock_super()/unlock_super(). */
+static inline void __lock_super(struct super_block *sb)
+{
+	lock_super(sb);
+}
+
+static inline void __unlock_super(struct super_block *sb)
+{
+	unlock_super(sb);
+}
+#endif /* CONFIG_SDFAT_TRACE_SB_LOCK */
+
+/*************************************************************************
+ * NORMAL FUNCTIONS
+ *************************************************************************/
+/* Pack directory cluster (high 32 bits) and entry index (low 32 bits)
+ * into the 64-bit on-disk position used as the inode hash key. */
+static inline loff_t sdfat_make_i_pos(FILE_ID_T *fid)
+{
+	loff_t pos;
+
+	pos = (loff_t) fid->dir.dir << 32;
+	pos |= (fid->entry & 0xffffffff);
+	return pos;
+}
+
+/*======================================================================*/
+/* Directory Entry Name Buffer Operations */
+/*======================================================================*/
+/* Reset a dentry name buffer to the "nothing allocated" state. */
+static void sdfat_init_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+	nb->lfnbuf_len = 0;
+	nb->sfnbuf_len = 0;
+	nb->lfn = NULL;
+	nb->sfn = NULL;
+}
+
+/*
+ * Grab one name page via __getname() and split it in half: the first
+ * MAX_VFSNAME_BUF_SIZE bytes hold the long filename, the second half the
+ * short (8.3) name.  Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int sdfat_alloc_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+	nb->lfn = __getname();
+	if (unlikely(!nb->lfn))
+		return -ENOMEM;
+
+	nb->lfnbuf_len = MAX_VFSNAME_BUF_SIZE;
+	nb->sfnbuf_len = MAX_VFSNAME_BUF_SIZE;
+	nb->sfn = nb->lfn + MAX_VFSNAME_BUF_SIZE;
+	return 0;
+}
+
+/* Release the name page (if any) and reset the buffer descriptors. */
+static void sdfat_free_namebuf(DENTRY_NAMEBUF_T *nb)
+{
+	if (nb->lfn) {
+		__putname(nb->lfn);
+		sdfat_init_namebuf(nb);
+	}
+}
+
+/*======================================================================*/
+/* Directory Entry Operations */
+/*======================================================================*/
+/* Sentinel values stored in dentry->d_fsdata: LOCKED marks a dentry whose
+ * directory entry is being created/renamed so d_revalidate must not drop
+ * it mid-operation. */
+#define SDFAT_DSTATE_LOCKED	(void *)(0xCAFE2016)
+#define SDFAT_DSTATE_UNLOCKED	(void *)(0x00000000)
+
+/* Mark the dentry as in-flight (under d_lock) so revalidation keeps it. */
+static inline void __lock_d_revalidate(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	dentry->d_fsdata = SDFAT_DSTATE_LOCKED;
+	spin_unlock(&dentry->d_lock);
+}
+
+/* Clear the in-flight mark set by __lock_d_revalidate(). */
+static inline void __unlock_d_revalidate(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	dentry->d_fsdata = SDFAT_DSTATE_UNLOCKED;
+	spin_unlock(&dentry->d_lock);
+}
+
+/* Test the in-flight mark; caller must already hold dentry->d_lock. */
+static inline int __check_dstate_locked(struct dentry *dentry)
+{
+	return (dentry->d_fsdata == SDFAT_DSTATE_LOCKED) ? 1 : 0;
+}
+
+/*
+ * If new entry was created in the parent, it could create the 8.3
+ * alias (the shortname of logname). So, the parent may have the
+ * negative-dentry which matches the created 8.3 alias.
+ *
+ * If it happened, the negative dentry isn't actually negative
+ * anymore. So, drop it.
+ */
+static int __sdfat_revalidate_common(struct dentry *dentry)
+{
+	int ret = 1;
+
+	spin_lock(&dentry->d_lock);
+	/* Drop (ret = 0) only when all three hold: the dentry is negative,
+	 * it is not marked in-flight by a concurrent create/rename, and the
+	 * parent directory's iversion moved on since d_time was recorded
+	 * (i.e. the parent changed after this negative lookup was cached). */
+	if ((!dentry->d_inode) && (!__check_dstate_locked(dentry) &&
+		(dentry->d_time !=
+		(unsigned long)inode_peek_iversion(dentry->d_parent->d_inode)))) {
+		ret = 0;
+	}
+	spin_unlock(&dentry->d_lock);
+	return ret;
+}
+
+/* Case-sensitive revalidation: positive dentries are always valid,
+ * negative ones go through the shared staleness check. */
+static int __sdfat_revalidate(struct dentry *dentry)
+{
+	if (!dentry->d_inode)
+		return __sdfat_revalidate_common(dentry);
+
+	return 1;
+}
+
+/* Case-insensitive revalidation.  Returns 1 to keep the dentry, 0 to
+ * drop it so the lookup re-runs with the user's exact spelling. */
+static int __sdfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
+{
+	/*
+	 * This is not negative dentry. Always valid.
+	 *
+	 * Note, rename() to existing directory entry will have ->d_inode,
+	 * and will use existing name which isn't specified name by user.
+	 *
+	 * We may be able to drop this positive dentry here. But dropping
+	 * positive dentry isn't good idea. So it's unsupported like
+	 * rename("filename", "FILENAME") for now.
+	 */
+	if (dentry->d_inode)
+		return 1;
+#if 0	/* Blocked below code for lookup_one_len() called by stackable FS */
+	/*
+	 * This may be nfsd (or something), anyway, we can't see the
+	 * intent of this. So, since this can be for creation, drop it.
+	 */
+	if (!flags)
+		return 0;
+#endif
+	/*
+	 * Drop the negative dentry, in order to make sure to use the
+	 * case sensitive name which is specified by user if this is
+	 * for creation.
+	 */
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+		return 0;
+	return __sdfat_revalidate_common(dentry);
+}
+
+
+/* Return the effective name length with all trailing '.' stripped
+ * (FAT treats "name." the same as "name"). */
+static unsigned int __sdfat_striptail_len(unsigned int len, const char *name)
+{
+	unsigned int n;
+
+	for (n = len; n > 0 && name[n - 1] == '.'; n--)
+		;
+	return n;
+}
+
+/* qstr convenience wrapper around __sdfat_striptail_len(). */
+static unsigned int sdfat_striptail_len(const struct qstr *qstr)
+{
+	return __sdfat_striptail_len(qstr->len, qstr->name);
+}
+
+/*
+ * Compute the hash for the sdfat name corresponding to the dentry
+ * (case-sensitive flavour: trailing dots excluded, bytes hashed as-is).
+ * Note: if the name is invalid, we leave the hash code unchanged so
+ * that the existing dentry can be used. The sdfat fs routines will
+ * return ENOENT or EINVAL as appropriate.
+ */
+static int __sdfat_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	unsigned int len = sdfat_striptail_len(qstr);
+
+	qstr->hash = __sdfat_full_name_hash(dentry, qstr->name, len);
+	return 0;
+}
+
+/*
+ * Compute the hash for the sdfat name corresponding to the dentry
+ * (case-insensitive flavour: each byte is lower-cased through the
+ * configured NLS io table before hashing, so "FOO" and "foo" collide).
+ * Note: if the name is invalid, we leave the hash code unchanged so
+ * that the existing dentry can be used. The sdfat fs routines will
+ * return ENOENT or EINVAL as appropriate.
+ */
+static int __sdfat_d_hashi(const struct dentry *dentry, struct qstr *qstr)
+{
+	struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io;
+	const unsigned char *name;
+	unsigned int len;
+	unsigned long hash;
+
+	name = qstr->name;
+	len = sdfat_striptail_len(qstr);
+
+	hash = __sdfat_init_name_hash(dentry);
+	while (len--)
+		hash = partial_name_hash(nls_tolower(t, *name++), hash);
+	qstr->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+/*
+ * Case sensitive compare of two sdfat names.
+ * Returns 0 when they match (d_compare convention), 1 otherwise.
+ * Trailing dots are ignored on both sides before comparing.
+ */
+static int __sdfat_cmp(const struct dentry *dentry, unsigned int len,
+		const char *str, const struct qstr *name)
+{
+	unsigned int alen = sdfat_striptail_len(name);
+	unsigned int blen = __sdfat_striptail_len(len, str);
+
+	if (alen != blen)
+		return 1;
+
+	return strncmp(name->name, str, alen) ? 1 : 0;
+}
+
+/*
+ * Case insensitive compare of two sdfat names, using the NLS io table.
+ * Returns 0 when they match (d_compare convention), 1 otherwise.
+ * Trailing dots are ignored on both sides before comparing.
+ */
+static int __sdfat_cmpi(const struct dentry *dentry, unsigned int len,
+		const char *str, const struct qstr *name)
+{
+	struct nls_table *t = SDFAT_SB(dentry->d_sb)->nls_io;
+	unsigned int alen = sdfat_striptail_len(name);
+	unsigned int blen = __sdfat_striptail_len(len, str);
+
+	if (alen != blen)
+		return 1;
+
+	return nls_strnicmp(t, name->name, str, alen) ? 1 : 0;
+}
+
+/* dentry ops for case-sensitive mounts. */
+static const struct dentry_operations sdfat_dentry_ops = {
+	.d_revalidate	= sdfat_revalidate,
+	.d_hash		= sdfat_d_hash,
+	.d_compare	= sdfat_cmp,
+};
+
+/* dentry ops for case-insensitive mounts (NLS-aware hash/compare). */
+static const struct dentry_operations sdfat_ci_dentry_ops = {
+	.d_revalidate	= sdfat_revalidate_ci,
+	.d_hash		= sdfat_d_hashi,
+	.d_compare	= sdfat_cmpi,
+};
+
+#ifdef CONFIG_SDFAT_DFR
+/*----------------------------------------------------------------------*/
+/* Defragmentation related */
+/*----------------------------------------------------------------------*/
+/**
+ * @fn defrag_cleanup_reqs
+ * @brief clean-up defrag info depending on error flag
+ * @return void
+ * @param sb super block
+ * @param error error flag
+ */
+static void defrag_cleanup_reqs(INOUT struct super_block *sb, IN int error)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct defrag_info *sb_dfr = &(sbi->dfr_info);
+	struct defrag_info *ino_dfr = NULL, *tmp = NULL;
+	/* sdfat patch 0.96 : sbi->dfr_info crash problem */
+	__lock_super(sb);
+
+	/* Clean-up ino_dfr */
+	/* Per-inode state is reset only on the success path; on error the
+	 * per-inode lists are presumably torn down elsewhere — TODO confirm. */
+	if (!error) {
+		list_for_each_entry_safe(ino_dfr, tmp, &sb_dfr->entry, entry) {
+			struct inode *inode = &(container_of(ino_dfr, struct sdfat_inode_info, dfr_info)->vfs_inode);
+
+			mutex_lock(&ino_dfr->lock);
+
+			atomic_set(&ino_dfr->stat, DFR_INO_STAT_IDLE);
+
+			list_del(&ino_dfr->entry);
+
+			ino_dfr->chunks = NULL;
+			ino_dfr->nr_chunks = 0;
+			INIT_LIST_HEAD(&ino_dfr->entry);
+
+			BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+			mutex_unlock(&ino_dfr->lock);
+
+			/* Drop the reference taken with igrab() when the inode
+			 * was queued in defrag_validate_reqs(). */
+			iput(inode);
+		}
+	}
+
+	/* Clean-up sb_dfr */
+	sb_dfr->chunks = NULL;
+	sb_dfr->nr_chunks = 0;
+	INIT_LIST_HEAD(&sb_dfr->entry);
+
+	/* Clear dfr_new_clus page */
+	memset(sbi->dfr_new_clus, 0, PAGE_SIZE);
+	sbi->dfr_new_idx = 1;
+	memset(sbi->dfr_page_wb, 0, PAGE_SIZE);
+
+	sbi->dfr_hint_clus = sbi->dfr_hint_idx = sbi->dfr_reserved_clus = 0;
+
+	__unlock_super(sb);
+}
+
+/**
+ * @fn		defrag_validate_pages
+ * @brief	validate and mark dirty for victim pages
+ * @return	0 on success, -errno otherwise
+ * @param	inode	inode
+ * @param	chunk	given chunk
+ * @remark	protected by inode_lock and super_lock
+ *
+ * Every cached page covering the chunk must be present, locked, clean,
+ * up-to-date and unmapped; otherwise the chunk is rejected with -EINVAL.
+ * On success all pages (and their buffer heads) are marked dirty so the
+ * writeback path redirects them to the new clusters.
+ */
+static int
+defrag_validate_pages(
+	IN struct inode *inode,
+	IN struct defrag_chunk_info *chunk)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct page *page = NULL;
+	unsigned int i_size = 0, page_off = 0, page_nr = 0;
+	int buf_i = 0, i = 0, err = 0;
+
+	i_size = i_size_read(inode);
+	page_off = chunk->f_clus * PAGES_PER_CLUS(sb);
+	page_nr = (i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 1 : 0);
+	if ((i_size <= 0) || (page_nr <= 0)) {
+		dfr_err("inode %p, i_size %d, page_nr %d", inode, i_size, page_nr);
+		return -EINVAL;
+	}
+
+	/* Get victim pages
+	 * and check its dirty/writeback/mapped state
+	 */
+	for (i = 0;
+	     i < min((int)(page_nr - page_off), (int)(chunk->nr_clus * PAGES_PER_CLUS(sb)));
+	     i++) {
+		page = find_get_page(inode->i_mapping, page_off + i);
+		if (page)
+			if (!trylock_page(page)) {
+				put_page(page);
+				page = NULL;
+			}
+
+		if (!page) {
+			dfr_debug("get/lock_page() failed, index %d", i);
+			err = -EINVAL;
+			goto error;
+		}
+
+		/* Keep the locked page so the error path can release it. */
+		sbi->dfr_pagep[buf_i++] = page;
+		if (PageError(page) || !PageUptodate(page) || PageDirty(page) ||
+		    PageWriteback(page) || page_mapped(page)) {
+			dfr_debug("page %p, err %d, uptodate %d, "
+				  "dirty %d, wb %d, mapped %d",
+				  page, PageError(page), PageUptodate(page),
+				  PageDirty(page), PageWriteback(page),
+				  page_mapped(page));
+			err = -EINVAL;
+			goto error;
+		}
+
+		/* Remember which pages of the new cluster need writeback. */
+		set_bit((page->index & (PAGES_PER_CLUS(sb) - 1)),
+			(volatile unsigned long *)&(sbi->dfr_page_wb[chunk->new_idx + i / PAGES_PER_CLUS(sb)]));
+
+		page = NULL;
+	}
+
+	/**
+	 * All pages in the chunks are valid.
+	 */
+	i_size -= (chunk->f_clus * (sbi->fsi.cluster_size));
+	BUG_ON(((i_size / PAGE_SIZE) + ((i_size % PAGE_SIZE) ? 1 : 0)) != (page_nr - page_off));
+
+	for (i = 0; i < buf_i; i++) {
+		struct buffer_head *bh = NULL, *head = NULL;
+		int bh_idx = 0;
+
+		page = sbi->dfr_pagep[i];
+		BUG_ON(!page);
+
+		/* Mark dirty in page */
+		set_page_dirty(page);
+		mark_page_accessed(page);
+
+		/* Attach empty BHs */
+		if (!page_has_buffers(page))
+			create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
+		/* Mark dirty in BHs */
+		bh = head = page_buffers(page);
+		BUG_ON(!bh && !i_size);
+		do {
+			/* BHs past EOF carry no data — keep them clean. */
+			if ((bh_idx >= 1) && (bh_idx >= (i_size >> inode->i_blkbits))) {
+				clear_buffer_dirty(bh);
+			} else {
+				if (PageUptodate(page))
+					if (!buffer_uptodate(bh))
+						set_buffer_uptodate(bh);
+
+				/* Set this bh as delay */
+				set_buffer_new(bh);
+				set_buffer_delay(bh);
+
+				mark_buffer_dirty(bh);
+			}
+
+			bh_idx++;
+			bh = bh->b_this_page;
+		} while (bh != head);
+
+		/* Mark this page accessed */
+		mark_page_accessed(page);
+
+		i_size -= PAGE_SIZE;
+	}
+
+error:
+	/* Unlock and put refs for pages */
+	for (i = 0; i < buf_i; i++) {
+		BUG_ON(!sbi->dfr_pagep[i]);
+		unlock_page(sbi->dfr_pagep[i]);
+		put_page(sbi->dfr_pagep[i]);
+	}
+	/*
+	 * FIX: was memset(sbi->dfr_pagep, 0, sizeof(PAGE_SIZE)) which clears
+	 * only sizeof(int)-ish bytes (the size of the PAGE_SIZE constant's
+	 * type), leaving stale struct page pointers in the scratch array.
+	 * Clear exactly the slots populated above instead.
+	 */
+	memset(sbi->dfr_pagep, 0, sizeof(struct page *) * buf_i);
+
+	return err;
+}
+
+
+/**
+ * @fn		defrag_validate_reqs
+ * @brief	validate defrag requests
+ * @return	negative if all requests not valid, 0 otherwise
+ * @param	sb	super block
+ * @param	chunks	given chunks
+ *
+ * Walks every chunk after the header, looks up its inode, reserves
+ * clusters, and validates both the cluster chain and the page cache
+ * state.  Invalid chunks are individually marked DFR_CHUNK_STAT_ERR;
+ * only when every chunk fails does the whole call return -ENXIO.
+ */
+static int
+defrag_validate_reqs(
+	IN struct super_block *sb,
+	INOUT struct defrag_chunk_info *chunks)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct defrag_info *sb_dfr = &(sbi->dfr_info);
+	int i = 0, err = 0, err_cnt = 0;
+
+	/* Validate all reqs */
+	for (i = REQ_HEADER_IDX + 1; i < sb_dfr->nr_chunks; i++) {
+		struct defrag_chunk_info *chunk = NULL;
+		struct inode *inode = NULL;
+		struct defrag_info *ino_dfr = NULL;
+
+		chunk = &chunks[i];
+
+		/* Check inode */
+		__lock_super(sb);
+		inode = sdfat_iget(sb, chunk->i_pos);
+		if (!inode) {
+			dfr_debug("inode not found, i_pos %08llx", chunk->i_pos);
+			chunk->stat = DFR_CHUNK_STAT_ERR;
+			err_cnt++;
+			__unlock_super(sb);
+			continue;
+		}
+		__unlock_super(sb);
+
+		dfr_debug("req[%d] inode %p, i_pos %08llx, f_clus %d, "
+			  "d_clus %08x, nr %d, prev %08x, next %08x",
+			  i, inode, chunk->i_pos, chunk->f_clus, chunk->d_clus,
+			  chunk->nr_clus, chunk->prev_clus, chunk->next_clus);
+		/**
+		 * Lock ordering: inode_lock -> lock_super
+		 */
+		inode_lock(inode);
+		__lock_super(sb);
+
+		/* Check if enough buffers exist for chunk->new_idx */
+		if ((sbi->dfr_new_idx + chunk->nr_clus) >= (PAGE_SIZE / sizeof(int))) {
+			dfr_err("dfr_new_idx %d, chunk->nr_clus %d",
+				sbi->dfr_new_idx, chunk->nr_clus);
+			err = -ENOSPC;
+			goto unlock;
+		}
+
+		/* Reserve clusters for defrag with DA */
+		err = fsapi_dfr_reserve_clus(sb, chunk->nr_clus);
+		if (err)
+			goto unlock;
+
+		/* Check clusters */
+		err = fsapi_dfr_validate_clus(inode, chunk, 0);
+		if (err) {
+			/* Negative count undoes the reservation just made. */
+			fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus);
+			dfr_debug("Cluster validation: err %d", err);
+			goto unlock;
+		}
+
+		/* Check pages */
+		err = defrag_validate_pages(inode, chunk);
+		if (err) {
+			fsapi_dfr_reserve_clus(sb, 0 - chunk->nr_clus);
+			dfr_debug("Page validation: err %d", err);
+			goto unlock;
+		}
+
+		/* Mark IGNORE flag to victim AU */
+		if (sbi->options.improved_allocation & SDFAT_ALLOC_SMART)
+			fsapi_dfr_mark_ignore(sb, chunk->d_clus);
+
+		ino_dfr = &(SDFAT_I(inode)->dfr_info);
+		mutex_lock(&ino_dfr->lock);
+
+		/* Update chunk info */
+		chunk->stat = DFR_CHUNK_STAT_REQ;
+		chunk->new_idx = sbi->dfr_new_idx;
+
+		/* Update ino_dfr info */
+		if (list_empty(&(ino_dfr->entry))) {
+			list_add_tail(&ino_dfr->entry, &sb_dfr->entry);
+			ino_dfr->chunks = chunk;
+			/* Extra ref keeps the inode alive until cleanup. */
+			igrab(inode);
+		}
+		ino_dfr->nr_chunks++;
+
+		atomic_set(&ino_dfr->stat, DFR_INO_STAT_REQ);
+
+		BUG_ON(!mutex_is_locked(&ino_dfr->lock));
+		mutex_unlock(&ino_dfr->lock);
+
+		/* Reserved buffers for chunk->new_idx */
+		sbi->dfr_new_idx += chunk->nr_clus;
+
+unlock:
+		if (err) {
+			chunk->stat = DFR_CHUNK_STAT_ERR;
+			err_cnt++;
+		}
+		/* NOTE(review): iput() runs before inode_unlock(); looks safe
+		 * only because igrab() above pins queued inodes — confirm the
+		 * non-queued error path cannot drop the last reference here. */
+		iput(inode);
+		__unlock_super(sb);
+		inode_unlock(inode);
+	}
+
+	/* Return error if all chunks are invalid */
+	if (err_cnt == sb_dfr->nr_chunks - 1) {
+		dfr_debug("%s failed (err_cnt %d)", __func__, err_cnt);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+
+/**
+ * @fn		defrag_check_fs_busy
+ * @brief	check if this module busy
+ * @return	0 when idle, 1 otherwise
+ * @param	sb		super block
+ * @param	reserved_clus	# of reserved clusters (out)
+ * @param	queued_pages	# of queued pages (out)
+ *
+ * The FS counts as busy while any delayed-allocation clusters are
+ * reserved or any pages are queued for writeback.
+ */
+static int
+defrag_check_fs_busy(
+	IN struct super_block *sb,
+	OUT int *reserved_clus,
+	OUT int *queued_pages)
+{
+	FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+	int busy;
+
+	__lock_super(sb);
+	*reserved_clus = fsi->reserved_clusters;
+	*queued_pages = atomic_read(&SDFAT_SB(sb)->stat_n_pages_queued);
+	busy = (*reserved_clus || *queued_pages) ? 1 : 0;
+	__unlock_super(sb);
+
+	return busy;
+}
+
+
+/**
+ * @fn		sdfat_ioctl_defrag_req
+ * @brief	ioctl to send defrag requests
+ * @return	0 on success, -errno otherwise
+ * @param	inode	inode
+ * @param	uarg	given requests
+ *
+ * Flow: claim the per-sb defrag slot, copy the request chunks from
+ * userspace, validate them, then wait (mode-dependent timeout) for the
+ * writeback path to complete the defrag; on timeout, force FAT updates
+ * and sync manually.  Always cleans up and returns the slot to IDLE.
+ */
+static int
+sdfat_ioctl_defrag_req(
+	IN struct inode *inode,
+	INOUT unsigned int *uarg)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct defrag_info *sb_dfr = &(sbi->dfr_info);
+	struct defrag_chunk_header head;
+	struct defrag_chunk_info *chunks = NULL;
+	unsigned int len = 0;
+	int err = 0;
+	unsigned long timeout = 0;
+
+	/* Check overlapped defrag */
+	if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) {
+		dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat));
+		return -EBUSY;
+	}
+
+	/* Check if defrag required */
+	__lock_super(sb);
+	if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) {
+		dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC);
+		atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+		__unlock_super(sb);
+		return -ENOSPC;
+	}
+	__unlock_super(sb);
+
+	/* Copy args */
+	/* NOTE(review): head is a defrag_chunk_header but the copy size is
+	 * sizeof(struct defrag_chunk_info) — safe only if header >= info in
+	 * size or the two overlay intentionally; confirm against sdfat.h.
+	 * Also, copy_from_user() returns the number of uncopied bytes (a
+	 * positive count), not -errno, and that value flows into err. */
+	memset(&head, 0, sizeof(struct defrag_chunk_header));
+	err = copy_from_user(&head, uarg, sizeof(struct defrag_chunk_info));
+	ERR_HANDLE(err);
+
+	/* If FS busy, cancel defrag */
+	if (!(head.mode == DFR_MODE_TEST)) {
+		int reserved_clus = 0, queued_pages = 0;
+
+		err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages);
+		if (err) {
+			dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)",
+				  reserved_clus, queued_pages);
+			err = -EBUSY;
+			goto error;
+		}
+	}
+
+	/* Total length is saved in the chunk header's nr_chunks field */
+	len = head.nr_chunks;
+	ERR_HANDLE2(!len, err, -EINVAL);
+
+	dfr_debug("IOC_DFR_REQ started (mode %d, nr_req %d)", head.mode, len - 1);
+	/* Reject user-supplied lengths too large to allocate contiguously. */
+	if (get_order(len * sizeof(struct defrag_chunk_info)) > MAX_ORDER) {
+		dfr_debug("len %d, sizeof(struct defrag_chunk_info) %lu, MAX_ORDER %d",
+			  len, sizeof(struct defrag_chunk_info), MAX_ORDER);
+		err = -EINVAL;
+		goto error;
+	}
+	chunks = alloc_pages_exact(len * sizeof(struct defrag_chunk_info),
+				   GFP_KERNEL | __GFP_ZERO);
+	ERR_HANDLE2(!chunks, err, -ENOMEM)
+
+	err = copy_from_user(chunks, uarg, len * sizeof(struct defrag_chunk_info));
+	ERR_HANDLE(err);
+
+	/* Initialize sb_dfr */
+	sb_dfr->chunks = chunks;
+	sb_dfr->nr_chunks = len;
+
+	/* Validate reqs & mark defrag/dirty */
+	err = defrag_validate_reqs(sb, sb_dfr->chunks);
+	ERR_HANDLE(err);
+
+	atomic_set(&sb_dfr->stat, DFR_SB_STAT_VALID);
+
+	/* Wait for defrag completion */
+	if (head.mode == DFR_MODE_ONESHOT)
+		timeout = 0;
+	else if (head.mode & DFR_MODE_BACKGROUND)
+		timeout = DFR_DEFAULT_TIMEOUT;
+	else
+		timeout = DFR_MIN_TIMEOUT;
+
+	dfr_debug("Wait for completion (timeout %ld)", timeout);
+	init_completion(&sbi->dfr_complete);
+	timeout = wait_for_completion_timeout(&sbi->dfr_complete, timeout);
+
+	if (!timeout) {
+		/* Force defrag_updat_fat() after timeout. */
+		dfr_debug("Force sync(), mode %d, left-timeout %ld", head.mode, timeout);
+
+		/* Hold s_umount so the sb cannot be unmounted mid-sync. */
+		down_read(&sb->s_umount);
+
+		sync_inodes_sb(sb);
+
+		__lock_super(sb);
+		fsapi_dfr_update_fat_next(sb);
+
+		fsapi_sync_fs(sb, 1);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+		/* SPO test */
+		fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+
+		fsapi_dfr_update_fat_prev(sb, 1);
+		fsapi_sync_fs(sb, 1);
+
+		__unlock_super(sb);
+
+		up_read(&sb->s_umount);
+	}
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+	/* SPO test */
+	fsapi_dfr_spo_test(sb, DFR_SPO_NORMAL, __func__);
+#endif
+
+	__lock_super(sb);
+	/* Send DISCARD to clean-ed AUs */
+	fsapi_dfr_check_discard(sb);
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+	/* SPO test */
+	fsapi_dfr_spo_test(sb, DFR_SPO_DISCARD, __func__);
+#endif
+
+	/* Unmark IGNORE flag to all victim AUs */
+	fsapi_dfr_unmark_ignore_all(sb);
+	__unlock_super(sb);
+
+	/* Report per-chunk status back to userspace. */
+	err = copy_to_user(uarg, sb_dfr->chunks, sizeof(struct defrag_chunk_info) * len);
+	ERR_HANDLE(err);
+
+error:
+	/* Clean-up sb_dfr & ino_dfr */
+	defrag_cleanup_reqs(sb, err);
+
+	if (chunks)
+		free_pages_exact(chunks, len * sizeof(struct defrag_chunk_info));
+
+	/* Set sb_dfr's state as IDLE */
+	atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+
+	dfr_debug("IOC_DFR_REQ done (err %d)", err);
+	return err;
+}
+
+/**
+ * @fn		sdfat_ioctl_defrag_trav
+ * @brief	ioctl to traverse given directory for defrag
+ * @return	0 on success, -errno otherwise
+ * @param	inode	inode of the directory to scan
+ * @param	uarg	user buffer (one page: header + trav args in, results out)
+ *
+ * Claims the per-sb defrag slot, copies the traversal request into the
+ * dfr_pagep scratch page, scans the directory under inode_lock +
+ * super lock, and copies the gathered info back to userspace.
+ */
+static int
+sdfat_ioctl_defrag_trav(
+	IN struct inode *inode,
+	INOUT unsigned int *uarg)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct defrag_info *sb_dfr = &(sbi->dfr_info);
+	struct defrag_trav_arg *args = (struct defrag_trav_arg *) sbi->dfr_pagep;
+	struct defrag_trav_header *header = (struct defrag_trav_header *) args;
+	int err = 0;
+
+	/* Check overlapped defrag */
+	if (atomic_cmpxchg(&sb_dfr->stat, DFR_SB_STAT_IDLE, DFR_SB_STAT_REQ)) {
+		dfr_debug("sb_dfr->stat %d", atomic_read(&sb_dfr->stat));
+		return -EBUSY;
+	}
+
+	/* Check if defrag required */
+	__lock_super(sb);
+	if (!fsapi_dfr_check_dfr_required(sb, NULL, NULL, NULL)) {
+		dfr_debug("Not enough space left for defrag (err %d)", -ENOSPC);
+		atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+		__unlock_super(sb);
+		return -ENOSPC;
+	}
+	__unlock_super(sb);
+
+	/* Copy args */
+	err = copy_from_user(args, uarg, PAGE_SIZE);
+	ERR_HANDLE(err);
+
+	/**
+	 * Check args.
+	 * ROOT directory has i_pos = 0 and start_clus = 0 .
+	 */
+	if (!(header->type & DFR_TRAV_TYPE_HEADER)) {
+		err = -EINVAL;
+		dfr_debug("type %d, i_pos %08llx, start_clus %08x",
+			  header->type, header->i_pos, header->start_clus);
+		goto error;
+	}
+
+	/* If FS busy, cancel defrag */
+	if (!(header->type & DFR_TRAV_TYPE_TEST)) {
+		/*
+		 * FIX: these were "unsigned int", mismatching the "int *"
+		 * out-parameters of defrag_check_fs_busy() (and the int
+		 * locals used by the IOC_DFR_REQ path).
+		 */
+		int reserved_clus = 0, queued_pages = 0;
+
+		err = defrag_check_fs_busy(sb, &reserved_clus, &queued_pages);
+		if (err) {
+			dfr_debug("FS busy, cancel defrag (reserved_clus %d, queued_pages %d)",
+				  reserved_clus, queued_pages);
+			err = -EBUSY;
+			goto error;
+		}
+	}
+
+	/* Scan given directory and gather info */
+	inode_lock(inode);
+	__lock_super(sb);
+	err = fsapi_dfr_scan_dir(sb, (void *)args);
+	__unlock_super(sb);
+	inode_unlock(inode);
+	ERR_HANDLE(err);
+
+	/* Copy the result to user */
+	err = copy_to_user(uarg, args, PAGE_SIZE);
+	ERR_HANDLE(err);
+
+error:
+	/* Scratch page is shared with the REQ path — leave it clean. */
+	memset(sbi->dfr_pagep, 0, PAGE_SIZE);
+
+	atomic_set(&sb_dfr->stat, DFR_SB_STAT_IDLE);
+	return err;
+}
+
+/**
+ * @fn		sdfat_ioctl_defrag_info
+ * @brief	ioctl to get HW param info
+ * @return	0 on success, -errno otherwise
+ * @param	sb	super block
+ * @param	uarg	output buffer
+ */
+static int
+sdfat_ioctl_defrag_info(
+	IN struct super_block *sb,
+	OUT unsigned int *uarg)
+{
+	struct defrag_info_arg info_arg;
+	int err = 0;
+
+	memset(&info_arg, 0, sizeof(struct defrag_info_arg));
+
+	__lock_super(sb);
+	err = fsapi_dfr_get_info(sb, &info_arg);
+	__unlock_super(sb);
+	ERR_HANDLE(err);
+	dfr_debug("IOC_DFR_INFO: sec_per_au %d, hidden_sectors %d",
+		  info_arg.sec_per_au, info_arg.hidden_sectors);
+
+	/* NOTE(review): copy_to_user() returns the count of uncopied bytes
+	 * (positive), not -errno; that count is returned as-is on failure. */
+	err = copy_to_user(uarg, &info_arg, sizeof(struct defrag_info_arg));
+error:
+	return err;
+}
+
+#endif /* CONFIG_SDFAT_DFR */
+
+/* Defrag-aware cluster mapping; compiles to a no-op (0) without
+ * CONFIG_SDFAT_DFR so callers need no #ifdefs. */
+static inline int __do_dfr_map_cluster(struct inode *inode, u32 clu_offset, unsigned int *clus_ptr)
+{
+#ifdef CONFIG_SDFAT_DFR
+	return fsapi_dfr_map_clus(inode, clu_offset, clus_ptr);
+#else
+	return 0;
+#endif
+}
+
+/* Return 1 if the byte range [start, end] of this inode is covered by an
+ * active defrag request, 0 otherwise (always 0 without CONFIG_SDFAT_DFR). */
+static inline int __check_dfr_on(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+
+	if ((atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ) &&
+	    fsapi_dfr_check_dfr_on(inode, start, end, 0, fname))
+		return 1;
+#endif
+	return 0;
+}
+
+/* Cancel any in-flight defrag overlapping [start, end] (the final 1
+ * argument to fsapi_dfr_check_dfr_on selects cancel mode).  Always
+ * returns 0; a no-op without CONFIG_SDFAT_DFR. */
+static inline int __cancel_dfr_work(struct inode *inode, loff_t start, loff_t end, const char *fname)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info *ino_dfr = &(SDFAT_I(inode)->dfr_info);
+	/* Cancel DEFRAG */
+	if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+		fsapi_dfr_check_dfr_on(inode, start, end, 1, fname);
+#endif
+	return 0;
+}
+
+/* Writeback-completion hook: notify the defrag engine when the page's
+ * inode has an active defrag request.  No-op without CONFIG_SDFAT_DFR. */
+static inline int __dfr_writepage_end_io(struct page *page)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info *ino_dfr = &(SDFAT_I(page->mapping->host)->dfr_info);
+
+	if (atomic_read(&ino_dfr->stat) == DFR_INO_STAT_REQ)
+		fsapi_dfr_writepage_endio(page);
+#endif
+	return 0;
+}
+
+/* Initialize the per-inode defrag_info (zeroed, empty list, fresh mutex).
+ * No-op without CONFIG_SDFAT_DFR. */
+static inline void __init_dfr_info(struct inode *inode)
+{
+#ifdef CONFIG_SDFAT_DFR
+	memset(&(SDFAT_I(inode)->dfr_info), 0, sizeof(struct defrag_info));
+	INIT_LIST_HEAD(&(SDFAT_I(inode)->dfr_info.entry));
+	mutex_init(&(SDFAT_I(inode)->dfr_info.lock));
+#endif
+}
+
+/* Allocate the per-sb defrag scratch buffers when the "defrag" mount
+ * option is set.  Returns 0 on success or when defrag is off; -ENOMEM
+ * on allocation failure.
+ * NOTE(review): on partial failure earlier buffers are left allocated —
+ * presumably the caller invokes __free_dfr_mem_if_required() on the
+ * error path; confirm at the call site. */
+static inline int __alloc_dfr_mem_if_required(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+	if (!sbi->options.defrag)
+		return 0;
+
+	memset(&sbi->dfr_info, 0, sizeof(struct defrag_info));
+	INIT_LIST_HEAD(&(sbi->dfr_info.entry));
+	mutex_init(&(sbi->dfr_info.lock));
+
+	/* One page tracking the newly-assigned cluster for each chunk. */
+	sbi->dfr_new_clus = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!sbi->dfr_new_clus) {
+		dfr_debug("error %d", -ENOMEM);
+		return -ENOMEM;
+	}
+	sbi->dfr_new_idx = 1;
+
+	/* One page of per-cluster writeback bitmaps. */
+	sbi->dfr_page_wb = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!sbi->dfr_page_wb) {
+		dfr_debug("error %d", -ENOMEM);
+		return -ENOMEM;
+	}
+
+	/* Page-pointer scratch array, one slot per page of an AU. */
+	sbi->dfr_pagep = alloc_pages_exact(sizeof(struct page *) *
+			PAGES_PER_AU(sb), GFP_KERNEL | __GFP_ZERO);
+	if (!sbi->dfr_pagep) {
+		dfr_debug("error %d", -ENOMEM);
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+/* Free the per-sb defrag scratch buffers allocated by
+ * __alloc_dfr_mem_if_required(); safe to call after partial allocation. */
+static void __free_dfr_mem_if_required(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+	if (sbi->dfr_pagep) {
+		free_pages_exact(sbi->dfr_pagep, sizeof(struct page *) * PAGES_PER_AU(sb));
+		sbi->dfr_pagep = NULL;
+	}
+
+	/* kfree(NULL) is a no-op, so no NULL checks needed below */
+	kfree(sbi->dfr_page_wb);
+	sbi->dfr_page_wb = NULL;
+
+	kfree(sbi->dfr_new_clus);
+	sbi->dfr_new_clus = NULL;
+#endif
+}
+
+
+/* ->mmap: cancel any defrag overlapping the mapped range first (mapped
+ * pages fail defrag validation anyway), then fall through to the
+ * generic implementation. */
+static int sdfat_file_mmap(struct file *file, struct vm_area_struct *vm_struct)
+{
+	struct inode *host = file->f_mapping->host;
+	loff_t first = (loff_t)vm_struct->vm_start;
+	loff_t last = (loff_t)(vm_struct->vm_end - 1);
+
+	__cancel_dfr_work(host, first, last, __func__);
+
+	return generic_file_mmap(file, vm_struct);
+}
+
+/* Return the volume serial number recorded in the FS info block. */
+static int sdfat_ioctl_volume_id(struct inode *dir)
+{
+	return SDFAT_SB(dir->i_sb)->fsi.vol_id;
+}
+
+/*
+ * Dispatch the defrag ioctls (INFO/TRAV/REQ, plus the SPO debug flag).
+ * Each command verifies, under the super lock: FAT32 volume, the
+ * "defrag" mount option, and (for INFO/REQ) that the ioctl targets the
+ * mount root.  Returns -ENOTTY for unknown commands or when
+ * CONFIG_SDFAT_DFR is off, so the caller can try the next handler.
+ */
+static int sdfat_dfr_ioctl(struct inode *inode, struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+#ifdef CONFIG_SDFAT_DFR
+	switch (cmd) {
+	case SDFAT_IOCTL_DFR_INFO: {
+		struct super_block *sb = inode->i_sb;
+		FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+		unsigned int __user *uarg = (unsigned int __user *) arg;
+
+		__lock_super(sb);
+		/* Check FS type (FAT32 only) */
+		if (fsi->vol_type != FAT32) {
+			dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Check if SB's defrag option enabled */
+		if (!(SDFAT_SB(sb)->options.defrag)) {
+			dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Only IOCTL on mount-point allowed */
+		if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) {
+			dfr_err("IOC_DFR_INFO only allowed on ROOT, root %p, dentry %p",
+				filp->f_path.mnt->mnt_root, filp->f_path.dentry);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+		__unlock_super(sb);
+
+		return sdfat_ioctl_defrag_info(sb, uarg);
+	}
+	case SDFAT_IOCTL_DFR_TRAV: {
+		struct super_block *sb = inode->i_sb;
+		FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+		unsigned int __user *uarg = (unsigned int __user *) arg;
+
+		__lock_super(sb);
+		/* Check FS type (FAT32 only) */
+		if (fsi->vol_type != FAT32) {
+			dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Check if SB's defrag option enabled */
+		if (!(SDFAT_SB(sb)->options.defrag)) {
+			dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+		__unlock_super(sb);
+
+		return sdfat_ioctl_defrag_trav(inode, uarg);
+	}
+	case SDFAT_IOCTL_DFR_REQ: {
+		struct super_block *sb = inode->i_sb;
+		FS_INFO_T *fsi = &SDFAT_SB(sb)->fsi;
+		unsigned int __user *uarg = (unsigned int __user *) arg;
+
+		__lock_super(sb);
+
+		/* Check if FS_ERROR occurred */
+		if (sb->s_flags & MS_RDONLY) {
+			dfr_err("RDONLY partition (err %d)", -EPERM);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Check FS type (FAT32 only) */
+		if (fsi->vol_type != FAT32) {
+			dfr_err("Defrag not supported, vol_type %d", fsi->vol_type);
+			__unlock_super(sb);
+			return -EINVAL;
+		}
+
+		/* Check if SB's defrag option enabled */
+		if (!(SDFAT_SB(sb)->options.defrag)) {
+			dfr_err("Defrag not supported, sbi->options.defrag %d", SDFAT_SB(sb)->options.defrag);
+			__unlock_super(sb);
+			return -EPERM;
+		}
+
+		/* Only IOCTL on mount-point allowed */
+		/* FIX: message said IOC_DFR_INFO (copy-paste from the INFO case) */
+		if (filp->f_path.mnt->mnt_root != filp->f_path.dentry) {
+			dfr_err("IOC_DFR_REQ only allowed on ROOT, root %p, dentry %p",
+				filp->f_path.mnt->mnt_root, filp->f_path.dentry);
+			__unlock_super(sb);
+			return -EINVAL;
+		}
+		__unlock_super(sb);
+
+		return sdfat_ioctl_defrag_req(inode, uarg);
+	}
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+	case SDFAT_IOCTL_DFR_SPO_FLAG: {
+		struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+		int ret = 0;
+
+		ret = get_user(sbi->dfr_spo_flag, (int __user *)arg);
+		dfr_debug("dfr_spo_flag %d", sbi->dfr_spo_flag);
+
+		return ret;
+	}
+#endif	/* CONFIG_SDFAT_DFR_DEBUG */
+	}
+#endif	/* CONFIG_SDFAT_DFR */
+
+	/* Inappropriate ioctl for device */
+	return -ENOTTY;
+}
+
+/* Debug ioctls (get/set debug flags, forced panic).  Only compiled in
+ * with CONFIG_SDFAT_DBG_IOCTL; otherwise every command gets -ENOTTY. */
+static int sdfat_dbg_ioctl(struct inode *inode, struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	unsigned int flags;
+
+	switch (cmd) {
+	case SDFAT_IOC_GET_DEBUGFLAGS:
+		flags = sbi->debug_flags;
+		return put_user(flags, (int __user *)arg);
+	case SDFAT_IOC_SET_DEBUGFLAGS:
+		flags = 0;
+		/* Setting debug flags is an admin-only operation. */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		__lock_super(sb);
+		sbi->debug_flags = flags;
+		__unlock_super(sb);
+		return 0;
+	case SDFAT_IOCTL_PANIC:
+		panic("ioctl panic for test");
+
+		/* panic() does not return; kept for form only */
+		return 0;
+	}
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+	return -ENOTTY;
+}
+
+/* Top-level ioctl dispatcher: volume-id first, then defrag handlers,
+ * then debug handlers; -ENOTTY from one handler falls through to the
+ * next. */
+static long sdfat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	int err;
+
+	if (cmd == SDFAT_IOCTL_GET_VOLUME_ID)
+		return sdfat_ioctl_volume_id(inode);
+
+	err = sdfat_dfr_ioctl(inode, filp, cmd, arg);
+	if (err == -ENOTTY)
+		err = sdfat_dbg_ioctl(inode, filp, cmd, arg);
+
+	return err;
+}
+
+/* Common ->getattr body: generic attributes, with blksize overridden to
+ * the volume's cluster size. */
+static int __sdfat_getattr(struct inode *inode, struct kstat *stat)
+{
+	TMSG("%s entered\n", __func__);
+
+	generic_fillattr(inode, stat);
+	stat->blksize = SDFAT_SB(inode->i_sb)->fsi.cluster_size;
+
+	TMSG("%s exited\n", __func__);
+	return 0;
+}
+
+/* Common write-bio completion: propagate errors to the page/mapping,
+ * notify the defrag engine, account trace stats, and finish writeback. */
+static void __sdfat_writepage_end_io(struct bio *bio, int err)
+{
+	struct page *page = bio->bi_io_vec->bv_page;
+	struct super_block *sb = page->mapping->host->i_sb;
+
+	ASSERT(bio->bi_vcnt == 1);	/* Single page endio */
+	ASSERT(bio_data_dir(bio));	/* Write */
+
+	if (err) {
+		SetPageError(page);
+		mapping_set_error(page->mapping, err);
+	}
+
+	__dfr_writepage_end_io(page);
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+	{
+		//struct sdfat_sb_info *sbi = SDFAT_SB(bio->bi_bdev->bd_super);
+		struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+		sbi->stat_n_pages_written++;
+		if (page->mapping->host == sb->s_bdev->bd_inode)
+			sbi->stat_n_bdev_pages_written++;
+
+		/* 4 MB = 1024 pages => 0.4 sec (approx.)
+		 * 32 KB = 64 pages => 0.025 sec
+		 * Min. average latency b/w msgs. ~= 0.025 sec
+		 */
+		if ((sbi->stat_n_pages_written & 63) == 0) {
+			/* NOTE(review): field spelled "witten" here vs
+			 * "written" in the increment above — one of the two
+			 * spellings must match the struct; verify in sdfat.h. */
+			DMSG("STAT:%u, %u, %u, %u (Sector #: %u)\n",
+			     sbi->stat_n_pages_added, sbi->stat_n_pages_written,
+			     sbi->stat_n_bdev_pages_witten,
+			     sbi->stat_n_pages_confused,
+			     (unsigned int)__sdfat_bio_sector(bio));
+		}
+	}
+#endif
+	end_page_writeback(page);
+	bio_put(bio);
+
+	// Update trace info.
+	atomic_dec(&SDFAT_SB(sb)->stat_n_pages_queued);
+}
+
+
+/* Return 1 if ->write_inode can sync metadata itself (exFAT with
+ * directory-sync support), 0 if fsync must call fsapi_sync_fs(). */
+static int __support_write_inode_sync(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_SUPPORT_DIR_SYNC
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+	/* With delayed meta dirty, only exFAT volumes qualify. */
+	if (sbi->fsi.vol_type != EXFAT)
+		return 0;
+#endif
+	return 1;
+#endif
+	return 0;
+}
+
+
+/*
+ * fsync implementation: run the generic range fsync first, then — if
+ * inode-level sync is not supported on this volume — force a full
+ * filesystem sync.  The generic fsync's error takes precedence.
+ */
+static int __sdfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	int res, err = 0;
+
+	res = __sdfat_generic_file_fsync(filp, start, end, datasync);
+
+	/* no per-inode sync support -> sync the whole fs (wait=1) */
+	if (!__support_write_inode_sync(sb))
+		err = fsapi_sync_fs(sb, 1);
+
+	return res ? res : err;
+}
+
+
+/* Directory file operations; iterate vs. readdir selected by kernel version. */
+static const struct file_operations sdfat_dir_operations = {
+	.llseek     = generic_file_llseek,
+	.read       = generic_read_dir,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+	.iterate    = sdfat_iterate,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) */
+	.readdir    = sdfat_readdir,
+#endif
+	.fsync      = sdfat_file_fsync,
+	.unlocked_ioctl = sdfat_generic_ioctl,
+};
+
+/*
+ * Create a regular file named after @dentry in directory @dir.
+ *
+ * Under the superblock lock: create the on-disk entry, bump the parent's
+ * version and timestamps (synchronously for DIRSYNC dirs), build the new
+ * in-core inode and instantiate the dentry.  Returns 0 or negative errno.
+ */
+static int __sdfat_create(struct inode *dir, struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	sdfat_timespec_t ts;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	ts = CURRENT_TIME_SEC;
+
+	err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_REGULAR, &fid);
+	if (err)
+		goto out;
+
+	/* keep d_revalidate() out of the way until the dentry is ready */
+	__lock_d_revalidate(dentry);
+
+	inode_inc_iversion(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+	if (IS_DIRSYNC(dir))
+		(void) sdfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	i_pos = sdfat_make_i_pos(&fid);
+	inode = sdfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+	d_instantiate(dentry, inode);
+out:
+	__unlock_d_revalidate(dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	if (!err)
+		sdfat_statistics_set_create(fid.flags);
+	return err;
+}
+
+
+/*
+ * Resolve @qname inside directory @dir into @fid.
+ * Returns 0 on success, -ENOENT for an empty name or a failed lookup.
+ */
+static int sdfat_find(struct inode *dir, struct qstr *qname, FILE_ID_T *fid)
+{
+	if (!qname->len)
+		return -ENOENT;
+
+	if (fsapi_lookup(dir, (u8 *) qname->name, fid))
+		return -ENOENT;
+
+	return 0;
+}
+
+/* True if @dentry is a disconnected root alias (IS_ROOT + DCACHE_DISCONNECTED). */
+static int sdfat_d_anon_disconn(struct dentry *dentry)
+{
+	if (!IS_ROOT(dentry))
+		return 0;
+
+	return (dentry->d_flags & DCACHE_DISCONNECTED) != 0;
+}
+
+/*
+ * ->lookup() worker: resolve @dentry inside @dir.
+ *
+ * On a hit the inode is built (or found in cache); for symlinks the
+ * target path is read once and cached in SDFAT_I(inode)->target.  If a
+ * usable alias dentry already exists for the inode, it is reused
+ * (rehashed or moved) instead of splicing a new dentry.  Returns the
+ * dentry to use, NULL, or ERR_PTR() on failure.
+ */
+static struct dentry *__sdfat_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	struct dentry *alias;
+	int err;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	u64 ret;
+	mode_t i_mode;
+
+	__lock_super(sb);
+	TMSG("%s entered\n", __func__);
+	err = sdfat_find(dir, &dentry->d_name, &fid);
+	if (err) {
+		if (err == -ENOENT) {
+			/* negative lookup: splice a NULL inode below */
+			inode = NULL;
+			goto out;
+		}
+		goto error;
+	}
+
+	i_pos = sdfat_make_i_pos(&fid);
+	inode = sdfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto error;
+	}
+
+	i_mode = inode->i_mode;
+	if (S_ISLNK(i_mode) && !SDFAT_I(inode)->target) {
+		/* cache the link target (NUL-terminated) on first lookup */
+		SDFAT_I(inode)->target = kmalloc((i_size_read(inode)+1), GFP_KERNEL);
+		if (!SDFAT_I(inode)->target) {
+			err = -ENOMEM;
+			goto error;
+		}
+		fsapi_read_link(dir, &fid, SDFAT_I(inode)->target, i_size_read(inode), &ret);
+		*(SDFAT_I(inode)->target + i_size_read(inode)) = '\0';
+	}
+
+	alias = d_find_alias(inode);
+
+	/*
+	 * Checking "alias->d_parent == dentry->d_parent" to make sure
+	 * FS is not corrupted (especially double linked dir).
+	 */
+	if (alias && alias->d_parent == dentry->d_parent &&
+	    !sdfat_d_anon_disconn(alias)) {
+
+		/*
+		 * Unhashed alias is able to exist because of revalidate()
+		 * called by lookup_fast. You can easily make this status
+		 * by calling create and lookup concurrently
+		 * In such case, we reuse an alias instead of new dentry
+		 */
+		if (d_unhashed(alias)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+			BUG_ON(alias->d_name.hash != dentry->d_name.hash && alias->d_name.len != dentry->d_name.len);
+#else
+			BUG_ON(alias->d_name.hash_len != dentry->d_name.hash_len);
+#endif
+			sdfat_msg(sb, KERN_INFO, "rehashed a dentry(%p) "
+				"in read lookup", alias);
+			d_drop(dentry);
+			d_rehash(alias);
+		} else if (!S_ISDIR(i_mode)) {
+			/*
+			 * This inode has non anonymous-DCACHE_DISCONNECTED
+			 * dentry. This means, the user did ->lookup() by an
+			 * another name (longname vs 8.3 alias of it) in past.
+			 *
+			 * Switch to new one for reason of locality if possible.
+			 */
+			d_move(alias, dentry);
+		}
+		/* d_find_alias() took a dentry ref; inode ref is dropped here */
+		iput(inode);
+		__unlock_super(sb);
+		TMSG("%s exited\n", __func__);
+		return alias;
+	}
+	dput(alias);
+out:
+	/* initialize d_time even though it is positive dentry */
+	dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+	__unlock_super(sb);
+
+	dentry = d_splice_alias(inode, dentry);
+
+	TMSG("%s exited\n", __func__);
+	return dentry;
+error:
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * Remove the file behind @dentry from @dir.
+ *
+ * Cancels any pending defrag work on the inode, unlinks the on-disk
+ * entry, refreshes parent timestamps/version, clears the victim's link
+ * count and detaches its i_pos mapping.  Returns 0 or negative errno.
+ */
+static int sdfat_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	sdfat_timespec_t ts;
+	int err;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	ts = CURRENT_TIME_SEC;
+
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+	/* defrag work on the whole file must not outlive the entry */
+	__cancel_dfr_work(inode, 0, SDFAT_I(inode)->fid.size, __func__);
+
+	err = fsapi_unlink(dir, &(SDFAT_I(inode)->fid));
+	if (err)
+		goto out;
+
+	__lock_d_revalidate(dentry);
+
+	inode_inc_iversion(dir);
+	dir->i_mtime = dir->i_atime = ts;
+	if (IS_DIRSYNC(dir))
+		(void) sdfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	clear_nlink(inode);
+	inode->i_mtime = inode->i_atime = ts;
+	sdfat_detach(inode);
+	dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+out:
+	__unlock_d_revalidate(dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	return err;
+}
+
+/*
+ * Create a symlink @dentry in @dir pointing at @target.
+ *
+ * Only permitted when the "symlink" mount option is enabled.  The target
+ * path is written into the backing file and also cached (NUL-terminated)
+ * in SDFAT_I(inode)->target.  Returns 0 or a negative errno.
+ */
+static int sdfat_symlink(struct inode *dir, struct dentry *dentry, const char *target)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	sdfat_timespec_t ts;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+	u64 len = (u64) strlen(target);
+	u64 ret;
+
+	/* symlink option check */
+	if (!SDFAT_SB(sb)->options.symlink)
+		return -ENOTSUPP;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	ts = CURRENT_TIME_SEC;
+
+	err = fsapi_create(dir, (u8 *) dentry->d_name.name, FM_SYMLINK, &fid);
+	if (err)
+		goto out;
+
+	err = fsapi_write_link(dir, &fid, (char *) target, len, &ret);
+
+	if (err) {
+		/* writing the target failed: undo the on-disk entry */
+		fsapi_remove(dir, &fid);
+		goto out;
+	}
+
+	__lock_d_revalidate(dentry);
+
+	inode_inc_iversion(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+	if (IS_DIRSYNC(dir))
+		(void) sdfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	i_pos = sdfat_make_i_pos(&fid);
+	inode = sdfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+	SDFAT_I(inode)->target = kmalloc((len+1), GFP_KERNEL);
+	if (!SDFAT_I(inode)->target) {
+		/* fix: drop the reference obtained from sdfat_build_inode();
+		 * it was previously leaked on this ENOMEM path
+		 */
+		iput(inode);
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(SDFAT_I(inode)->target, target, len+1);
+
+	d_instantiate(dentry, inode);
+out:
+	__unlock_d_revalidate(dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	return err;
+}
+
+
+/*
+ * Create a subdirectory named after @dentry in @dir.
+ *
+ * Under the superblock lock: create the on-disk directory, bump the
+ * parent's version, timestamps and link count, build the in-core inode
+ * and instantiate the dentry.  Returns 0 or a negative errno.
+ */
+static int __sdfat_mkdir(struct inode *dir, struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	sdfat_timespec_t ts;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	ts = CURRENT_TIME_SEC;
+
+	err = fsapi_mkdir(dir, (u8 *) dentry->d_name.name, &fid);
+	if (err)
+		goto out;
+
+	__lock_d_revalidate(dentry);
+
+	inode_inc_iversion(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = ts;
+	if (IS_DIRSYNC(dir))
+		(void) sdfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+	inc_nlink(dir);
+
+	i_pos = sdfat_make_i_pos(&fid);
+	inode = sdfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+	d_instantiate(dentry, inode);
+
+out:
+	__unlock_d_revalidate(dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	if (!err)
+		sdfat_statistics_set_mkdir(fid.flags);
+	return err;
+}
+
+
+/*
+ * Remove the (empty) directory behind @dentry from @dir.
+ *
+ * Removes the on-disk directory, updates the parent's timestamps and
+ * drops one parent link, then zeroes the victim's link count and
+ * detaches its i_pos mapping.  Returns 0 or a negative errno.
+ */
+static int sdfat_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	sdfat_timespec_t ts;
+	int err;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	ts = CURRENT_TIME_SEC;
+
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+	err = fsapi_rmdir(dir, &(SDFAT_I(inode)->fid));
+	if (err)
+		goto out;
+
+	__lock_d_revalidate(dentry);
+
+	inode_inc_iversion(dir);
+	dir->i_mtime = dir->i_atime = ts;
+	if (IS_DIRSYNC(dir))
+		(void) sdfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+	drop_nlink(dir);
+
+	clear_nlink(inode);
+	inode->i_mtime = inode->i_atime = ts;
+	sdfat_detach(inode);
+	dentry->d_time = (unsigned long)inode_peek_iversion(dir);
+out:
+	__unlock_d_revalidate(dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	return err;
+}
+
+/*
+ * Rename/move @old_dentry from @old_dir to @new_dentry in @new_dir.
+ *
+ * Cancels defrag work on the source inode, performs the on-disk rename,
+ * re-attaches the source inode at its new i_pos, fixes up directory link
+ * counts for cross-directory directory moves, and drops the link count
+ * of a replaced target inode.  Returns 0 or a negative errno.
+ */
+static int __sdfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode, *new_inode;
+	struct super_block *sb = old_dir->i_sb;
+	sdfat_timespec_t ts;
+	loff_t i_pos;
+	int err;
+
+	__lock_super(sb);
+
+	TMSG("%s entered\n", __func__);
+
+	old_inode = old_dentry->d_inode;
+	new_inode = new_dentry->d_inode;
+
+	ts = CURRENT_TIME_SEC;
+
+	SDFAT_I(old_inode)->fid.size = i_size_read(old_inode);
+
+	__cancel_dfr_work(old_inode, 0, 1, __func__);
+
+	err = fsapi_rename(old_dir, &(SDFAT_I(old_inode)->fid), new_dir, new_dentry);
+	if (err)
+		goto out;
+
+	__lock_d_revalidate(old_dentry);
+	__lock_d_revalidate(new_dentry);
+
+	inode_inc_iversion(new_dir);
+	new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = ts;
+	if (IS_DIRSYNC(new_dir))
+		(void) sdfat_sync_inode(new_dir);
+	else
+		mark_inode_dirty(new_dir);
+
+	/* the entry moved on disk: re-key the inode hash by its new i_pos */
+	i_pos = sdfat_make_i_pos(&(SDFAT_I(old_inode)->fid));
+	sdfat_detach(old_inode);
+	sdfat_attach(old_inode, i_pos);
+	if (IS_DIRSYNC(new_dir))
+		(void) sdfat_sync_inode(old_inode);
+	else
+		mark_inode_dirty(old_inode);
+
+	/* a directory's ".." moved: adjust parent link counts */
+	if ((S_ISDIR(old_inode->i_mode)) && (old_dir != new_dir)) {
+		drop_nlink(old_dir);
+		if (!new_inode)
+			inc_nlink(new_dir);
+	}
+
+	inode_inc_iversion(old_dir);
+	old_dir->i_ctime = old_dir->i_mtime = ts;
+	if (IS_DIRSYNC(old_dir))
+		(void) sdfat_sync_inode(old_dir);
+	else
+		mark_inode_dirty(old_dir);
+
+	if (new_inode) {
+		sdfat_detach(new_inode);
+
+		/* skip drop_nlink if new_inode already has been dropped */
+		if (new_inode->i_nlink) {
+			drop_nlink(new_inode);
+			if (S_ISDIR(new_inode->i_mode))
+				drop_nlink(new_inode);
+		} else {
+			EMSG("%s : abnormal access to an inode dropped\n",
+				__func__);
+			WARN_ON(new_inode->i_nlink == 0);
+		}
+		new_inode->i_ctime = ts;
+#if 0
+		(void) sdfat_sync_inode(new_inode);
+#endif
+	}
+
+out:
+	__unlock_d_revalidate(old_dentry);
+	__unlock_d_revalidate(new_dentry);
+	__unlock_super(sb);
+	TMSG("%s exited with err(%d)\n", __func__, err);
+	return err;
+}
+
+/*
+ * Grow @inode to @size by zero-extension (generic_cont_expand_simple),
+ * stamping ctime/mtime.  For O_SYNC inodes the extended range and the
+ * inode itself are written back and waited on; the first error seen is
+ * returned.
+ */
+static int sdfat_cont_expand(struct inode *inode, loff_t size)
+{
+	struct address_space *mapping = inode->i_mapping;
+	loff_t start = i_size_read(inode), count = size - i_size_read(inode);
+	int err, err2;
+
+	err = generic_cont_expand_simple(inode, size);
+	if (err)
+		return err;
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+	mark_inode_dirty(inode);
+
+	if (!IS_SYNC(inode))
+		return 0;
+
+	/* synchronous inode: push data, metadata buffers and the inode now */
+	err = filemap_fdatawrite_range(mapping, start, start + count - 1);
+	err2 = sync_mapping_buffers(mapping);
+	err = (err)?(err):(err2);
+	err2 = write_inode_now(inode, 1);
+	err = (err)?(err):(err2);
+	if (err)
+		return err;
+
+	return filemap_fdatawait_range(mapping, start, start + count - 1);
+}
+
+/*
+ * Non-zero when a non-owner caller may update timestamps on @inode,
+ * based on the allow_utime mount option (group bits apply to group
+ * members, other bits to everyone else).  Returning 0 defers to the
+ * default permission check.
+ */
+static int sdfat_allow_set_time(struct sdfat_sb_info *sbi, struct inode *inode)
+{
+	mode_t allow_utime = sbi->options.allow_utime;
+
+	if (uid_eq(current_fsuid(), inode->i_uid))
+		return 0;	/* owner: fall through to the default check */
+
+	if (in_group_p(inode->i_gid))
+		allow_utime >>= 3;
+
+	return (allow_utime & MAY_WRITE) ? 1 : 0;
+}
+
+/*
+ * Clamp a requested chmod mode to what the volume can represent.
+ *
+ * The r/x bits (modulo the mount umask) must match the current mode; the
+ * w bits must be all-or-nothing, and clearing them is only allowed when
+ * sdfat_mode_can_hold_ro() says a read-only file is representable.  On
+ * success *mode_ptr is rewritten to the representable mode; -EPERM if
+ * the request cannot be honored.
+ */
+static int sdfat_sanitize_mode(const struct sdfat_sb_info *sbi,
+		struct inode *inode, umode_t *mode_ptr)
+{
+	mode_t i_mode, mask, perm;
+
+	i_mode = inode->i_mode;
+
+	/* files and symlinks use fmask, directories use dmask */
+	if (S_ISREG(i_mode) || S_ISLNK(i_mode))
+		mask = sbi->options.fs_fmask;
+	else
+		mask = sbi->options.fs_dmask;
+
+	perm = *mode_ptr & ~(S_IFMT | mask);
+
+	/* Of the r and x bits, all (subject to umask) must be present.*/
+	if ((perm & (S_IRUGO | S_IXUGO)) != (i_mode & (S_IRUGO | S_IXUGO)))
+		return -EPERM;
+
+	if (sdfat_mode_can_hold_ro(inode)) {
+		/* Of the w bits, either all (subject to umask) or none must be present. */
+		if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
+			return -EPERM;
+	} else {
+		/* If sdfat_mode_can_hold_ro(inode) is false, can't change w bits. */
+		if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
+			return -EPERM;
+	}
+
+	*mode_ptr &= S_IFMT | perm;
+
+	return 0;
+}
+
+/*
+ * sdfat_block_truncate_page() zeroes out a mapping from file offset `from'
+ * up to the end of the block which corresponds to `from'.
+ * This is required during truncate to physically zeroout the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ * Also, avoid causing failure from fsx for cases of "data past EOF"
+ *
+ * Thin wrapper: block mapping is delegated to sdfat_get_block().
+ * Returns 0 or a negative errno from block_truncate_page().
+ */
+static int sdfat_block_truncate_page(struct inode *inode, loff_t from)
+{
+	return block_truncate_page(inode->i_mapping, from, sdfat_get_block);
+}
+
+/*
+ * ->setattr(): apply attribute changes with FAT-specific policy.
+ *
+ * Size extension is handled first via sdfat_cont_expand(); timestamp
+ * updates may be permitted by the allow_utime option; uid/gid changes
+ * are limited to the mount-option owner, and mode changes are sanitized
+ * (silently narrowed) rather than rejected.  Shrinking truncates zero
+ * the tail block and go through __sdfat_do_truncate().
+ */
+static int sdfat_setattr(struct dentry *dentry, struct iattr *attr)
+{
+
+	struct sdfat_sb_info *sbi = SDFAT_SB(dentry->d_sb);
+	struct inode *inode = dentry->d_inode;
+	unsigned int ia_valid;
+	int error;
+	loff_t old_size;
+
+	TMSG("%s entered\n", __func__);
+
+	/* grow first; the remaining attrs are applied below */
+	if ((attr->ia_valid & ATTR_SIZE)
+		&& (attr->ia_size > i_size_read(inode))) {
+		error = sdfat_cont_expand(inode, attr->ia_size);
+		if (error || attr->ia_valid == ATTR_SIZE)
+			goto out;
+		attr->ia_valid &= ~ATTR_SIZE;
+	}
+
+	/* Check for setting the inode time. */
+	ia_valid = attr->ia_valid;
+	if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET))
+		&& sdfat_allow_set_time(sbi, inode)) {
+		attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET);
+	}
+
+	error = setattr_prepare(dentry, attr);
+	attr->ia_valid = ia_valid;
+	if (error)
+		goto out;
+
+	if (((attr->ia_valid & ATTR_UID) &&
+		 (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
+		((attr->ia_valid & ATTR_GID) &&
+		 (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
+		((attr->ia_valid & ATTR_MODE) &&
+		 (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | S_IRWXUGO)))) {
+		error = -EPERM;
+		goto out;
+	}
+
+	/*
+	 * We don't return -EPERM here. Yes, strange, but this is too
+	 * old behavior.
+	 */
+	if (attr->ia_valid & ATTR_MODE) {
+		if (sdfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
+			attr->ia_valid &= ~ATTR_MODE;
+	}
+
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+	/* patch 1.2.0 : fixed the problem of size mismatch. */
+	if (attr->ia_valid & ATTR_SIZE) {
+		error = sdfat_block_truncate_page(inode, attr->ia_size);
+		if (error)
+			goto out;
+
+		old_size = i_size_read(inode);
+
+		/* TO CHECK evicting directory works correctly */
+		MMSG("%s: inode(%p) truncate size (%llu->%llu)\n", __func__,
+			inode, (u64)old_size, (u64)attr->ia_size);
+		__sdfat_do_truncate(inode, old_size, attr->ia_size);
+	}
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+out:
+	TMSG("%s exited with err(%d)\n", __func__, error);
+	return error;
+}
+
+/* Inode operations for directories; xattr hooks only with virtual-xattr. */
+static const struct inode_operations sdfat_dir_inode_operations = {
+	.create        = sdfat_create,
+	.lookup        = sdfat_lookup,
+	.unlink        = sdfat_unlink,
+	.symlink       = sdfat_symlink,
+	.mkdir         = sdfat_mkdir,
+	.rmdir         = sdfat_rmdir,
+	.rename        = sdfat_rename,
+	.setattr       = sdfat_setattr,
+	.getattr       = sdfat_getattr,
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+	.listxattr     = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	.setxattr      = sdfat_setxattr,
+	.getxattr      = sdfat_getxattr,
+	.removexattr   = sdfat_removexattr,
+#endif
+#endif
+};
+
+/*======================================================================*/
+/*  File Operations                                                     */
+/*======================================================================*/
+/* Symlink inode operations; link resolution hook chosen by kernel version. */
+static const struct inode_operations sdfat_symlink_inode_operations = {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	.readlink    = generic_readlink,
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+	.get_link    = sdfat_follow_link,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) */
+	.follow_link = sdfat_follow_link,
+#endif
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+	.listxattr     = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	.setxattr      = sdfat_setxattr,
+	.getxattr      = sdfat_getxattr,
+	.removexattr   = sdfat_removexattr,
+#endif
+#endif
+};
+
+/*
+ * ->release(): called on last close of a file descriptor.  Re-syncs the
+ * cached fid size with i_size and flushes the filesystem (no wait).
+ */
+static int sdfat_file_release(struct inode *inode, struct file *filp)
+{
+	struct super_block *sb = inode->i_sb;
+
+	/* Moved below code from sdfat_write_inode
+	 * TO FIX size-mismatch problem.
+	 */
+	/* FIXME : Added bug_on to confirm that there is no size mismatch */
+	sdfat_debug_bug_on(SDFAT_I(inode)->fid.size != i_size_read(inode));
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+	fsapi_sync_fs(sb, 0);
+	return 0;
+}
+
+/* Regular-file operations; read/write entry points chosen by kernel version. */
+static const struct file_operations sdfat_file_operations = {
+	.llseek      = generic_file_llseek,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+	.read_iter   = generic_file_read_iter,
+	.write_iter  = generic_file_write_iter,
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+	.read        = new_sync_read,
+	.write       = new_sync_write,
+	.read_iter   = generic_file_read_iter,
+	.write_iter  = generic_file_write_iter,
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) */
+	.read        = do_sync_read,
+	.write       = do_sync_write,
+	.aio_read    = generic_file_aio_read,
+	.aio_write   = generic_file_aio_write,
+#endif
+	.mmap        = sdfat_file_mmap,
+	.release     = sdfat_file_release,
+	.unlocked_ioctl  = sdfat_generic_ioctl,
+	.fsync       = sdfat_file_fsync,
+	.splice_read = generic_file_splice_read,
+};
+
+static const struct address_space_operations sdfat_da_aops;
+static const struct address_space_operations sdfat_aops;
+
+/*
+ * Truncate @inode from @old_size down to the current i_size.
+ *
+ * Cancels defrag work over the shrunk range, releases clusters via
+ * fsapi_truncate(), recomputes i_blocks from the cluster-aligned size,
+ * and finally clamps i_size_ondisk / i_size_aligned to the new
+ * block-aligned size.  Runs entirely under the superblock lock.
+ */
+static void sdfat_truncate(struct inode *inode, loff_t old_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+	unsigned int blocksize = 1 << inode->i_blkbits;
+	loff_t aligned_size;
+	int err;
+
+	__lock_super(sb);
+
+	if (SDFAT_I(inode)->fid.start_clu == 0) {
+		/* Strange statement:
+		 * Empty start_clu != ~0 (not allocated)
+		 */
+		sdfat_fs_error(sb, "tried to truncate zeroed cluster.");
+		goto out;
+	}
+
+	sdfat_debug_check_clusters(inode);
+
+	__cancel_dfr_work(inode, (loff_t)i_size_read(inode), (loff_t)old_size, __func__);
+
+	err = fsapi_truncate(inode, old_size, i_size_read(inode));
+	if (err)
+		goto out;
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+	if (IS_DIRSYNC(inode))
+		(void) sdfat_sync_inode(inode);
+	else
+		mark_inode_dirty(inode);
+
+	// FIXME: needs verification (translated from Korean)
+	// inode->i_blocks = ((SDFAT_I(inode)->i_size_ondisk + (fsi->cluster_size - 1))
+	inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1)) &
+			~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+out:
+	/*
+	 * This protects against truncating a file bigger than it was then
+	 * trying to write into the hole.
+	 *
+	 * comment by sh.hong:
+	 * This seems to mean 'intra page/block' truncate and writing.
+	 * I couldn't find a reason to change the values prior to fsapi_truncate
+	 * Therefore, I switched the order of operations
+	 * so that it's possible to utilize i_size_ondisk in fsapi_truncate
+	 */
+
+	/* round i_size up to the next block boundary */
+	aligned_size = i_size_read(inode);
+	if (aligned_size & (blocksize - 1)) {
+		aligned_size |= (blocksize - 1);
+		aligned_size++;
+	}
+
+	if (SDFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+		SDFAT_I(inode)->i_size_ondisk = aligned_size;
+
+	sdfat_debug_check_clusters(inode);
+
+	if (SDFAT_I(inode)->i_size_aligned > i_size_read(inode))
+		SDFAT_I(inode)->i_size_aligned = aligned_size;
+
+	/* After truncation :
+	 * 1) Delayed allocation is OFF
+	 *    i_size = i_size_ondisk <= i_size_aligned
+	 *    (useless size var.)
+	 *    (block-aligned)
+	 * 2) Delayed allocation is ON
+	 *    i_size = i_size_ondisk = i_size_aligned
+	 *    (will be block-aligned after write)
+	 *    or
+	 *    i_size_ondisk < i_size <= i_size_aligned (block_aligned)
+	 *    (will be block-aligned after write)
+	 */
+
+	__unlock_super(sb);
+}
+
+/* Inode operations for regular files; xattr hooks only with virtual-xattr. */
+static const struct inode_operations sdfat_file_inode_operations = {
+	.setattr     = sdfat_setattr,
+	.getattr     = sdfat_getattr,
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+	.listxattr     = sdfat_listxattr,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	.setxattr      = sdfat_setxattr,
+	.getxattr      = sdfat_getxattr,
+	.removexattr   = sdfat_removexattr,
+#endif
+#endif
+};
+
+/*======================================================================*/
+/* Address Space Operations */
+/*======================================================================*/
+/* 2-level option flag */
+#define BMAP_NOT_CREATE 0
+#define BMAP_ADD_BLOCK 1
+#define BMAP_ADD_CLUSTER 2
+#define BLOCK_ADDED(bmap_ops) (bmap_ops)
+/*
+ * Map logical @sector of @inode to a physical sector.
+ *
+ * @phys:          out: physical sector, or 0 if unmapped
+ * @mapped_blocks: out: number of contiguous sectors mapped from @phys
+ * @create:        in/out: one of the BMAP_* flags; downgraded to
+ *                 BMAP_NOT_CREATE when @sector is within i_size
+ *
+ * FAT12/16 root directories are mapped directly from the fixed root
+ * area.  Otherwise the cluster is resolved through the defrag map or
+ * fsapi_map_clus() (allocating only for BMAP_ADD_CLUSTER).  Returns 0,
+ * -ENOSPC, or -EIO.
+ */
+static int sdfat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+				  unsigned long *mapped_blocks, int *create)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+	const unsigned long blocksize = sb->s_blocksize;
+	const unsigned char blocksize_bits = sb->s_blocksize_bits;
+	sector_t last_block;
+	unsigned int cluster, clu_offset, sec_offset;
+	int err = 0;
+
+	*phys = 0;
+	*mapped_blocks = 0;
+
+	/* core code should handle EIO */
+#if 0
+	if (fsi->prev_eio && BLOCK_ADDED(*create))
+		return -EIO;
+#endif
+
+	if (((fsi->vol_type == FAT12) || (fsi->vol_type == FAT16)) &&
+					(inode->i_ino == SDFAT_ROOT_INO)) {
+		if (sector < (fsi->dentries_in_root >>
+				(sb->s_blocksize_bits - DENTRY_SIZE_BITS))) {
+			*phys = sector + fsi->root_start_sector;
+			*mapped_blocks = 1;
+		}
+		return 0;
+	}
+
+	last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
+	if ((sector >= last_block) && (*create == BMAP_NOT_CREATE))
+		return 0;
+
+	/* Is this block already allocated? */
+	clu_offset = sector >> fsi->sect_per_clus_bits;  /* cluster offset */
+
+	SDFAT_I(inode)->fid.size = i_size_read(inode);
+
+
+	if (unlikely(__check_dfr_on(inode,
+		(loff_t)((loff_t)clu_offset << fsi->cluster_size_bits),
+		(loff_t)((loff_t)(clu_offset + 1) << fsi->cluster_size_bits),
+		__func__))) {
+		err = __do_dfr_map_cluster(inode, clu_offset, &cluster);
+	} else {
+		if (*create & BMAP_ADD_CLUSTER)
+			err = fsapi_map_clus(inode, clu_offset, &cluster, 1);
+		else
+			err = fsapi_map_clus(inode, clu_offset, &cluster, ALLOC_NOWHERE);
+	}
+
+	if (err) {
+		if (err != -ENOSPC)
+			return -EIO;
+		return err;
+	}
+
+	/* FOR BIGDATA */
+	sdfat_statistics_set_rw(SDFAT_I(inode)->fid.flags,
+				clu_offset, *create & BMAP_ADD_CLUSTER);
+
+	if (!IS_CLUS_EOF(cluster)) {
+		/* sector offset in cluster */
+		sec_offset = sector & (fsi->sect_per_clus - 1);
+
+		*phys = CLUS_TO_SECT(fsi, cluster) + sec_offset;
+		*mapped_blocks = fsi->sect_per_clus - sec_offset;
+	}
+#if 0
+	else {
+		/* Debug purpose (new clu needed) */
+		ASSERT((*create & BMAP_ADD_CLUSTER) == 0);
+		ASSERT(sector >= last_block);
+	}
+#endif
+
+	if (sector < last_block)
+		*create = BMAP_NOT_CREATE;
+#if 0
+	else if (sector >= last_block)
+		*create = non-zero;
+
+	if (iblock <= last mapped-block)
+		*phys != 0
+		*create = BMAP_NOT_CREATE
+	else if (iblock <= last cluster)
+		*phys != 0
+		*create = non-zero
+#endif
+	return 0;
+}
+
+/*
+ * get_block callback used on the delayed-allocation write path
+ * (FAT32 only, asserted below).
+ *
+ * Maps @iblock; for a missing block with @create set it reserves a
+ * cluster (only at the first sector of a cluster) and installs a fake
+ * delayed mapping (~0xffff) to be resolved later by sdfat_get_block().
+ * Also maintains i_size_aligned / i_size_ondisk accounting for newly
+ * added blocks.  Returns 0 or a negative errno.
+ */
+static int sdfat_da_prep_block(struct inode *inode, sector_t iblock,
+				struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	FS_INFO_T *fsi = &(sbi->fsi);
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+	unsigned long mapped_blocks;
+	sector_t phys;
+	loff_t pos;
+	int sec_offset;
+	int bmap_create = create ? BMAP_ADD_BLOCK : BMAP_NOT_CREATE;
+	int err = 0;
+
+	__lock_super(sb);
+
+	/* FAT32 only */
+	ASSERT(fsi->vol_type == FAT32);
+
+	err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+	if (err) {
+		if (err != -ENOSPC)
+			sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+					"(iblock:%u, err:%d)", __func__,
+					(u32)iblock, err);
+		goto unlock_ret;
+	}
+
+	sec_offset = iblock & (fsi->sect_per_clus - 1);
+
+	if (phys) {
+		/* the block in in the mapped cluster boundary */
+		max_blocks = min(mapped_blocks, max_blocks);
+		map_bh(bh_result, sb, phys);
+
+		BUG_ON(BLOCK_ADDED(bmap_create) && (sec_offset == 0));
+
+	} else if (create == 1) {
+		/* Not exist: new cluster needed */
+		if (!BLOCK_ADDED(bmap_create)) {
+			sector_t last_block;
+			last_block = (i_size_read(inode) + (sb->s_blocksize - 1))
+						>> sb->s_blocksize_bits;
+			sdfat_fs_error(sb, "%s: new cluster need, but "
+				"bmap_create == BMAP_NOT_CREATE(iblock:%lld, "
+				"last_block:%lld)", __func__,
+				(s64)iblock, (s64)last_block);
+			err = -EIO;
+			goto unlock_ret;
+		}
+
+		// Reserved Cluster (only if iblock is the first sector in a clu)
+		if (sec_offset == 0) {
+			err = fsapi_reserve_clus(inode);
+			if (err) {
+				if (err != -ENOSPC)
+					sdfat_fs_error_ratelimit(sb,
+						"%s: failed to bmap "
+						"(iblock:%u, err:%d)", __func__,
+						(u32)iblock, err);
+
+				goto unlock_ret;
+			}
+		}
+
+		// Delayed mapping
+		map_bh(bh_result, sb, ~((sector_t) 0xffff));
+		set_buffer_new(bh_result);
+		set_buffer_delay(bh_result);
+
+	} else {
+		/* get_block on non-existing addr. with create==0 */
+		/*
+		 * CHECKME (translated from Korean):
+		 * if the offset is below i_size_aligned, shouldn't the
+		 * delayed mapping be enabled here anyway?
+		 * - Since we always 0-fill, this is not a problem for FAT.
+		 *   Once an interior region is fully written, it cannot be
+		 *   invalidated without going down to the disk.
+		 */
+		goto unlock_ret;
+	}
+
+
+	/* Newly added blocks */
+	if (BLOCK_ADDED(bmap_create)) {
+		set_buffer_new(bh_result);
+
+		SDFAT_I(inode)->i_size_aligned += max_blocks << sb->s_blocksize_bits;
+		if (phys) {
+			/* i_size_ondisk changes if a block added in the existing cluster */
+			#define num_clusters(value) ((value) ? (s32)((value - 1) >> fsi->cluster_size_bits) + 1 : 0)
+
+			/* FOR GRACEFUL ERROR HANDLING */
+			if (num_clusters(SDFAT_I(inode)->i_size_aligned) !=
+				num_clusters(SDFAT_I(inode)->i_size_ondisk)) {
+				EMSG("%s: inode(%p) invalid size (create(%d) "
+				"bmap_create(%d) phys(%lld) aligned(%lld) "
+				"on_disk(%lld) iblock(%u) sec_off(%d))\n",
+				__func__, inode, create, bmap_create, (s64)phys,
+				(s64)SDFAT_I(inode)->i_size_aligned,
+				(s64)SDFAT_I(inode)->i_size_ondisk,
+				(u32)iblock,
+				(s32)sec_offset);
+				sdfat_debug_bug_on(1);
+			}
+			SDFAT_I(inode)->i_size_ondisk = SDFAT_I(inode)->i_size_aligned;
+		}
+
+		pos = (iblock + 1) << sb->s_blocksize_bits;
+		/* Debug purpose - defensive coding */
+		ASSERT(SDFAT_I(inode)->i_size_aligned == pos);
+		if (SDFAT_I(inode)->i_size_aligned < pos)
+			SDFAT_I(inode)->i_size_aligned = pos;
+		/* Debug end */
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+		/* New page added (ASSERTION: 8 blocks per page) */
+		if ((sec_offset & 7) == 0)
+			sbi->stat_n_pages_added++;
+#endif
+	}
+
+	/* FOR GRACEFUL ERROR HANDLING */
+	if (i_size_read(inode) > SDFAT_I(inode)->i_size_aligned) {
+		sdfat_fs_error_ratelimit(sb, "%s: invalid size (inode(%p), "
+			"size(%llu) > aligned(%llu)\n", __func__, inode,
+			i_size_read(inode), SDFAT_I(inode)->i_size_aligned);
+		sdfat_debug_bug_on(1);
+	}
+
+	bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+
+unlock_ret:
+	__unlock_super(sb);
+	return err;
+}
+
+/*
+ * Standard get_block callback: map @iblock of @inode into @bh_result,
+ * allocating a cluster when @create is set.
+ *
+ * Also resolves delayed mappings installed by sdfat_da_prep_block()
+ * (clearing the delay bit) and keeps i_size_ondisk / i_size_aligned
+ * consistent for newly materialized blocks.  Returns 0 or negative errno.
+ */
+static int sdfat_get_block(struct inode *inode, sector_t iblock,
+				   struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+	int err = 0;
+	unsigned long mapped_blocks;
+	sector_t phys;
+	loff_t pos;
+	int bmap_create = create ? BMAP_ADD_CLUSTER : BMAP_NOT_CREATE;
+
+	__lock_super(sb);
+	err = sdfat_bmap(inode, iblock, &phys, &mapped_blocks, &bmap_create);
+	if (err) {
+		if (err != -ENOSPC)
+			sdfat_fs_error_ratelimit(sb, "%s: failed to bmap "
+					"(inode:%p iblock:%u, err:%d)",
+					__func__, inode, (u32)iblock, err);
+		goto unlock_ret;
+	}
+
+	if (phys) {
+		max_blocks = min(mapped_blocks, max_blocks);
+
+		/* Treat newly added block / cluster */
+		if (BLOCK_ADDED(bmap_create) || buffer_delay(bh_result)) {
+
+			/* Update i_size_ondisk */
+			pos = (iblock + 1) << sb->s_blocksize_bits;
+			if (SDFAT_I(inode)->i_size_ondisk < pos) {
+				/* Debug purpose */
+				if ((pos - SDFAT_I(inode)->i_size_ondisk) > bh_result->b_size) {
+					/* This never happens without DA */
+					MMSG("Jumping get_block\n");
+				}
+
+				SDFAT_I(inode)->i_size_ondisk = pos;
+				sdfat_debug_check_clusters(inode);
+			}
+
+			if (BLOCK_ADDED(bmap_create)) {
+				/* Old way (w/o DA)
+				 * create == 1 only if iblock > i_size
+				 * (in block unit)
+				 */
+
+				/* 20130723 CHECK (translated from Korean):
+				 * If this races with truncate, buffer_delay()
+				 * may be set while i_size < the i_block
+				 * position.
+				 *
+				 * Since only an already-allocated area is
+				 * rewritten this is not a big problem, but in
+				 * that case the area must already be covered
+				 * by a previously extended i_size_aligned.
+				 */
+
+				/* FOR GRACEFUL ERROR HANDLING */
+				if (buffer_delay(bh_result) &&
+					(pos > SDFAT_I(inode)->i_size_aligned)) {
+					sdfat_fs_error(sb, "requested for bmap "
+						"out of range(pos:(%llu)>i_size_aligned(%llu)\n",
+						pos, SDFAT_I(inode)->i_size_aligned);
+					sdfat_debug_bug_on(1);
+					err = -EIO;
+					goto unlock_ret;
+				}
+				set_buffer_new(bh_result);
+
+				/*
+				 * adjust i_size_aligned if i_size_ondisk is
+				 * bigger than it. (i.e. non-DA)
+				 */
+				if (SDFAT_I(inode)->i_size_ondisk >
+					SDFAT_I(inode)->i_size_aligned) {
+					SDFAT_I(inode)->i_size_aligned =
+						SDFAT_I(inode)->i_size_ondisk;
+				}
+			}
+
+			if (buffer_delay(bh_result))
+				clear_buffer_delay(bh_result);
+
+#if 0
+			/* Debug purpose */
+			if (SDFAT_I(inode)->i_size_ondisk >
+					SDFAT_I(inode)->i_size_aligned) {
+				/* Only after truncate
+				 * and the two size variables should indicate
+				 * same i_block
+				 */
+				unsigned int blocksize = 1 << inode->i_blkbits;
+				BUG_ON(SDFAT_I(inode)->i_size_ondisk -
+					SDFAT_I(inode)->i_size_aligned >= blocksize);
+			}
+#endif
+		}
+		map_bh(bh_result, sb, phys);
+	}
+
+	bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+unlock_ret:
+	__unlock_super(sb);
+	return err;
+}
+
+/* ->readpage(): delegate to mpage with sdfat_get_block as the mapper. */
+static int sdfat_readpage(struct file *file, struct page *page)
+{
+	return mpage_readpage(page, sdfat_get_block);
+}
+
+/* ->readpages(): delegate to mpage with sdfat_get_block as the mapper. */
+static int sdfat_readpages(struct file *file, struct address_space *mapping,
+			   struct list_head *pages, unsigned int nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, sdfat_get_block);
+}
+
+/*
+ * Build and submit a single-page write bio for @page covering @length
+ * bytes at device @sector, with completion handled by
+ * sdfat_writepage_end_io().  @length must be in (0, PAGE_SIZE].
+ */
+static inline void sdfat_submit_fullpage_bio(struct block_device *bdev,
+		sector_t sector, unsigned int length, struct page *page)
+{
+	/* Single page bio submit */
+	struct bio *bio;
+
+	BUG_ON((length > PAGE_SIZE) || (length == 0));
+
+	/*
+	 * If __GFP_WAIT is set, then bio_alloc will always be able to allocate
+	 * a bio. This is due to the mempool guarantees. To make this work, callers
+	 * must never allocate more than 1 bio at a time from this pool.
+	 *
+	 * #define GFP_NOIO	(__GFP_WAIT)
+	 */
+	bio = bio_alloc(GFP_NOIO, 1);
+
+	bio_set_dev(bio, bdev);
+	bio->bi_vcnt = 1;
+	bio->bi_io_vec[0].bv_page = page;	/* Inline vec */
+	bio->bi_io_vec[0].bv_len = length;	/* PAGE_SIZE */
+	bio->bi_io_vec[0].bv_offset = 0;
+	__sdfat_set_bio_iterate(bio, sector, length, 0, 0);
+
+	bio->bi_end_io = sdfat_writepage_end_io;
+	__sdfat_submit_bio_write(bio);
+}
+
+/*
+ * ->writepage fast path: when the whole page is up to date and its
+ * dirty buffers map to physically contiguous blocks, submit it as a
+ * single bio via sdfat_submit_fullpage_bio().  Any irregular case
+ * (small clusters, partially dirty page, non-contiguous mapping,
+ * mapping failure) falls back to block_write_full_page().
+ *
+ * Fix: the __check_dfr_on() range was computed as
+ * (loff_t)(page->index << PAGE_SHIFT) — the shift happened in
+ * pgoff_t (unsigned long) BEFORE the widening cast, overflowing on
+ * 32-bit kernels for offsets >= 4 GiB.  Cast first, then shift.
+ */
+static int sdfat_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct inode * const inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ loff_t i_size = i_size_read(inode);
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
+ const unsigned int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ struct buffer_head *bh, *head;
+ sector_t block, block_0, last_phys;
+ int ret;
+ unsigned int nr_blocks_towrite = blocks_per_page;
+
+ /* Don't distinguish 0-filled/clean block.
+ * Just write back the whole page
+ */
+ if (fsi->cluster_size < PAGE_SIZE)
+ goto confused;
+
+ if (!PageUptodate(page)) {
+ MMSG("%s: Not up-to-date page -> block_write_full_page\n",
+ __func__);
+ goto confused;
+ }
+
+ if (page->index >= end_index) {
+ /* last page or outside i_size */
+ unsigned int offset = i_size & (PAGE_SIZE-1);
+
+ /* If a truncation is in progress */
+ if (page->index > end_index || !offset)
+ goto confused;
+
+ /* 0-fill after i_size */
+ zero_user_segment(page, offset, PAGE_SIZE);
+ }
+
+ if (!page_has_buffers(page)) {
+ MMSG("WP: No buffers -> block_write_full_page\n");
+ goto confused;
+ }
+
+ block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
+ block_0 = block; /* first block */
+ head = page_buffers(page);
+ bh = head;
+
+ last_phys = 0;
+ do {
+ BUG_ON(buffer_locked(bh));
+
+ if (!buffer_dirty(bh) || !buffer_uptodate(bh)) {
+ /* first clean buffer ends the contiguous dirty run */
+ if (nr_blocks_towrite == blocks_per_page)
+ nr_blocks_towrite = (unsigned int) (block - block_0);
+
+ BUG_ON(nr_blocks_towrite >= blocks_per_page);
+
+ // !uptodate but dirty??
+ if (buffer_dirty(bh))
+ goto confused;
+
+ // Nothing to writeback in this block
+ bh = bh->b_this_page;
+ block++;
+ continue;
+ }
+
+ if (nr_blocks_towrite != blocks_per_page)
+ // Dirty -> Non-dirty -> Dirty again case
+ goto confused;
+
+ /* Map if needed */
+ if (!buffer_mapped(bh) || buffer_delay(bh)) {
+ BUG_ON(bh->b_size != (1 << (inode->i_blkbits)));
+ ret = sdfat_get_block(inode, block, bh, 1);
+ if (ret)
+ goto confused;
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ __sdfat_clean_bdev_aliases(bh->b_bdev, bh->b_blocknr);
+ }
+ }
+
+ /* continuity check */
+ if (((last_phys + 1) != bh->b_blocknr) && (last_phys != 0)) {
+ DMSG("Non-contiguous block mapping in single page");
+ goto confused;
+ }
+
+ last_phys = bh->b_blocknr;
+ bh = bh->b_this_page;
+ block++;
+ } while (bh != head);
+
+ if (nr_blocks_towrite == 0) {
+ DMSG("Page dirty but no dirty bh? alloc_208\n");
+ goto confused;
+ }
+
+
+ /* Write-back */
+ do {
+ clear_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+
+ /**
+ * Turn off MAPPED flag in victim's bh if defrag on.
+ * Another write_begin can starts after get_block for defrag victims called.
+ * In this case, write_begin calls get_block and get original block number
+ * and previous defrag will be canceled.
+ */
+ if (unlikely(__check_dfr_on(inode,
+ ((loff_t)page->index << PAGE_SHIFT),
+ ((loff_t)(page->index + 1) << PAGE_SHIFT),
+ __func__))) {
+ do {
+ clear_buffer_mapped(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+
+ // Trace # of pages queued (Approx.)
+ atomic_inc(&SDFAT_SB(sb)->stat_n_pages_queued);
+
+ sdfat_submit_fullpage_bio(head->b_bdev,
+ head->b_blocknr << (sb->s_blocksize_bits - SECTOR_SIZE_BITS),
+ nr_blocks_towrite << inode->i_blkbits,
+ page);
+
+ unlock_page(page);
+
+ return 0;
+
+confused:
+#ifdef CONFIG_SDFAT_TRACE_IO
+ SDFAT_SB(sb)->stat_n_pages_confused++;
+#endif
+ ret = block_write_full_page(page, sdfat_get_block, wbc);
+ return ret;
+}
+
+/*
+ * ->writepages for the delayed-allocation aops.  With the "adj_req"
+ * mount option (and CONFIG_SDFAT_ALIGNED_MPAGE_WRITE) writes go
+ * through the aligned mpage path; otherwise fall back to the generic
+ * per-page writeback loop.
+ */
+static int sdfat_da_writepages(struct address_space *mapping,
+  struct writeback_control *wbc)
+{
+ MMSG("%s(inode:%p) with nr_to_write = 0x%08lx "
+  "(ku %d, bg %d, tag %d, rc %d )\n",
+  __func__, mapping->host, wbc->nr_to_write,
+  wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages,
+  wbc->for_reclaim);
+
+ ASSERT(mapping->a_ops == &sdfat_da_aops);
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ if (SDFAT_SB(mapping->host->i_sb)->options.adj_req)
+  return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block);
+#endif
+ return generic_writepages(mapping, wbc);
+}
+
+/*
+ * ->writepages for the normal aops.  Same adj_req dispatch as the
+ * delayed-allocation variant, but falls back to mpage_writepages().
+ */
+static int sdfat_writepages(struct address_space *mapping,
+  struct writeback_control *wbc)
+{
+ MMSG("%s(inode:%p) with nr_to_write = 0x%08lx "
+  "(ku %d, bg %d, tag %d, rc %d )\n",
+  __func__, mapping->host, wbc->nr_to_write,
+  wbc->for_kupdate, wbc->for_background, wbc->tagged_writepages,
+  wbc->for_reclaim);
+
+ ASSERT(mapping->a_ops == &sdfat_aops);
+
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ if (SDFAT_SB(mapping->host->i_sb)->options.adj_req)
+  return sdfat_mpage_writepages(mapping, wbc, sdfat_get_block);
+#endif
+ return mpage_writepages(mapping, wbc, sdfat_get_block);
+}
+
+/*
+ * Roll back after a failed or short write that may have extended the
+ * file: drop pagecache beyond i_size and truncate allocated blocks
+ * back to the aligned size.
+ */
+static void sdfat_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > i_size_read(inode)) {
+  __sdfat_truncate_pagecache(inode, to, i_size_read(inode));
+  sdfat_truncate(inode, SDFAT_I(inode)->i_size_aligned);
+ }
+}
+
+/*
+ * Return 0 when the volume may be written to, -EIO when the backing
+ * device has gone away, or -EROFS on a read-only mount.
+ */
+static int sdfat_check_writable(struct super_block *sb)
+{
+	if (fsapi_check_bdi_valid(sb))
+		return -EIO;
+
+	return (sb->s_flags & MS_RDONLY) ? -EROFS : 0;
+}
+
+/*
+ * Common ->write_begin body.  Cancels any defrag work overlapping the
+ * write range first (must happen before the writability check so the
+ * dfr state is consistent either way), refuses writes on an invalid
+ * bdi or a read-only mount, then delegates to cont_write_begin() with
+ * the caller-selected get_block and size cursor (*bytes).  On failure
+ * any speculative allocation is rolled back via sdfat_write_failed().
+ */
+static int __sdfat_write_begin(struct file *file, struct address_space *mapping,
+    loff_t pos, unsigned int len,
+    unsigned int flags, struct page **pagep,
+    void **fsdata, get_block_t *get_block,
+    loff_t *bytes, const char *fname)
+{
+ struct super_block *sb = mapping->host->i_sb;
+ int ret;
+
+ __cancel_dfr_work(mapping->host, pos, (loff_t)(pos + len), fname);
+
+ ret = sdfat_check_writable(sb);
+ if (unlikely(ret < 0))
+  return ret;
+
+ *pagep = NULL;
+ ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+  get_block, bytes);
+
+ if (ret < 0)
+  sdfat_write_failed(mapping, pos+len);
+
+ return ret;
+}
+
+
+/*
+ * ->write_begin for delayed allocation: block preparation via
+ * sdfat_da_prep_block, size cursor i_size_aligned.
+ */
+static int sdfat_da_write_begin(struct file *file, struct address_space *mapping,
+   loff_t pos, unsigned int len, unsigned int flags,
+   struct page **pagep, void **fsdata)
+{
+ return __sdfat_write_begin(file, mapping, pos, len, flags,
+  pagep, fsdata, sdfat_da_prep_block,
+  &SDFAT_I(mapping->host)->i_size_aligned,
+  __func__);
+}
+
+
+/*
+ * ->write_begin for immediate allocation: blocks mapped via
+ * sdfat_get_block, size cursor i_size_ondisk.
+ */
+static int sdfat_write_begin(struct file *file, struct address_space *mapping,
+   loff_t pos, unsigned int len, unsigned int flags,
+   struct page **pagep, void **fsdata)
+{
+ return __sdfat_write_begin(file, mapping, pos, len, flags,
+  pagep, fsdata, sdfat_get_block,
+  &SDFAT_I(mapping->host)->i_size_ondisk,
+  __func__);
+}
+
+/*
+ * ->write_end: finish via generic_write_end() (err is bytes copied,
+ * or negative on error), sanity-check the aligned-size invariant,
+ * roll back on a short copy, and mark the file ATTR_ARCHIVE with
+ * fresh mtime/ctime after a successful write.
+ */
+static int sdfat_write_end(struct file *file, struct address_space *mapping,
+    loff_t pos, unsigned int len, unsigned int copied,
+    struct page *pagep, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ FILE_ID_T *fid = &(SDFAT_I(inode)->fid);
+ int err;
+
+ err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+
+ /* FOR GRACEFUL ERROR HANDLING */
+ if (SDFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
+  sdfat_fs_error(inode->i_sb, "invalid size(size(%llu) "
+   "> aligned(%llu)\n", i_size_read(inode),
+   SDFAT_I(inode)->i_size_aligned);
+  sdfat_debug_bug_on(1);
+ }
+
+ /* fewer bytes landed than requested: undo speculative extension */
+ if (err < len)
+  sdfat_write_failed(mapping, pos+len);
+
+ if (!(err < 0) && !(fid->attr & ATTR_ARCHIVE)) {
+  inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+  fid->attr |= ATTR_ARCHIVE;
+  mark_inode_dirty(inode);
+ }
+
+ return err;
+}
+
+/*
+ * Common direct-I/O body.  Writes that would extend past
+ * i_size_aligned return 0 so the caller falls back to buffered I/O
+ * (see FIXME below); otherwise the request goes through the blockdev
+ * direct-IO path, with failed writes rolled back.
+ */
+static inline ssize_t __sdfat_direct_IO(int rw, struct kiocb *iocb,
+  struct inode *inode, void *iov_u, loff_t offset,
+  loff_t count, unsigned long nr_segs)
+{
+ struct address_space *mapping = inode->i_mapping;
+ loff_t size = offset + count;
+ ssize_t ret;
+
+ if (rw == WRITE) {
+  /*
+   * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
+   * so we need to update the ->i_size_aligned to block boundary.
+   *
+   * But we must fill the remaining area or hole by nul for
+   * updating ->i_size_aligned
+   *
+   * Return 0, and fallback to normal buffered write.
+   */
+  if (SDFAT_I(inode)->i_size_aligned < size)
+   return 0;
+ }
+
+ /*
+  * sdFAT need to use the DIO_LOCKING for avoiding the race
+  * condition of sdfat_get_block() and ->truncate().
+  */
+ ret = __sdfat_blkdev_direct_IO(rw, iocb, inode, iov_u, offset, nr_segs);
+ if (ret < 0 && (rw & WRITE))
+  sdfat_write_failed(mapping, size);
+
+ return ret;
+}
+
+/* Address-space ops for immediate (non-delayed) block allocation. */
+static const struct address_space_operations sdfat_aops = {
+ .readpage    = sdfat_readpage,
+ .readpages   = sdfat_readpages,
+ .writepage   = sdfat_writepage,
+ .writepages  = sdfat_writepages,
+ .write_begin = sdfat_write_begin,
+ .write_end   = sdfat_write_end,
+ .direct_IO   = sdfat_direct_IO,
+ .bmap        = sdfat_aop_bmap
+};
+
+/* Address-space ops used when the "delay" allocation option is set. */
+static const struct address_space_operations sdfat_da_aops = {
+ .readpage    = sdfat_readpage,
+ .readpages   = sdfat_readpages,
+ .writepage   = sdfat_writepage,
+ .writepages  = sdfat_da_writepages,
+ .write_begin = sdfat_da_write_begin,
+ .write_end   = sdfat_write_end,
+ .direct_IO   = sdfat_direct_IO,
+ .bmap        = sdfat_aop_bmap
+};
+
+/*======================================================================*/
+/* Super Operations */
+/*======================================================================*/
+
+/* Map an inode's directory-entry position to an inode-hash bucket. */
+static inline unsigned long sdfat_hash(loff_t i_pos)
+{
+ return hash_32(i_pos, SDFAT_HASH_BITS);
+}
+
+/* Record @i_pos in the inode and insert it into the per-sb inode hash. */
+static void sdfat_attach(struct inode *inode, loff_t i_pos)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+	struct sdfat_inode_info *ei = SDFAT_I(inode);
+
+	spin_lock(&sbi->inode_hash_lock);
+	ei->i_pos = i_pos;
+	hlist_add_head(&ei->i_hash_fat,
+		sbi->inode_hashtable + sdfat_hash(i_pos));
+	spin_unlock(&sbi->inode_hash_lock);
+}
+
+/* Remove the inode from the per-sb inode hash and clear its position. */
+static void sdfat_detach(struct inode *inode)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+	struct sdfat_inode_info *ei = SDFAT_I(inode);
+
+	spin_lock(&sbi->inode_hash_lock);
+	hlist_del_init(&ei->i_hash_fat);
+	ei->i_pos = 0;
+	spin_unlock(&sbi->inode_hash_lock);
+}
+
+
+/* doesn't deal with root inode */
+static int sdfat_fill_inode(struct inode *inode, const FILE_ID_T *fid)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+ DIR_ENTRY_T info;
+ u64 size = fid->size;
+
+ memcpy(&(SDFAT_I(inode)->fid), fid, sizeof(FILE_ID_T));
+
+ SDFAT_I(inode)->i_pos = 0;
+ SDFAT_I(inode)->target = NULL;
+ inode->i_uid = sbi->options.fs_uid;
+ inode->i_gid = sbi->options.fs_gid;
+ inode_inc_iversion(inode);
+ inode->i_generation = get_seconds();
+
+ if (fsapi_read_inode(inode, &info) < 0) {
+ MMSG("%s: failed to read stat!\n", __func__);
+ return -EIO;
+ }
+
+ if (info.Attr & ATTR_SUBDIR) { /* directory */
+ inode->i_generation &= ~1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ inode->i_op = &sdfat_dir_inode_operations;
+ inode->i_fop = &sdfat_dir_operations;
+
+ set_nlink(inode, info.NumSubdirs);
+ } else if (info.Attr & ATTR_SYMLINK) { /* symbolic link */
+ inode->i_op = &sdfat_symlink_inode_operations;
+ inode->i_generation |= 1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ } else { /* regular file */
+ inode->i_generation |= 1;
+ inode->i_mode = sdfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+ inode->i_op = &sdfat_file_inode_operations;
+ inode->i_fop = &sdfat_file_operations;
+
+ if (sbi->options.improved_allocation & SDFAT_ALLOC_DELAY)
+ inode->i_mapping->a_ops = &sdfat_da_aops;
+ else
+ inode->i_mapping->a_ops = &sdfat_aops;
+
+ inode->i_mapping->nrpages = 0;
+
+ }
+
+ /*
+ * Use fid->size instead of info.Size
+ * because info.Size means the value saved on disk
+ */
+ i_size_write(inode, size);
+
+ /* ondisk and aligned size should be aligned with block size */
+ if (size & (inode->i_sb->s_blocksize - 1)) {
+ size |= (inode->i_sb->s_blocksize - 1);
+ size++;
+ }
+
+ SDFAT_I(inode)->i_size_aligned = size;
+ SDFAT_I(inode)->i_size_ondisk = size;
+ sdfat_debug_check_clusters(inode);
+
+ sdfat_save_attr(inode, info.Attr);
+
+ inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1))
+ & ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+
+ sdfat_time_fat2unix(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+ sdfat_time_fat2unix(sbi, &inode->i_ctime, &info.CreateTimestamp);
+ sdfat_time_fat2unix(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+ __init_dfr_info(inode);
+
+ return 0;
+}
+
+/*
+ * Look up an inode by its directory-entry position, or build a new one
+ * from @fid if not cached.  Returns the inode or an ERR_PTR on
+ * allocation/fill failure — callers must check with IS_ERR().
+ */
+static struct inode *sdfat_build_inode(struct super_block *sb,
+   const FILE_ID_T *fid, loff_t i_pos) {
+ struct inode *inode;
+ int err;
+
+ inode = sdfat_iget(sb, i_pos);
+ if (inode)
+  goto out;
+ inode = new_inode(sb);
+ if (!inode) {
+  inode = ERR_PTR(-ENOMEM);
+  goto out;
+ }
+ inode->i_ino = iunique(sb, SDFAT_ROOT_INO);
+ inode_set_iversion(inode, 1);
+ err = sdfat_fill_inode(inode, fid);
+ if (err) {
+  iput(inode);
+  inode = ERR_PTR(err);
+  goto out;
+ }
+ sdfat_attach(inode, i_pos);
+ insert_inode_hash(inode);
+out:
+ return inode;
+}
+
+/*
+ * ->alloc_inode: carve a sdfat_inode_info from the slab cache and hand
+ * the embedded VFS inode back.  truncate_lock only exists on >= 3.4
+ * kernels, hence the version gate.
+ */
+static struct inode *sdfat_alloc_inode(struct super_block *sb)
+{
+ struct sdfat_inode_info *ei;
+
+ ei = kmem_cache_alloc(sdfat_inode_cachep, GFP_NOFS);
+ if (!ei)
+  return NULL;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ init_rwsem(&ei->truncate_lock);
+#endif
+ return &ei->vfs_inode;
+}
+
+/*
+ * ->destroy_inode: release the symlink target buffer (if any) and
+ * return the sdfat_inode_info to its slab cache.
+ *
+ * kfree(NULL) is a no-op, so the previous `if (target)` guard was
+ * redundant and has been dropped.
+ */
+static void sdfat_destroy_inode(struct inode *inode)
+{
+ kfree(SDFAT_I(inode)->target);
+ SDFAT_I(inode)->target = NULL;
+
+ kmem_cache_free(sdfat_inode_cachep, SDFAT_I(inode));
+}
+
+/*
+ * Flush the inode's directory entry (attr, size, timestamps) to disk.
+ * The root inode has no directory entry, so it is skipped.  @sync is
+ * downgraded to async when the superblock doesn't support synchronous
+ * inode writes.
+ */
+static int __sdfat_write_inode(struct inode *inode, int sync)
+{
+ struct super_block *sb = inode->i_sb;
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ DIR_ENTRY_T info;
+
+ if (inode->i_ino == SDFAT_ROOT_INO)
+  return 0;
+
+ info.Attr = sdfat_make_attr(inode);
+ info.Size = i_size_read(inode);
+
+ sdfat_time_unix2fat(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+ sdfat_time_unix2fat(sbi, &inode->i_ctime, &info.CreateTimestamp);
+ sdfat_time_unix2fat(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+ if (!__support_write_inode_sync(sb))
+  sync = 0;
+
+ /* FIXME : Do we need handling error? */
+ return fsapi_write_inode(inode, &info, sync);
+}
+
+/* Synchronous inode flush, used internally. */
+static int sdfat_sync_inode(struct inode *inode)
+{
+ return __sdfat_write_inode(inode, 1);
+}
+
+/* ->write_inode: sync only for WB_SYNC_ALL writeback. */
+static int sdfat_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return __sdfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
+/*
+ * ->evict_inode: drop pagecache, and when the last link is gone,
+ * cancel pending defrag work and release the file's clusters via
+ * fsapi_truncate() (old_size is passed so the fs layer knows how much
+ * to free — exact fsapi_truncate() argument semantics: see fsapi).
+ * Finally invalidate buffers/extents and unhash the inode.
+ */
+static void sdfat_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (!inode->i_nlink) {
+  loff_t old_size = i_size_read(inode);
+
+  i_size_write(inode, 0);
+
+  SDFAT_I(inode)->fid.size = old_size;
+
+  __cancel_dfr_work(inode, 0, (loff_t)old_size, __func__);
+
+  /* TO CHECK evicting directory works correctly */
+  MMSG("%s: inode(%p) evict %s (size(%llu) to zero)\n",
+   __func__, inode,
+   S_ISDIR(inode->i_mode) ? "directory" : "file",
+   (u64)old_size);
+  fsapi_truncate(inode, old_size, 0);
+ }
+
+ invalidate_inode_buffers(inode);
+ clear_inode(inode);
+ fsapi_invalidate_extent(inode);
+ sdfat_detach(inode);
+
+ /* after end of this function, caller will remove inode hash */
+ /* remove_inode_hash(inode); */
+}
+
+
+
+/*
+ * ->put_super: stop delayed work, flush remaining dirty state, unmount
+ * the fs layer, release NLS tables / iocharset, tear down the sysfs
+ * kobject, and free the sb-info (kmalloc'd or vmalloc'd depending on
+ * how it was allocated at mount).
+ */
+static void sdfat_put_super(struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ int err;
+
+ sdfat_log_msg(sb, KERN_INFO, "trying to unmount...");
+
+ __cancel_delayed_work_sync(sbi);
+
+ if (__is_sb_dirty(sb))
+  sdfat_write_super(sb);
+
+ __free_dfr_mem_if_required(sb);
+ err = fsapi_umount(sb);
+
+ if (sbi->nls_disk) {
+  unload_nls(sbi->nls_disk);
+  sbi->nls_disk = NULL;
+  sbi->options.codepage = sdfat_default_codepage;
+ }
+ if (sbi->nls_io) {
+  unload_nls(sbi->nls_io);
+  sbi->nls_io = NULL;
+ }
+ if (sbi->options.iocharset != sdfat_default_iocharset) {
+  kfree(sbi->options.iocharset);
+  sbi->options.iocharset = sdfat_default_iocharset;
+ }
+
+ sb->s_fs_info = NULL;
+
+ kobject_del(&sbi->sb_kobj);
+ kobject_put(&sbi->sb_kobj);
+ if (!sbi->use_vmalloc)
+  kfree(sbi);
+ else
+  vfree(sbi);
+
+ sdfat_log_msg(sb, KERN_INFO, "unmounted successfully! %s",
+   err ? "(with previous I/O errors)" : "");
+}
+
+/*
+ * Flush delayed FAT/DIR metadata if the delayed-dirty feature is
+ * compiled in; otherwise a no-op.
+ */
+static inline void __flush_delayed_meta(struct super_block *sb, s32 sync)
+{
+#ifdef CONFIG_SDFAT_DELAYED_META_DIRTY
+ fsapi_cache_flush(sb, sync);
+#else
+ /* DO NOTHING */
+#endif
+}
+
+/*
+ * ->write_super (pre-3.7 kernels) / periodic flush: mark the sb clean,
+ * push delayed FAT/DIR metadata, sync the fs layer, then force the
+ * block device so directory updates reach the medium promptly.
+ *
+ * Fix: the elapsed-time snapshot was held in an `int`, truncating
+ * `jiffies` (an unsigned long) on 64-bit kernels and corrupting the
+ * trace message; use unsigned long instead.
+ */
+static void sdfat_write_super(struct super_block *sb)
+{
+ unsigned long time = 0;
+
+ __lock_super(sb);
+
+ __set_sb_clean(sb);
+
+#ifdef CONFIG_SDFAT_DFR
+ if (atomic_read(&(SDFAT_SB(sb)->dfr_info.stat)) == DFR_SB_STAT_VALID)
+  fsapi_dfr_update_fat_next(sb);
+#endif
+
+ /* flush delayed FAT/DIR dirty */
+ __flush_delayed_meta(sb, 0);
+
+ if (!(sb->s_flags & MS_RDONLY))
+  fsapi_sync_fs(sb, 0);
+
+ __unlock_super(sb);
+
+ time = jiffies;
+
+ /* Issuing bdev requests is needed
+  * to guarantee DIR updates in time
+  * whether w/ or w/o delayed DIR dirty feature.
+  * (otherwise DIR updates could be delayed for 5 + 5 secs at max.)
+  */
+ sync_blockdev(sb->s_bdev);
+
+#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG))
+ /* SPO test */
+ fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+ MMSG("BD: sdfat_write_super (bdev_sync for %ld ms)\n",
+   (jiffies - time) * 1000 / HZ);
+}
+
+
+/*
+ * Advance the defrag FAT-next state when defrag is both mounted-in and
+ * currently valid; compiled out entirely without CONFIG_SDFAT_DFR.
+ */
+static void __dfr_update_fat_next(struct super_block *sb)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+
+ if (sbi->options.defrag &&
+  (atomic_read(&sbi->dfr_info.stat) == DFR_SB_STAT_VALID)) {
+  fsapi_dfr_update_fat_next(sb);
+ }
+#endif
+}
+
+/*
+ * Complete an in-flight defrag request (valid state) or, when idle and
+ * the rate-limit window has passed, probe whether defrag is needed and
+ * notify the userspace daemon via a KOBJ_CHANGE uevent.  Only runs on
+ * a synchronous sync_fs with defrag mounted-in.
+ */
+static void __dfr_update_fat_prev(struct super_block *sb, int wait)
+{
+#ifdef CONFIG_SDFAT_DFR
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct defrag_info *sb_dfr = &sbi->dfr_info;
+ /* static time available? */
+ static int time; /* initialized by zero */
+ /* NOTE(review): jiffies is unsigned long — storing/diffing it in an
+  * int truncates on 64-bit; the (spent < 0) test papers over wraps.
+  * Confirm intended. */
+ int uevent = 0, total = 0, clean = 0, full = 0;
+ int spent = jiffies - time;
+
+ if (!(sbi->options.defrag && wait))
+  return;
+
+ __lock_super(sb);
+ /* Update FAT for defrag */
+ if (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_VALID) {
+
+  fsapi_dfr_update_fat_prev(sb, 0);
+
+  /* flush delayed FAT/DIR dirty */
+  __flush_delayed_meta(sb, 0);
+
+  /* Complete defrag req */
+  fsapi_sync_fs(sb, 1);
+  atomic_set(&sb_dfr->stat, DFR_SB_STAT_REQ);
+  complete_all(&sbi->dfr_complete);
+ } else if (((spent < 0) || (spent > DFR_DEFAULT_TIMEOUT)) &&
+   (atomic_read(&(sbi->dfr_info.stat)) == DFR_SB_STAT_IDLE)) {
+  uevent = fsapi_dfr_check_dfr_required(sb, &total, &clean, &full);
+  time = jiffies;
+ }
+ __unlock_super(sb);
+
+ if (uevent) {
+  kobject_uevent(&SDFAT_SB(sb)->sb_kobj, KOBJ_CHANGE);
+  dfr_debug("uevent for defrag_daemon, total_au %d, "
+    "clean_au %d, full_au %d", total, clean, full);
+ }
+#endif
+}
+
+/*
+ * ->sync_fs: when the sb is dirty, flush it under the super lock
+ * (including the defrag FAT-next update), then give the defrag engine
+ * a chance to complete pending work on synchronous syncs.
+ */
+static int sdfat_sync_fs(struct super_block *sb, int wait)
+{
+ int err = 0;
+
+ /* If there are some dirty buffers in the bdev inode */
+ if (__is_sb_dirty(sb)) {
+  __lock_super(sb);
+  __set_sb_clean(sb);
+
+  __dfr_update_fat_next(sb);
+
+  err = fsapi_sync_fs(sb, 1);
+
+#if (defined(CONFIG_SDFAT_DFR) && defined(CONFIG_SDFAT_DFR_DEBUG))
+  /* SPO test */
+  fsapi_dfr_spo_test(sb, DFR_SPO_FAT_NEXT, __func__);
+#endif
+
+  __unlock_super(sb);
+ }
+
+ __dfr_update_fat_prev(sb, wait);
+
+ return err;
+}
+
+/*
+ * ->statfs: report volume geometry in cluster units.  f_namelen of 260
+ * matches the Windows MAX_PATH-style limit used by FAT-family
+ * filesystems.
+ */
+static int sdfat_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ /*
+  * patch 1.2.2 :
+  * fixed the slow-call problem because of volume-lock contention.
+  */
+ struct super_block *sb = dentry->d_sb;
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+ VOL_INFO_T info;
+
+ /* fsapi_statfs will try to get a volume lock if needed */
+ if (fsapi_statfs(sb, &info))
+  return -EIO;
+
+ if (fsi->prev_eio)
+  sdfat_msg(sb, KERN_INFO, "called statfs with previous"
+   " I/O error(0x%02X).", fsi->prev_eio);
+
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = info.ClusterSize;
+ buf->f_blocks = info.NumClusters;
+ buf->f_bfree = info.FreeClusters;
+ buf->f_bavail = info.FreeClusters;
+ buf->f_fsid.val[0] = (u32)id;
+ buf->f_fsid.val[1] = (u32)(id >> 32);
+ buf->f_namelen = 260;
+
+ return 0;
+}
+
+/*
+ * ->remount_fs: re-arm syncfs state and mark the volume clean; mount
+ * options themselves are not re-parsed here.  NOTE(review): kstrdup()
+ * may return NULL under memory pressure — the %s printk copes with a
+ * NULL pointer, but confirm that is the intent.
+ */
+static int sdfat_remount(struct super_block *sb, int *flags, char *data)
+{
+ unsigned long prev_sb_flags;
+ char *orig_data = kstrdup(data, GFP_KERNEL);
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ *flags |= MS_NODIRATIME;
+
+ prev_sb_flags = sb->s_flags;
+
+ sdfat_remount_syncfs(sb);
+
+ fsapi_set_vol_flags(sb, VOL_CLEAN, 1);
+
+ sdfat_log_msg(sb, KERN_INFO, "re-mounted(%s->%s), eio=0x%x, Opts: %s",
+  (prev_sb_flags & MS_RDONLY) ? "ro" : "rw",
+  (*flags & MS_RDONLY) ? "ro" : "rw",
+  fsi->prev_eio, orig_data);
+ kfree(orig_data);
+ return 0;
+}
+
+/*
+ * Emit the effective mount options (plus volume type and any recorded
+ * I/O-error flags) for /proc/mounts.  Output must stay in sync with
+ * the tokens accepted by parse_options().
+ */
+static int __sdfat_show_options(struct seq_file *m, struct super_block *sb)
+{
+ struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+ struct sdfat_mount_options *opts = &sbi->options;
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ /* Show partition info */
+ seq_printf(m, ",fs=%s", sdfat_get_vol_type_str(fsi->vol_type));
+ if (fsi->prev_eio)
+  seq_printf(m, ",eio=0x%x", fsi->prev_eio);
+ if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
+  seq_printf(m, ",uid=%u",
+    from_kuid_munged(&init_user_ns, opts->fs_uid));
+ if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
+  seq_printf(m, ",gid=%u",
+    from_kgid_munged(&init_user_ns, opts->fs_gid));
+ seq_printf(m, ",fmask=%04o", opts->fs_fmask);
+ seq_printf(m, ",dmask=%04o", opts->fs_dmask);
+ if (opts->allow_utime)
+  seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
+ if (sbi->nls_disk)
+  seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
+ if (sbi->nls_io)
+  seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
+ if (opts->utf8)
+  seq_puts(m, ",utf8");
+ if (sbi->fsi.vol_type != EXFAT)
+  seq_puts(m, ",shortname=winnt");
+ seq_printf(m, ",namecase=%u", opts->casesensitive);
+ if (opts->tz_utc)
+  seq_puts(m, ",tz=UTC");
+ if (opts->improved_allocation & SDFAT_ALLOC_DELAY)
+  seq_puts(m, ",delay");
+ if (opts->improved_allocation & SDFAT_ALLOC_SMART)
+  seq_printf(m, ",smart,ausize=%u", opts->amap_opt.sect_per_au);
+ if (opts->defrag)
+  seq_puts(m, ",defrag");
+ if (opts->adj_hidsect)
+  seq_puts(m, ",adj_hid");
+ if (opts->adj_req)
+  seq_puts(m, ",adj_req");
+ seq_printf(m, ",symlink=%u", opts->symlink);
+ seq_printf(m, ",bps=%ld", sb->s_blocksize);
+ if (opts->errors == SDFAT_ERRORS_CONT)
+  seq_puts(m, ",errors=continue");
+ else if (opts->errors == SDFAT_ERRORS_PANIC)
+  seq_puts(m, ",errors=panic");
+ else
+  seq_puts(m, ",errors=remount-ro");
+ if (opts->discard)
+  seq_puts(m, ",discard");
+
+ return 0;
+}
+
+/* Super operations; ->write_super only exists on pre-3.7 kernels. */
+static const struct super_operations sdfat_sops = {
+ .alloc_inode   = sdfat_alloc_inode,
+ .destroy_inode = sdfat_destroy_inode,
+ .write_inode   = sdfat_write_inode,
+ .evict_inode  = sdfat_evict_inode,
+ .put_super     = sdfat_put_super,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ .write_super   = sdfat_write_super,
+#endif
+ .sync_fs       = sdfat_sync_fs,
+ .statfs        = sdfat_statfs,
+ .remount_fs    = sdfat_remount,
+ .show_options  = sdfat_show_options,
+};
+
+/*======================================================================*/
+/* SYSFS Operations */
+/*======================================================================*/
+/* Declare a per-superblock sysfs attribute with show/store callbacks. */
+#define SDFAT_ATTR(name, mode, show, store) \
+static struct sdfat_attr sdfat_attr_##name = __ATTR(name, mode, show, store)
+
+/* sysfs attribute wrapper carrying sdfat-specific show/store hooks. */
+struct sdfat_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct sdfat_sb_info *, char *);
+ ssize_t (*store)(struct sdfat_sb_info *, const char *, size_t);
+};
+
+/* sysfs ->show dispatcher: forward to the attribute's show hook, if set. */
+static ssize_t sdfat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr);
+	struct sdfat_sb_info *sbi =
+		container_of(kobj, struct sdfat_sb_info, sb_kobj);
+
+	if (!a->show)
+		return 0;
+	return a->show(sbi, buf);
+}
+
+/* sysfs ->store dispatcher: forward to the attribute's store hook, if set. */
+static ssize_t sdfat_attr_store(struct kobject *kobj, struct attribute *attr,
+		const char *buf, size_t len)
+{
+	struct sdfat_attr *a = container_of(attr, struct sdfat_attr, attr);
+	struct sdfat_sb_info *sbi =
+		container_of(kobj, struct sdfat_sb_info, sb_kobj);
+
+	if (!a->store)
+		return len;
+	return a->store(sbi, buf, len);
+}
+
+/* Route generic sysfs show/store to the sdfat dispatchers above. */
+static const struct sysfs_ops sdfat_attr_ops = {
+ .show  = sdfat_attr_show,
+ .store = sdfat_attr_store,
+};
+
+
+/* sysfs "type": volume type string (e.g. exfat/vfat). */
+static ssize_t type_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", sdfat_get_vol_type_str(fsi->vol_type));
+}
+SDFAT_ATTR(type, 0444, type_show, NULL);
+
+/* sysfs "eio": bitmask of previously recorded I/O errors. */
+static ssize_t eio_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ FS_INFO_T *fsi = &(sbi->fsi);
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", fsi->prev_eio);
+}
+SDFAT_ATTR(eio, 0444, eio_show, NULL);
+
+/* sysfs "fratio": percentage of dirty AUs among non-full AUs. */
+static ssize_t fratio_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_total_au = 0;
+ unsigned int n_clean_au = 0;
+ unsigned int n_full_au = 0;
+ unsigned int n_dirty_au = 0;
+ unsigned int fr = 0;
+
+ n_total_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL);
+ n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN);
+ n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL);
+ n_dirty_au = n_total_au - (n_full_au + n_clean_au);
+
+ if (!n_dirty_au)
+  fr = 0;
+ else if (!n_clean_au)
+  fr = 100;
+ else
+  fr = (n_dirty_au * 100) / (n_clean_au + n_dirty_au);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", fr);
+}
+SDFAT_ATTR(fratio, 0444, fratio_show, NULL);
+
+/* sysfs "totalau": total allocation-unit count. */
+static ssize_t totalau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_au = 0;
+
+ n_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_TOTAL);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_au);
+}
+SDFAT_ATTR(totalau, 0444, totalau_show, NULL);
+
+/* sysfs "cleanau": clean allocation-unit count. */
+static ssize_t cleanau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_clean_au = 0;
+
+ n_clean_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_CLEAN);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_clean_au);
+}
+SDFAT_ATTR(cleanau, 0444, cleanau_show, NULL);
+
+/* sysfs "fullau": full allocation-unit count. */
+static ssize_t fullau_show(struct sdfat_sb_info *sbi, char *buf)
+{
+ unsigned int n_full_au = 0;
+
+ n_full_au = fsapi_get_au_stat(sbi->host_sb, VOL_AU_STAT_FULL);
+ return snprintf(buf, PAGE_SIZE, "%u\n", n_full_au);
+}
+SDFAT_ATTR(fullau, 0444, fullau_show, NULL);
+
+/* Per-superblock sysfs attributes exposed under the sdfat kobject. */
+static struct attribute *sdfat_attrs[] = {
+ &sdfat_attr_type.attr,
+ &sdfat_attr_eio.attr,
+ &sdfat_attr_fratio.attr,
+ &sdfat_attr_totalau.attr,
+ &sdfat_attr_cleanau.attr,
+ &sdfat_attr_fullau.attr,
+ NULL,
+};
+
+/* kobject type binding the attributes to the sdfat sysfs ops. */
+static struct kobj_type sdfat_ktype = {
+ .default_attrs = sdfat_attrs,
+ .sysfs_ops     = &sdfat_attr_ops,
+};
+
+/* Module-level sysfs "version": report the sdFAT driver version. */
+static ssize_t version_show(struct kobject *kobj,
+  struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "FS Version %s\n", SDFAT_VERSION);
+}
+
+static struct kobj_attribute version_attr = __ATTR_RO(version);
+
+static struct attribute *attributes[] = {
+ &version_attr.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group = {
+ .attrs = attributes,
+};
+
+/*======================================================================*/
+/* Super Block Read Operations */
+/*======================================================================*/
+
+/* Token ids for the mount options accepted in sdfat_tokens below. */
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_umask,
+ Opt_dmask,
+ Opt_fmask,
+ Opt_allow_utime,
+ Opt_codepage,
+ Opt_charset,
+ Opt_utf8,
+ Opt_namecase,
+ Opt_tz_utc,
+ Opt_adj_hidsect,
+ Opt_delay,
+ Opt_smart,
+ Opt_ausize,
+ Opt_packing,
+ Opt_defrag,
+ Opt_symlink,
+ Opt_debug,
+ Opt_err_cont,
+ Opt_err_panic,
+ Opt_err_ro,
+ Opt_err,
+ Opt_discard,
+ Opt_fs,
+ Opt_adj_req,
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ Opt_shortname_lower,
+ Opt_shortname_win95,
+ Opt_shortname_winnt,
+ Opt_shortname_mixed,
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+};
+
+/* match_token() patterns; keep in sync with __sdfat_show_options(). */
+static const match_table_t sdfat_tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_umask, "umask=%o"},
+ {Opt_dmask, "dmask=%o"},
+ {Opt_fmask, "fmask=%o"},
+ {Opt_allow_utime, "allow_utime=%o"},
+ {Opt_codepage, "codepage=%u"},
+ {Opt_charset, "iocharset=%s"},
+ {Opt_utf8, "utf8"},
+ {Opt_namecase, "namecase=%u"},
+ {Opt_tz_utc, "tz=UTC"},
+ {Opt_adj_hidsect, "adj_hid"},
+ {Opt_delay, "delay"},
+ {Opt_smart, "smart"},
+ {Opt_ausize, "ausize=%u"},
+ {Opt_packing, "packing=%u"},
+ {Opt_defrag, "defrag"},
+ {Opt_symlink, "symlink=%u"},
+ {Opt_debug, "debug"},
+ {Opt_err_cont, "errors=continue"},
+ {Opt_err_panic, "errors=panic"},
+ {Opt_err_ro, "errors=remount-ro"},
+ {Opt_discard, "discard"},
+ {Opt_fs, "fs=%s"},
+ {Opt_adj_req, "adj_req"},
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ {Opt_shortname_lower, "shortname=lower"},
+ {Opt_shortname_win95, "shortname=win95"},
+ {Opt_shortname_winnt, "shortname=winnt"},
+ {Opt_shortname_mixed, "shortname=mixed"},
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+ {Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options, int silent,
+ int *debug, struct sdfat_mount_options *opts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option, i;
+ char *tmpstr;
+
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
+ opts->fs_fmask = opts->fs_dmask = current->fs->umask;
+ opts->allow_utime = (unsigned short) -1;
+ opts->codepage = sdfat_default_codepage;
+ opts->iocharset = sdfat_default_iocharset;
+ opts->casesensitive = 0;
+ opts->utf8 = 0;
+ opts->adj_hidsect = 0;
+ opts->tz_utc = 0;
+ opts->improved_allocation = 0;
+ opts->amap_opt.pack_ratio = 0; // Default packing
+ opts->amap_opt.sect_per_au = 0;
+ opts->amap_opt.misaligned_sect = 0;
+ opts->symlink = 0;
+ opts->errors = SDFAT_ERRORS_RO;
+ opts->discard = 0;
+ *debug = 0;
+
+ if (!options)
+ goto out;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+ token = match_token(p, sdfat_tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->fs_uid = make_kuid(current_user_ns(), option);
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->fs_gid = make_kgid(current_user_ns(), option);
+ break;
+ case Opt_umask:
+ case Opt_dmask:
+ case Opt_fmask:
+ if (match_octal(&args[0], &option))
+ return 0;
+ if (token != Opt_dmask)
+ opts->fs_fmask = option;
+ if (token != Opt_fmask)
+ opts->fs_dmask = option;
+ break;
+ case Opt_allow_utime:
+ if (match_octal(&args[0], &option))
+ return 0;
+ opts->allow_utime = option & (S_IWGRP | S_IWOTH);
+ break;
+ case Opt_codepage:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->codepage = option;
+ break;
+ case Opt_charset:
+ if (opts->iocharset != sdfat_default_iocharset)
+ kfree(opts->iocharset);
+ tmpstr = match_strdup(&args[0]);
+ if (!tmpstr)
+ return -ENOMEM;
+ opts->iocharset = tmpstr;
+ break;
+ case Opt_namecase:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->casesensitive = (option > 0) ? 1:0;
+ break;
+ case Opt_utf8:
+ opts->utf8 = 1;
+ break;
+ case Opt_adj_hidsect:
+ opts->adj_hidsect = 1;
+ break;
+ case Opt_tz_utc:
+ opts->tz_utc = 1;
+ break;
+ case Opt_symlink:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->symlink = option > 0 ? 1 : 0;
+ break;
+ case Opt_delay:
+ opts->improved_allocation |= SDFAT_ALLOC_DELAY;
+ break;
+ case Opt_smart:
+ opts->improved_allocation |= SDFAT_ALLOC_SMART;
+ break;
+ case Opt_ausize:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ if (!is_power_of_2(option))
+ return -EINVAL;
+ opts->amap_opt.sect_per_au = option;
+ IMSG("set AU size by option : %u sectors\n", option);
+ break;
+ case Opt_packing:
+ if (match_int(&args[0], &option))
+ return 0;
+ opts->amap_opt.pack_ratio = option;
+ break;
+ case Opt_defrag:
+#ifdef CONFIG_SDFAT_DFR
+ opts->defrag = 1;
+#else
+ IMSG("defragmentation config is not enabled. ignore\n");
+#endif
+ break;
+ case Opt_err_cont:
+ opts->errors = SDFAT_ERRORS_CONT;
+ break;
+ case Opt_err_panic:
+ opts->errors = SDFAT_ERRORS_PANIC;
+ break;
+ case Opt_err_ro:
+ opts->errors = SDFAT_ERRORS_RO;
+ break;
+ case Opt_debug:
+ *debug = 1;
+ break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
+ case Opt_fs:
+ tmpstr = match_strdup(&args[0]);
+ if (!tmpstr)
+ return -ENOMEM;
+ for (i = 0; i < FS_TYPE_MAX; i++) {
+ if (!strcmp(tmpstr, FS_TYPE_STR[i])) {
+ opts->fs_type = (unsigned char)i;
+ sdfat_log_msg(sb, KERN_ERR,
+ "set fs-type by option : %s",
+ FS_TYPE_STR[i]);
+ break;
+ }
+ }
+ kfree(tmpstr);
+ if (i == FS_TYPE_MAX) {
+ sdfat_log_msg(sb, KERN_ERR,
+ "invalid fs-type, "
+ "only allow auto, exfat, vfat");
+ return -EINVAL;
+ }
+ break;
+ case Opt_adj_req:
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+ opts->adj_req = 1;
+#else
+ IMSG("adjust request config is not enabled. ignore\n");
+#endif
+ break;
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+ case Opt_shortname_lower:
+ case Opt_shortname_win95:
+ case Opt_shortname_mixed:
+ pr_warn("[SDFAT] DRAGONS AHEAD! sdFAT only understands \"shortname=winnt\"!\n");
+ case Opt_shortname_winnt:
+ break;
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+ default:
+ if (!silent) {
+ sdfat_msg(sb, KERN_ERR,
+ "unrecognized mount option \"%s\" "
+ "or missing value", p);
+ }
+ return -EINVAL;
+ }
+ }
+
+out:
+ if (opts->allow_utime == (unsigned short) -1)
+ opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
+
+ if (opts->utf8 && strcmp(opts->iocharset, sdfat_iocharset_with_utf8)) {
+ sdfat_msg(sb, KERN_WARNING,
+ "utf8 enabled, \"iocharset=%s\" is recommended",
+ sdfat_iocharset_with_utf8);
+ }
+
+	if (opts->discard) {
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+		/* FIX: only clear the discard flag when the device actually
+		 * lacks discard support. The old code put opts->discard = 0
+		 * outside the inner if (missing braces), so discard was
+		 * unconditionally disabled even on devices that support it.
+		 */
+		if (!blk_queue_discard(q)) {
+			sdfat_msg(sb, KERN_WARNING,
+				"mounting with \"discard\" option, but "
+				"the device does not support discard");
+			opts->discard = 0;
+		}
+	}
+
+ return 0;
+}
+
+/* Initialize the per-superblock inode hash table and its lock. */
+static void sdfat_hash_init(struct super_block *sb)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	int bucket;
+
+	spin_lock_init(&sbi->inode_hash_lock);
+	for (bucket = 0; bucket < SDFAT_HASH_SIZE; bucket++)
+		INIT_HLIST_HEAD(&sbi->inode_hashtable[bucket]);
+}
+
+/*
+ * Set up the in-memory inode for the volume's root directory.
+ * Fills the sdfat file id (fid) with the root-cluster location, reads the
+ * on-disk directory entry via fsapi_read_inode(), and populates the VFS
+ * inode fields (mode, ops, size, link count, timestamps).
+ * Returns 0 on success or -EIO if the on-disk entry cannot be read.
+ */
+static int sdfat_read_root(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	sdfat_timespec_t ts;
+	FS_INFO_T *fsi = &(sbi->fsi);
+	DIR_ENTRY_T info;
+
+	ts = CURRENT_TIME_SEC;
+
+	/* The root directory has no parent entry: entry = -1, and both the
+	 * containing-dir and start cluster point at the root cluster itself. */
+	SDFAT_I(inode)->fid.dir.dir = fsi->root_dir;
+	SDFAT_I(inode)->fid.dir.flags = 0x01;
+	SDFAT_I(inode)->fid.entry = -1;
+	SDFAT_I(inode)->fid.start_clu = fsi->root_dir;
+	SDFAT_I(inode)->fid.flags = 0x01;
+	SDFAT_I(inode)->fid.type = TYPE_DIR;
+	SDFAT_I(inode)->fid.version = 0;
+	SDFAT_I(inode)->fid.rwoffset = 0;
+	/* No cached block-map hint yet. */
+	SDFAT_I(inode)->fid.hint_bmap.off = CLUS_EOF;
+	SDFAT_I(inode)->fid.hint_stat.eidx = 0;
+	SDFAT_I(inode)->fid.hint_stat.clu = fsi->root_dir;
+	SDFAT_I(inode)->fid.hint_femp.eidx = -1;
+
+	SDFAT_I(inode)->target = NULL;
+
+	if (fsapi_read_inode(inode, &info) < 0)
+		return -EIO;
+
+	inode->i_uid = sbi->options.fs_uid;
+	inode->i_gid = sbi->options.fs_gid;
+	inode_inc_iversion(inode);
+	inode->i_generation = 0;
+	inode->i_mode = sdfat_make_mode(sbi, ATTR_SUBDIR, S_IRWXUGO);
+	inode->i_op = &sdfat_dir_inode_operations;
+	inode->i_fop = &sdfat_dir_operations;
+
+	i_size_write(inode, info.Size);
+	SDFAT_I(inode)->fid.size = info.Size;
+	/* i_blocks = size rounded up to a whole cluster, in 512-byte units. */
+	inode->i_blocks = ((i_size_read(inode) + (fsi->cluster_size - 1))
+		& ~((loff_t)fsi->cluster_size - 1)) >> inode->i_blkbits;
+	/* Synthetic on-disk position: root cluster in the high 32 bits. */
+	SDFAT_I(inode)->i_pos = ((loff_t) fsi->root_dir << 32) | 0xffffffff;
+	SDFAT_I(inode)->i_size_aligned = i_size_read(inode);
+	SDFAT_I(inode)->i_size_ondisk = i_size_read(inode);
+
+	sdfat_save_attr(inode, ATTR_SUBDIR);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
+	/* +2 presumably accounts for "." and the parent link, FAT-style —
+	 * confirm against fsapi_read_inode()'s NumSubdirs semantics. */
+	set_nlink(inode, info.NumSubdirs + 2);
+	return 0;
+}
+
+
+
+/* Choose dentry operations: case-insensitive hash/compare unless the
+ * volume was mounted with the case-sensitive option. */
+static void setup_dops(struct super_block *sb)
+{
+	sb->s_d_op = SDFAT_SB(sb)->options.casesensitive ?
+			&sdfat_dentry_ops : &sdfat_ci_dentry_ops;
+}
+
+/*
+ * Superblock fill callback used by mount_bdev().
+ * Allocates and wires up sdfat_sb_info, parses mount options, mounts the
+ * on-disk volume via fsapi_mount(), loads the NLS tables, builds the root
+ * inode/dentry, and registers the per-mount sysfs kobject.
+ * On any failure the partially built state is unwound in reverse order
+ * via the failed_mount* labels.
+ */
+static int sdfat_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root_inode = NULL;
+	struct sdfat_sb_info *sbi;
+	int debug;
+	int err;
+	char buf[50];
+	struct block_device *bdev = sb->s_bdev;
+	dev_t bd_dev = bdev ? bdev->bd_dev : 0;
+
+	sdfat_log_msg(sb, KERN_INFO, "trying to mount...");
+
+	/*
+	 * GFP_KERNEL is ok here, because while we do hold the
+	 * superblock lock, memory pressure can't call back into
+	 * the filesystem, since we're only just about to mount
+	 * it and have no inodes etc active!
+	 */
+	sbi = kzalloc(sizeof(struct sdfat_sb_info), GFP_KERNEL);
+	if (!sbi) {
+		/* Fall back to vmalloc when contiguous pages are scarce;
+		 * use_vmalloc records which allocator must free sbi later. */
+		sdfat_log_msg(sb, KERN_INFO,
+			"trying to alloc sbi with vzalloc()");
+		sbi = vzalloc(sizeof(struct sdfat_sb_info));
+		if (!sbi) {
+			sdfat_log_msg(sb, KERN_ERR, "failed to mount! (ENOMEM)");
+			return -ENOMEM;
+		}
+		sbi->use_vmalloc = 1;
+	}
+
+	mutex_init(&sbi->s_vlock);
+	sb->s_fs_info = sbi;
+	sb->s_flags |= MS_NODIRATIME;
+	sb->s_magic = SDFAT_SUPER_MAGIC;
+	sb->s_op = &sdfat_sops;
+	ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
+				DEFAULT_RATELIMIT_BURST);
+	err = parse_options(sb, data, silent, &debug, &sbi->options);
+	if (err) {
+		sdfat_log_msg(sb, KERN_ERR, "failed to parse options");
+		goto failed_mount;
+	}
+
+	setup_sdfat_xattr_handler(sb);
+	setup_sdfat_sync_super_wq(sb);
+	setup_dops(sb);
+
+	err = fsapi_mount(sb);
+	if (err) {
+		sdfat_log_msg(sb, KERN_ERR, "failed to recognize fat type");
+		goto failed_mount;
+	}
+
+	/* set up enough so that it can read an inode */
+	sdfat_hash_init(sb);
+
+	/*
+	 * The low byte of FAT's first entry must have same value with
+	 * media-field. But in real world, too many devices are
+	 * writing wrong value. So, removed that validity check.
+	 *
+	 * if (FAT_FIRST_ENT(sb, media) != first)
+	 */
+
+	err = -EINVAL;
+	sprintf(buf, "cp%d", sbi->options.codepage);
+	sbi->nls_disk = load_nls(buf);
+	if (!sbi->nls_disk) {
+		sdfat_log_msg(sb, KERN_ERR, "codepage %s not found", buf);
+		goto failed_mount2;
+	}
+
+	sbi->nls_io = load_nls(sbi->options.iocharset);
+	if (!sbi->nls_io) {
+		sdfat_log_msg(sb, KERN_ERR, "IO charset %s not found",
+				sbi->options.iocharset);
+		goto failed_mount2;
+	}
+
+	err = __alloc_dfr_mem_if_required(sb);
+	if (err) {
+		sdfat_log_msg(sb, KERN_ERR, "failed to initialize a memory for "
+				"defragmentation");
+		goto failed_mount3;
+	}
+
+	err = -ENOMEM;
+	root_inode = new_inode(sb);
+	if (!root_inode) {
+		sdfat_log_msg(sb, KERN_ERR, "failed to allocate root inode.");
+		goto failed_mount3;
+	}
+
+	root_inode->i_ino = SDFAT_ROOT_INO;
+	inode_set_iversion(root_inode, 1);
+
+	err = sdfat_read_root(root_inode);
+	if (err) {
+		sdfat_log_msg(sb, KERN_ERR, "failed to initialize root inode.");
+		goto failed_mount3;
+	}
+
+	sdfat_attach(root_inode, SDFAT_I(root_inode)->i_pos);
+	insert_inode_hash(root_inode);
+
+	err = -ENOMEM;
+	sb->s_root = __d_make_root(root_inode);
+	if (!sb->s_root) {
+		sdfat_msg(sb, KERN_ERR, "failed to get the root dentry");
+		goto failed_mount3;
+	}
+
+	/*
+	 * Initialize filesystem attributes (for sysfs)
+	 * ex: /sys/fs/sdfat/mmcblk1[179:17]
+	 */
+	sbi->sb_kobj.kset = sdfat_kset;
+	err = kobject_init_and_add(&sbi->sb_kobj, &sdfat_ktype, NULL,
+			"%s[%d:%d]", sb->s_id, MAJOR(bd_dev), MINOR(bd_dev));
+	if (err) {
+		/* NOTE(review): kobject_init_and_add() failure normally
+		 * requires a kobject_put() to drop the initialized ref —
+		 * confirm none is needed here. */
+		sdfat_msg(sb, KERN_ERR, "Unable to create sdfat attributes for"
+					" %s[%d:%d](%d)", sb->s_id,
+					MAJOR(bd_dev), MINOR(bd_dev), err);
+		goto failed_mount3;
+	}
+
+	sdfat_log_msg(sb, KERN_INFO, "mounted successfully!");
+	/* FOR BIGDATA */
+	sdfat_statistics_set_mnt(&sbi->fsi);
+	sdfat_statistics_set_vol_size(sb);
+	return 0;
+
+failed_mount3:
+	__free_dfr_mem_if_required(sb);
+failed_mount2:
+	fsapi_umount(sb);
+failed_mount:
+	sdfat_log_msg(sb, KERN_INFO, "failed to mount! (%d)", err);
+
+	/* NOTE(review): if __d_make_root() already succeeded (kobject
+	 * failure path), the root dentry owns the inode reference; this
+	 * iput() may drop one reference too many — verify against the
+	 * kill_sb teardown path. */
+	if (root_inode)
+		iput(root_inode);
+	sb->s_root = NULL;
+
+	if (sbi->nls_io)
+		unload_nls(sbi->nls_io);
+	if (sbi->nls_disk)
+		unload_nls(sbi->nls_disk);
+	if (sbi->options.iocharset != sdfat_default_iocharset)
+		kfree(sbi->options.iocharset);
+	sb->s_fs_info = NULL;
+	/* Free sbi with the allocator that produced it (see top of function). */
+	if (!sbi->use_vmalloc)
+		kfree(sbi);
+	else
+		vfree(sbi);
+	return err;
+}
+
+/* ->mount hook: delegate to the generic block-device mount helper. */
+static struct dentry *sdfat_fs_mount(struct file_system_type *fs_type,
+				     int flags, const char *dev_name,
+				     void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, sdfat_fill_super);
+}
+
+/* Slab constructor: runs once per cache object, not per allocation. */
+static void init_once(void *foo)
+{
+	struct sdfat_inode_info *ei = foo;	/* no cast needed from void * */
+
+	INIT_HLIST_NODE(&ei->i_hash_fat);
+	inode_init_once(&ei->vfs_inode);
+}
+
+/* Create the slab cache backing struct sdfat_inode_info allocations.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created. */
+static int __init sdfat_init_inodecache(void)
+{
+	sdfat_inode_cachep = kmem_cache_create("sdfat_inode_cache",
+				sizeof(struct sdfat_inode_info),
+				0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+				init_once);
+
+	return sdfat_inode_cachep ? 0 : -ENOMEM;
+}
+
+/* Tear down the sdfat inode slab cache. */
+static void sdfat_destroy_inodecache(void)
+{
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(sdfat_inode_cachep);
+}
+
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+/*
+ * Debug variant of ->kill_sb. When SDFAT_DEBUGFLAGS_INVALID_UMOUNT was
+ * set (via the debug ioctl), all cached device data — including dirty
+ * pages — is dropped first to simulate sudden device removal, then the
+ * normal block-super teardown runs.
+ */
+static void sdfat_debug_kill_sb(struct super_block *sb)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(sb);
+	struct block_device *bdev = sb->s_bdev;
+
+	long flags;
+
+	if (sbi) {
+		flags = sbi->debug_flags;
+
+		if (flags & SDFAT_DEBUGFLAGS_INVALID_UMOUNT) {
+			/* invalidate_bdev drops all device cache include dirty.
+			 * we use this to simulate device removal
+			 */
+			fsapi_cache_release(sb);
+			invalidate_bdev(bdev);
+		}
+	}
+
+	kill_block_super(sb);
+}
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+/* Primary registration: mounts of type "sdfat". */
+static struct file_system_type sdfat_fs_type = {
+	.owner       = THIS_MODULE,
+	.name        = "sdfat",
+	.mount       = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	.kill_sb    = sdfat_debug_kill_sb,
+#else
+	.kill_sb    = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+	.fs_flags    = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("sdfat");
+
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+/* Optional alias registration so sdfat also services "exfat" mounts. */
+static struct file_system_type exfat_fs_type = {
+	.owner       = THIS_MODULE,
+	.name        = "exfat",
+	.mount       = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	.kill_sb    = sdfat_debug_kill_sb,
+#else
+	.kill_sb    = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+	.fs_flags    = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("exfat");
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+/* Optional alias registration so sdfat also services "vfat" mounts. */
+static struct file_system_type vfat_fs_type = {
+	.owner       = THIS_MODULE,
+	.name        = "vfat",
+	.mount       = sdfat_fs_mount,
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	.kill_sb    = sdfat_debug_kill_sb,
+#else
+	.kill_sb    = kill_block_super,
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+	.fs_flags    = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("vfat");
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+
+/*
+ * Module init: bring up the fs core, sysfs attributes, statistics,
+ * uevents and the inode cache, then register the filesystem type(s).
+ *
+ * FIX: if a later register_filesystem() call failed, the error path
+ * previously left the earlier, already-successful registrations in
+ * place while tearing down the inode cache underneath them. Roll back
+ * prior registrations before jumping to the common error path.
+ */
+static int __init init_sdfat_fs(void)
+{
+	int err;
+
+	sdfat_log_version();
+	err = fsapi_init();
+	if (err)
+		goto error;
+
+	sdfat_kset = kset_create_and_add("sdfat", NULL, fs_kobj);
+	if (!sdfat_kset) {
+		pr_err("[SDFAT] failed to create sdfat kset\n");
+		err = -ENOMEM;
+		goto error;
+	}
+
+	err = sysfs_create_group(&sdfat_kset->kobj, &attr_group);
+	if (err) {
+		pr_err("[SDFAT] failed to create sdfat version attributes\n");
+		goto error;
+	}
+
+	err = sdfat_statistics_init(sdfat_kset);
+	if (err)
+		goto error;
+
+	err = sdfat_uevent_init(sdfat_kset);
+	if (err)
+		goto error;
+
+	err = sdfat_init_inodecache();
+	if (err) {
+		pr_err("[SDFAT] failed to initialize inode cache\n");
+		goto error;
+	}
+
+	err = register_filesystem(&sdfat_fs_type);
+	if (err) {
+		pr_err("[SDFAT] failed to register filesystem\n");
+		goto error;
+	}
+
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+	err = register_filesystem(&exfat_fs_type);
+	if (err) {
+		pr_err("[SDFAT] failed to register for exfat filesystem\n");
+		/* roll back the registration that already succeeded */
+		unregister_filesystem(&sdfat_fs_type);
+		goto error;
+	}
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+	err = register_filesystem(&vfat_fs_type);
+	if (err) {
+		pr_err("[SDFAT] failed to register for vfat filesystem\n");
+		/* roll back the registrations that already succeeded */
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+		unregister_filesystem(&exfat_fs_type);
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+		unregister_filesystem(&sdfat_fs_type);
+		goto error;
+	}
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+
+	return 0;
+error:
+	sdfat_uevent_uninit();
+	sdfat_statistics_uninit();
+
+	if (sdfat_kset) {
+		sysfs_remove_group(&sdfat_kset->kobj, &attr_group);
+		kset_unregister(sdfat_kset);
+		sdfat_kset = NULL;
+	}
+
+	/* kmem_cache_destroy() is a no-op on a NULL cache pointer. */
+	sdfat_destroy_inodecache();
+	fsapi_shutdown();
+
+	pr_err("[SDFAT] failed to initialize FS driver(err:%d)\n", err);
+	return err;
+}
+
+/*
+ * Module exit: mirror init_sdfat_fs() in reverse.
+ *
+ * FIX: the filesystem types must be unregistered BEFORE the inode slab
+ * cache is destroyed — the old order destroyed the cache while the
+ * types were still registered and could, in principle, still allocate
+ * inodes from it. The rcu_barrier() inside sdfat_destroy_inodecache()
+ * also only makes sense once no new mounts can appear.
+ */
+static void __exit exit_sdfat_fs(void)
+{
+	sdfat_uevent_uninit();
+	sdfat_statistics_uninit();
+
+	if (sdfat_kset) {
+		sysfs_remove_group(&sdfat_kset->kobj, &attr_group);
+		kset_unregister(sdfat_kset);
+		sdfat_kset = NULL;
+	}
+
+	unregister_filesystem(&sdfat_fs_type);
+#ifdef CONFIG_SDFAT_USE_FOR_EXFAT
+	unregister_filesystem(&exfat_fs_type);
+#endif /* CONFIG_SDFAT_USE_FOR_EXFAT */
+#ifdef CONFIG_SDFAT_USE_FOR_VFAT
+	unregister_filesystem(&vfat_fs_type);
+#endif /* CONFIG_SDFAT_USE_FOR_VFAT */
+
+	sdfat_destroy_inodecache();
+	fsapi_shutdown();
+}
+
+module_init(init_sdfat_fs);
+module_exit(exit_sdfat_fs);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FAT/exFAT filesystem support");
+MODULE_AUTHOR("Samsung Electronics Co., Ltd.");
+
diff --git a/fs/sdfat/sdfat.h b/fs/sdfat/sdfat.h
new file mode 100644
index 000000000000..60f7811c7b99
--- /dev/null
+++ b/fs/sdfat/sdfat.h
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_H
+#define _SDFAT_H
+
+#include <linux/buffer_head.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/nls.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/version.h>
+#include <linux/kobject.h>
+#include "api.h"
+
+#ifdef CONFIG_SDFAT_DFR
+#include "dfr.h"
+#endif
+
+/*
+ * sdfat error flags
+ */
+#define SDFAT_ERRORS_CONT (1) /* ignore error and continue */
+#define SDFAT_ERRORS_PANIC (2) /* panic on error */
+#define SDFAT_ERRORS_RO (3) /* remount r/o on error */
+
+/*
+ * sdfat allocator flags
+ */
+#define SDFAT_ALLOC_DELAY (1) /* Delayed allocation */
+#define SDFAT_ALLOC_SMART (2) /* Smart allocation */
+
+/*
+ * sdfat allocator destination for smart allocation
+ */
+#define ALLOC_NOWHERE (0)
+#define ALLOC_COLD (1)
+#define ALLOC_HOT (16)
+#define ALLOC_COLD_ALIGNED (1)
+#define ALLOC_COLD_PACKING (2)
+#define ALLOC_COLD_SEQ (4)
+
+/*
+ * sdfat nls lossy flag
+ */
+#define NLS_NAME_NO_LOSSY (0x00) /* no lossy */
+#define NLS_NAME_LOSSY (0x01) /* just detected incorrect filename(s) */
+#define NLS_NAME_OVERLEN (0x02) /* the length is over than its limit */
+
+/*
+ * sdfat common MACRO
+ */
+#define CLUSTER_16(x) ((u16)((x) & 0xFFFFU))
+#define CLUSTER_32(x) ((u32)((x) & 0xFFFFFFFFU))
+#define CLUS_EOF CLUSTER_32(~0)
+#define CLUS_BAD (0xFFFFFFF7U)
+#define CLUS_FREE (0)
+#define CLUS_BASE (2)
+#define IS_CLUS_EOF(x) ((x) == CLUS_EOF)
+#define IS_CLUS_BAD(x) ((x) == CLUS_BAD)
+#define IS_CLUS_FREE(x) ((x) == CLUS_FREE)
+#define IS_LAST_SECT_IN_CLUS(fsi, sec) \
+ ((((sec) - (fsi)->data_start_sector + 1) \
+ & ((1 << (fsi)->sect_per_clus_bits) - 1)) == 0)
+
+#define CLUS_TO_SECT(fsi, x) \
+ ((((unsigned long long)(x) - CLUS_BASE) << (fsi)->sect_per_clus_bits) + (fsi)->data_start_sector)
+
+#define SECT_TO_CLUS(fsi, sec) \
+ ((u32)((((sec) - (fsi)->data_start_sector) >> (fsi)->sect_per_clus_bits) + CLUS_BASE))
+
+/* variables defined at sdfat.c */
+extern const char *FS_TYPE_STR[];
+
+enum {
+ FS_TYPE_AUTO,
+ FS_TYPE_EXFAT,
+ FS_TYPE_VFAT,
+ FS_TYPE_MAX
+};
+
+/*
+ * sdfat mount in-memory data
+ */
+struct sdfat_mount_options {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+	kuid_t fs_uid;
+	kgid_t fs_gid;
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) */
+	uid_t fs_uid;
+	gid_t fs_gid;
+#endif
+	unsigned short fs_fmask;	/* umask applied to regular files */
+	unsigned short fs_dmask;	/* umask applied to directories */
+	unsigned short allow_utime;	/* permission for setting the [am]time */
+	unsigned short codepage;	/* codepage for shortname conversions */
+	char *iocharset;		/* charset for filename input/display */
+	struct {
+		unsigned int pack_ratio;
+		unsigned int sect_per_au;	/* sectors per allocation unit */
+		unsigned int misaligned_sect;
+	} amap_opt;			/* AMAP-related options (see amap.c) */
+
+	unsigned char utf8;		/* use UTF-8 for name conversion */
+	unsigned char casesensitive;	/* case-sensitive name lookup */
+	unsigned char adj_hidsect;
+	unsigned char tz_utc;		/* interpret timestamps as UTC */
+	unsigned char improved_allocation;	/* SDFAT_ALLOC_* flags */
+	unsigned char defrag;
+	unsigned char symlink;		/* support symlink operation */
+	unsigned char errors;		/* on error: continue, panic, remount-ro */
+	unsigned char discard;		/* flag on if -o discard specified and device supports discard() */
+	unsigned char fs_type;		/* fs_type that user specified */
+	unsigned short adj_req;		/* support aligned mpage write */
+};
+
+#define SDFAT_HASH_BITS 8
+#define SDFAT_HASH_SIZE (1UL << SDFAT_HASH_BITS)
+
+/*
+ * SDFAT file system superblock in-memory data
+ */
+struct sdfat_sb_info {
+	FS_INFO_T fsi;	/* private filesystem info */
+
+	struct mutex s_vlock;	/* volume lock */
+	int use_vmalloc;	/* nonzero: sbi came from vzalloc(), free with vfree() */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	int s_dirt;
+	struct mutex s_lock;	/* superblock lock */
+	int write_super_queued;			/* Write_super work is pending? */
+	struct delayed_work write_super_work;   /* Work_queue data structure for write_super() */
+	spinlock_t work_lock;	/* Lock for WQ */
+#endif
+	struct super_block *host_sb;		/* sb pointer */
+	struct sdfat_mount_options options;
+	struct nls_table *nls_disk; /* Codepage used on disk */
+	struct nls_table *nls_io;   /* Charset used for input and display */
+	struct ratelimit_state ratelimit;
+
+	/* inode_hash_lock protects inode_hashtable (see sdfat_hash_init). */
+	spinlock_t inode_hash_lock;
+	struct hlist_head inode_hashtable[SDFAT_HASH_SIZE];
+	struct kobject sb_kobj;	/* /sys/fs/sdfat/<dev> attributes */
+#ifdef CONFIG_SDFAT_DBG_IOCTL
+	long debug_flags;	/* SDFAT_DEBUGFLAGS_* set via debug ioctl */
+#endif /* CONFIG_SDFAT_DBG_IOCTL */
+
+#ifdef CONFIG_SDFAT_DFR
+	struct defrag_info dfr_info;
+	struct completion dfr_complete;
+	unsigned int *dfr_new_clus;
+	int dfr_new_idx;
+	unsigned int *dfr_page_wb;
+	void **dfr_pagep;
+	unsigned int dfr_hint_clus;
+	unsigned int dfr_hint_idx;
+	int dfr_reserved_clus;
+
+#ifdef CONFIG_SDFAT_DFR_DEBUG
+	int dfr_spo_flag;
+#endif	/* CONFIG_SDFAT_DFR_DEBUG */
+
+#endif	/* CONFIG_SDFAT_DFR */
+
+#ifdef CONFIG_SDFAT_TRACE_IO
+	/* Statistics for allocator */
+	unsigned int stat_n_pages_written;	/* # of written pages in total */
+	unsigned int stat_n_pages_added;	/* # of added blocks in total */
+	unsigned int stat_n_bdev_pages_written;	/* # of written pages owned by bdev inode */
+	unsigned int stat_n_pages_confused;
+#endif
+	atomic_t stat_n_pages_queued;	/* # of pages in the request queue (approx.) */
+};
+
+/*
+ * SDFAT file system inode in-memory data
+ */
+struct sdfat_inode_info {
+	FILE_ID_T fid;	/* low-level file id (location/size on disk) */
+	char *target;	/* presumably the symlink target string — confirm in namei code */
+	/* NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access */
+	loff_t i_size_ondisk;         /* physically allocated size */
+	loff_t i_size_aligned;        /* block-aligned i_size (used in cont_write_begin) */
+	loff_t i_pos;	       /* on-disk position of directory entry or 0 */
+	struct hlist_node i_hash_fat;	/* hash by i_location */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	struct rw_semaphore truncate_lock; /* protect bmap against truncate */
+#endif
+#ifdef	CONFIG_SDFAT_DFR
+	struct defrag_info dfr_info;
+#endif
+	struct inode vfs_inode;	/* embedded VFS inode; must stay last-agnostic via container_of */
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+typedef struct timespec64 sdfat_timespec_t;
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) */
+typedef struct timespec sdfat_timespec_t;
+#endif
+
+/*
+ * FIXME : needs on-disk-slot in-memory data
+ */
+
+/* static inline functons */
+/* Map an on-disk volume type code to a human-readable label. */
+static inline const char *sdfat_get_vol_type_str(unsigned int type)
+{
+	switch (type) {
+	case EXFAT:
+		return "exfat";
+	case FAT32:
+		return "vfat:32";
+	case FAT16:
+		return "vfat:16";
+	case FAT12:
+		return "vfat:12";
+	default:
+		return "unknown";
+	}
+}
+
+/* Fetch the sdfat-private superblock info from a VFS super_block. */
+static inline struct sdfat_sb_info *SDFAT_SB(struct super_block *sb)
+{
+	return (struct sdfat_sb_info *)sb->s_fs_info;
+}
+
+/* Recover the containing sdfat_inode_info from its embedded VFS inode. */
+static inline struct sdfat_inode_info *SDFAT_I(struct inode *inode)
+{
+	return container_of(inode, struct sdfat_inode_info, vfs_inode);
+}
+
+/*
+ * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
+ * save ATTR_RO instead of ->i_mode.
+ *
+ * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only
+ * bit, it's just used as flag for app.
+ */
+/*
+ * Return 1 when ->i_mode can represent ATTR_RO via missing write bits.
+ * Directories never can; regular files can only when fs_fmask leaves at
+ * least one write permission bit under our control.
+ */
+static inline int sdfat_mode_can_hold_ro(struct inode *inode)
+{
+	struct sdfat_sb_info *sbi = SDFAT_SB(inode->i_sb);
+
+	return !S_ISDIR(inode->i_mode) &&
+	       (((~sbi->options.fs_fmask) & S_IWUGO) != 0);
+}
+
+/*
+ * FIXME : needs to check symlink option.
+ */
+/* Convert attribute bits and a mask to the UNIX mode. */
+/*
+ * Build a UNIX mode from FAT attribute bits: clear write bits for
+ * read-only non-directories, then apply the directory/file umask and
+ * the matching S_IF* type bit.
+ */
+static inline mode_t sdfat_make_mode(struct sdfat_sb_info *sbi,
+					u32 attr, mode_t mode)
+{
+	if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR))
+		mode &= ~S_IWUGO;
+
+	if (attr & ATTR_SUBDIR)
+		return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
+	else if (attr & ATTR_SYMLINK)
+		/* symlinks use the directory umask, not the file umask */
+		return (mode & ~sbi->options.fs_dmask) | S_IFLNK;
+	else
+		return (mode & ~sbi->options.fs_fmask) | S_IFREG;
+}
+
+/* Return the FAT attribute byte for this inode */
+/* Return the FAT attribute byte for this inode, deriving SUBDIR and
+ * READONLY from the current VFS mode on top of the stored fid.attr. */
+static inline u32 sdfat_make_attr(struct inode *inode)
+{
+	u32 attrs = SDFAT_I(inode)->fid.attr;
+
+	if (S_ISDIR(inode->i_mode))
+		attrs |= ATTR_SUBDIR;
+	if (sdfat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
+		attrs |= ATTR_READONLY;
+	return attrs;
+}
+
+/*
+ * Store FAT attribute bits into fid.attr. ATTR_READONLY is kept only
+ * when i_mode cannot represent it (see sdfat_mode_can_hold_ro); when
+ * the mode can hold it, the mode is the single source of truth.
+ */
+static inline void sdfat_save_attr(struct inode *inode, u32 attr)
+{
+	u32 keep = ATTR_RWMASK;
+
+	if (!sdfat_mode_can_hold_ro(inode))
+		keep |= ATTR_READONLY;
+
+	SDFAT_I(inode)->fid.attr = attr & keep;
+}
+
+/* sdfat/statistics.c */
+/* bigdata function */
+#ifdef CONFIG_SDFAT_STATISTICS
+extern int sdfat_statistics_init(struct kset *sdfat_kset);
+extern void sdfat_statistics_uninit(void);
+extern void sdfat_statistics_set_mnt(FS_INFO_T *fsi);
+extern void sdfat_statistics_set_mnt_ro(void);
+extern void sdfat_statistics_set_mkdir(u8 flags);
+extern void sdfat_statistics_set_create(u8 flags);
+extern void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create);
+extern void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu);
+extern void sdfat_statistics_set_vol_size(struct super_block *sb);
+#else
+static inline int sdfat_statistics_init(struct kset *sdfat_kset)
+{
+ return 0;
+}
+static inline void sdfat_statistics_uninit(void) {};
+static inline void sdfat_statistics_set_mnt(FS_INFO_T *fsi) {};
+static inline void sdfat_statistics_set_mnt_ro(void) {};
+static inline void sdfat_statistics_set_mkdir(u8 flags) {};
+static inline void sdfat_statistics_set_create(u8 flags) {};
+static inline void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create) {};
+static inline void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu) {};
+static inline void sdfat_statistics_set_vol_size(struct super_block *sb) {};
+#endif
+
+/* sdfat/nls.c */
+/* NLS management function */
+s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b);
+s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b);
+s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy);
+s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname);
+s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 len);
+s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring,
+ const s32 len, UNI_NAME_T *uniname, s32 *p_lossy);
+
+/* sdfat/mpage.c */
+#ifdef CONFIG_SDFAT_ALIGNED_MPAGE_WRITE
+int sdfat_mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t *get_block);
+#endif
+
+/* sdfat/xattr.c */
+#ifdef CONFIG_SDFAT_VIRTUAL_XATTR
+void setup_sdfat_xattr_handler(struct super_block *sb);
+extern int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags);
+extern ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size);
+extern ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size);
+extern int sdfat_removexattr(struct dentry *dentry, const char *name);
+#else
+static inline void setup_sdfat_xattr_handler(struct super_block *sb) {};
+#endif
+
+/* sdfat/misc.c */
+#ifdef CONFIG_SDFAT_UEVENT
+extern int sdfat_uevent_init(struct kset *sdfat_kset);
+extern void sdfat_uevent_uninit(void);
+extern void sdfat_uevent_ro_remount(struct super_block *sb);
+#else
+static inline int sdfat_uevent_init(struct kset *sdfat_kset)
+{
+ return 0;
+}
+static inline void sdfat_uevent_uninit(void) {};
+static inline void sdfat_uevent_ro_remount(struct super_block *sb) {};
+#endif
+extern void
+__sdfat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+ __printf(3, 4) __cold;
+#define sdfat_fs_error(sb, fmt, args...) \
+ __sdfat_fs_error(sb, 1, fmt, ## args)
+#define sdfat_fs_error_ratelimit(sb, fmt, args...) \
+ __sdfat_fs_error(sb, __ratelimit(&SDFAT_SB(sb)->ratelimit), fmt, ## args)
+extern void
+__sdfat_msg(struct super_block *sb, const char *lv, int st, const char *fmt, ...)
+ __printf(4, 5) __cold;
+#define sdfat_msg(sb, lv, fmt, args...) \
+ __sdfat_msg(sb, lv, 0, fmt, ## args)
+#define sdfat_log_msg(sb, lv, fmt, args...) \
+ __sdfat_msg(sb, lv, 1, fmt, ## args)
+extern void sdfat_log_version(void);
+extern void sdfat_time_fat2unix(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp);
+extern void sdfat_time_unix2fat(struct sdfat_sb_info *sbi, sdfat_timespec_t *ts,
+ DATE_TIME_T *tp);
+extern TIMESTAMP_T *tm_now(struct sdfat_sb_info *sbi, TIMESTAMP_T *tm);
+
+#ifdef CONFIG_SDFAT_DEBUG
+
+#ifdef CONFIG_SDFAT_DBG_CAREFUL
+void sdfat_debug_check_clusters(struct inode *inode);
+#else
+#define sdfat_debug_check_clusters(inode)
+#endif /* CONFIG_SDFAT_DBG_CAREFUL */
+
+#ifdef CONFIG_SDFAT_DBG_BUGON
+#define sdfat_debug_bug_on(expr) BUG_ON(expr)
+#else
+#define sdfat_debug_bug_on(expr)
+#endif
+
+#ifdef CONFIG_SDFAT_DBG_WARNON
+#define sdfat_debug_warn_on(expr) WARN_ON(expr)
+#else
+#define sdfat_debug_warn_on(expr)
+#endif
+
+#else /* CONFIG_SDFAT_DEBUG */
+
+#define sdfat_debug_check_clusters(inode)
+#define sdfat_debug_bug_on(expr)
+#define sdfat_debug_warn_on(expr)
+
+#endif /* CONFIG_SDFAT_DEBUG */
+
+#ifdef CONFIG_SDFAT_TRACE_ELAPSED_TIME
+u32 sdfat_time_current_usec(struct timeval *tv);
+extern struct timeval __t1;
+extern struct timeval __t2;
+
+#define TIME_GET(tv)		sdfat_time_current_usec(tv)
+#define TIME_START(s)		sdfat_time_current_usec(s)
+#define TIME_END(e)		sdfat_time_current_usec(e)
+#define TIME_ELAPSED(s, e)	((u32)(((e)->tv_sec - (s)->tv_sec) * 1000000 + \
+				((e)->tv_usec - (s)->tv_usec)))
+/* FIX: the old PRINT_TIME expanded to "(__t2 - __t1)", subtracting two
+ * struct timeval objects directly — a compile error whenever the macro
+ * is used. Compute the elapsed microseconds via TIME_ELAPSED() and use
+ * a matching %u conversion for the u32 result.
+ */
+#define PRINT_TIME(n)		pr_info("[SDFAT] Elapsed time %d = %u (usec)\n", \
+				(n), TIME_ELAPSED(&__t1, &__t2))
+#else /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+#define TIME_GET(tv)		(0)
+#define TIME_START(s)
+#define TIME_END(e)
+#define TIME_ELAPSED(s, e)	(0)
+#define PRINT_TIME(n)
+#endif /* CONFIG_SDFAT_TRACE_ELAPSED_TIME */
+
+#define SDFAT_MSG_LV_NONE (0x00000000)
+#define SDFAT_MSG_LV_ERR (0x00000001)
+#define SDFAT_MSG_LV_INFO (0x00000002)
+#define SDFAT_MSG_LV_DBG (0x00000003)
+#define SDFAT_MSG_LV_MORE (0x00000004)
+#define SDFAT_MSG_LV_TRACE (0x00000005)
+#define SDFAT_MSG_LV_ALL (0x00000006)
+
+#define SDFAT_MSG_LEVEL SDFAT_MSG_LV_INFO
+
+#define SDFAT_TAG_NAME "SDFAT"
+#define __S(x) #x
+#define _S(x) __S(x)
+
+extern void __sdfat_dmsg(int level, const char *fmt, ...) __printf(2, 3) __cold;
+
+#define SDFAT_EMSG_T(level, ...) \
+ __sdfat_dmsg(level, KERN_ERR "[" SDFAT_TAG_NAME "] [" _S(__FILE__) "(" _S(__LINE__) ")] " __VA_ARGS__)
+#define SDFAT_DMSG_T(level, ...) \
+ __sdfat_dmsg(level, KERN_INFO "[" SDFAT_TAG_NAME "] " __VA_ARGS__)
+
+#define SDFAT_EMSG(...) SDFAT_EMSG_T(SDFAT_MSG_LV_ERR, __VA_ARGS__)
+#define SDFAT_IMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_INFO, __VA_ARGS__)
+#define SDFAT_DMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_DBG, __VA_ARGS__)
+#define SDFAT_MMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_MORE, __VA_ARGS__)
+#define SDFAT_TMSG(...) SDFAT_DMSG_T(SDFAT_MSG_LV_TRACE, __VA_ARGS__)
+
+#define EMSG(...)
+#define IMSG(...)
+#define DMSG(...)
+#define MMSG(...)
+#define TMSG(...)
+
+#define EMSG_VAR(exp)
+#define IMSG_VAR(exp)
+#define DMSG_VAR(exp)
+#define MMSG_VAR(exp)
+#define TMSG_VAR(exp)
+
+#ifdef CONFIG_SDFAT_DBG_MSG
+
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_ERR)
+#undef EMSG
+#undef EMSG_VAR
+#define EMSG(...) SDFAT_EMSG(__VA_ARGS__)
+#define EMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_INFO)
+#undef IMSG
+#undef IMSG_VAR
+#define IMSG(...) SDFAT_IMSG(__VA_ARGS__)
+#define IMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_DBG)
+#undef DMSG
+#undef DMSG_VAR
+#define DMSG(...) SDFAT_DMSG(__VA_ARGS__)
+#define DMSG_VAR(exp) exp
+#endif
+
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_MORE)
+#undef MMSG
+#undef MMSG_VAR
+#define MMSG(...) SDFAT_MMSG(__VA_ARGS__)
+#define MMSG_VAR(exp) exp
+#endif
+
+/* should replace with trace function */
+#if (SDFAT_MSG_LEVEL >= SDFAT_MSG_LV_TRACE)
+#undef TMSG
+#undef TMSG_VAR
+#define TMSG(...) SDFAT_TMSG(__VA_ARGS__)
+#define TMSG_VAR(exp) exp
+#endif
+
+#endif /* CONFIG_SDFAT_DBG_MSG */
+
+
+#define ASSERT(expr) { \
+ if (!(expr)) { \
+ pr_err("Assertion failed! %s\n", #expr); \
+ BUG_ON(1); \
+ } \
+}
+
+#endif /* !_SDFAT_H */
+
diff --git a/fs/sdfat/sdfat_fs.h b/fs/sdfat/sdfat_fs.h
new file mode 100644
index 000000000000..23b5cda2f58c
--- /dev/null
+++ b/fs/sdfat/sdfat_fs.h
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SDFAT_FS_H
+#define _SDFAT_FS_H
+
+#include <linux/types.h>
+#include <linux/magic.h>
+#include <asm/byteorder.h>
+
+/*----------------------------------------------------------------------*/
+/* Constant & Macro Definitions */
+/*----------------------------------------------------------------------*/
+#ifndef MSDOS_SUPER_MAGIC
+#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
+#endif
+
+#ifndef EXFAT_SUPER_MAGIC
+#define EXFAT_SUPER_MAGIC (0x2011BAB0UL)
+#endif /* EXFAT_SUPER_MAGIC */
+
+#ifndef SDFAT_SUPER_MAGIC
+#define SDFAT_SUPER_MAGIC (0x5EC5DFA4UL)
+#endif /* SDFAT_SUPER_MAGIC */
+
+#define SDFAT_ROOT_INO 1
+
+/* FAT types */
+#define FAT12 0x01 // FAT12
+#define FAT16 0x0E // Win95 FAT16 (LBA)
+#define FAT32 0x0C // Win95 FAT32 (LBA)
+#define EXFAT 0x07 // exFAT
+
+/* directory file name */
+#define DOS_CUR_DIR_NAME ". "
+#define DOS_PAR_DIR_NAME ".. "
+
+#ifdef __LITTLE_ENDIAN
+#define UNI_CUR_DIR_NAME ".\0"
+#define UNI_PAR_DIR_NAME ".\0.\0"
+#else
+#define UNI_CUR_DIR_NAME "\0."
+#define UNI_PAR_DIR_NAME "\0.\0."
+#endif
+
+/* file name lengths */
+/* NOTE :
+ * The maximum length of input or output is limited to 256 including NULL,
+ * But we allocate 4 extra bytes for utf8 translation reside in last position,
+ * because utf8 can use up to 6 bytes per character.
+ * Therefore, MAX_CHARSET_SIZE supports up to 6 bytes for utf8
+ */
+#define MAX_UNINAME_BUF_SIZE (((MAX_NAME_LENGTH+1)*2)+4)
+#define MAX_DOSNAME_BUF_SIZE ((DOS_NAME_LENGTH+2)+6)
+#define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH+1)*MAX_CHARSET_SIZE)
+#define MAX_CHARSET_SIZE 6 // max size of multi-byte character
+#define MAX_NAME_LENGTH 255 // max len of file name excluding NULL
+#define DOS_NAME_LENGTH 11 // DOS file name length excluding NULL
+
+#define SECTOR_SIZE_BITS 9 /* VFS sector size is 512 bytes */
+
+#define DENTRY_SIZE 32 /* directory entry size */
+#define DENTRY_SIZE_BITS 5
+
+#define MAX_FAT_DENTRIES 65536 /* FAT allows 65536 directory entries */
+#define MAX_EXFAT_DENTRIES 8388608 /* exFAT allows 8388608(256MB) directory entries */
+
+/* PBR entries */
+#define PBR_SIGNATURE 0xAA55
+#define EXT_SIGNATURE 0xAA550000
+#define VOL_LABEL "NO NAME " /* size should be 11 */
+#define OEM_NAME "MSWIN4.1" /* size should be 8 */
+#define STR_FAT12 "FAT12 " /* size should be 8 */
+#define STR_FAT16 "FAT16 " /* size should be 8 */
+#define STR_FAT32 "FAT32 " /* size should be 8 */
+#define STR_EXFAT "EXFAT " /* size should be 8 */
+
+#define VOL_CLEAN 0x0000
+#define VOL_DIRTY 0x0002
+
+#define FAT_VOL_DIRTY 0x01
+
+/* max number of clusters */
+#define FAT12_THRESHOLD 4087 // 2^12 - 1 + 2 (clu 0 & 1)
+#define FAT16_THRESHOLD 65527 // 2^16 - 1 + 2
+#define FAT32_THRESHOLD 268435457 // 2^28 - 1 + 2
+#define EXFAT_THRESHOLD 268435457 // 2^28 - 1 + 2
+
+/* dentry types */
+#define MSDOS_DELETED 0xE5 /* deleted mark */
+#define MSDOS_UNUSED 0x00 /* end of directory */
+
+#define EXFAT_UNUSED 0x00 /* end of directory */
+#define IS_EXFAT_DELETED(x) ((x) < 0x80) /* deleted file (0x01~0x7F) */
+#define EXFAT_INVAL 0x80 /* invalid value */
+#define EXFAT_BITMAP 0x81 /* allocation bitmap */
+#define EXFAT_UPCASE 0x82 /* upcase table */
+#define EXFAT_VOLUME 0x83 /* volume label */
+#define EXFAT_FILE 0x85 /* file or dir */
+#define EXFAT_STREAM 0xC0 /* stream entry */
+#define EXFAT_NAME 0xC1 /* file name entry */
+#define EXFAT_ACL 0xC2 /* ACL entry */
+
+/* specific flag */
+#define MSDOS_LAST_LFN 0x40
+
+/* file attributes */
+#define ATTR_NORMAL 0x0000
+#define ATTR_READONLY 0x0001
+#define ATTR_HIDDEN 0x0002
+#define ATTR_SYSTEM 0x0004
+#define ATTR_VOLUME 0x0008
+#define ATTR_SUBDIR 0x0010
+#define ATTR_ARCHIVE 0x0020
+#define ATTR_SYMLINK 0x0040
+#define ATTR_EXTEND (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | \
+ ATTR_VOLUME) /* 0x000F */
+
+#define ATTR_EXTEND_MASK (ATTR_EXTEND | ATTR_SUBDIR | ATTR_ARCHIVE)
+#define ATTR_RWMASK (ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME | \
+ ATTR_SUBDIR | ATTR_ARCHIVE | ATTR_SYMLINK)/* 0x007E */
+
+/* file creation modes */
+#define FM_REGULAR 0x00
+#define FM_SYMLINK 0x40
+
+/* time modes */
+#define TM_CREATE 0
+#define TM_MODIFY 1
+#define TM_ACCESS 2
+
+/* checksum types */
+#define CS_DIR_ENTRY 0
+#define CS_PBR_SECTOR 1
+#define CS_DEFAULT 2
+
+/*
+ * ioctl command
+ */
+#define SDFAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x12, __u32)
+#define SDFAT_IOCTL_DFR_INFO _IOC(_IOC_NONE, 'E', 0x13, sizeof(u32))
+#define SDFAT_IOCTL_DFR_TRAV _IOC(_IOC_NONE, 'E', 0x14, sizeof(u32))
+#define SDFAT_IOCTL_DFR_REQ _IOC(_IOC_NONE, 'E', 0x15, sizeof(u32))
+#define SDFAT_IOCTL_DFR_SPO_FLAG _IOC(_IOC_NONE, 'E', 0x16, sizeof(u32))
+#define SDFAT_IOCTL_PANIC _IOC(_IOC_NONE, 'E', 0x17, sizeof(u32))
+
+/*
+ * ioctl command for debugging
+ */
+
+/*
+ * IOCTL code 'f' used by
+ * - file systems typically #0~0x1F
+ * - embedded terminal devices #128~
+ * - exts for debugging purpose #99
+ * numbers 100 and 101 are available now but have possible conflicts
+ *
+ * NOTE : This is available only if CONFIG_SDFAT_DVBG_IOCTL is enabled.
+ *
+ */
+#define SDFAT_IOC_GET_DEBUGFLAGS _IOR('f', 100, long)
+#define SDFAT_IOC_SET_DEBUGFLAGS _IOW('f', 101, long)
+
+#define SDFAT_DEBUGFLAGS_INVALID_UMOUNT 0x01
+#define SDFAT_DEBUGFLAGS_ERROR_RW 0x02
+
+/*----------------------------------------------------------------------*/
+/* On-Disk Type Definitions */
+/*----------------------------------------------------------------------*/
+
+/* FAT12/16 BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+
+ __u8 sect_size[2]; /* unaligned */
+ __u8 sect_per_clus;
+ __le16 num_reserved; /* . */
+ __u8 num_fats;
+ __u8 num_root_entries[2]; /* unaligned */
+ __u8 num_sectors[2]; /* unaligned */
+ __u8 media_type;
+ __le16 num_fat_sectors;
+ __le16 sectors_in_track;
+ __le16 num_heads;
+ __le32 num_hid_sectors; /* . */
+ __le32 num_huge_sectors;
+
+ __u8 phy_drv_no;
+ __u8 state; /* used by WindowsNT for mount state */
+ __u8 ext_signature;
+ __u8 vol_serial[4];
+ __u8 vol_label[11];
+ __u8 vol_type[8];
+ __le16 dummy;
+} bpb16_t;
+
+/* FAT32 BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+
+ __u8 sect_size[2]; /* unaligned */
+ __u8 sect_per_clus;
+ __le16 num_reserved;
+ __u8 num_fats;
+ __u8 num_root_entries[2]; /* unaligned */
+ __u8 num_sectors[2]; /* unaligned */
+ __u8 media_type;
+ __le16 num_fat_sectors; /* zero */
+ __le16 sectors_in_track;
+ __le16 num_heads;
+ __le32 num_hid_sectors; /* . */
+ __le32 num_huge_sectors;
+
+ __le32 num_fat32_sectors;
+ __le16 ext_flags;
+ __u8 fs_version[2];
+ __le32 root_cluster; /* . */
+ __le16 fsinfo_sector;
+ __le16 backup_sector;
+ __le16 reserved[6]; /* . */
+} bpb32_t;
+
+/* FAT32 EXTEND BIOS parameter block (32 bytes) */
+typedef struct {
+ __u8 phy_drv_no;
+ __u8 state; /* used by WindowsNT for mount state */
+ __u8 ext_signature;
+ __u8 vol_serial[4];
+ __u8 vol_label[11];
+ __u8 vol_type[8];
+ __le16 dummy[3];
+} bsx32_t;
+
+/* EXFAT BIOS parameter block (64 bytes) */
+typedef struct {
+ __u8 jmp_boot[3];
+ __u8 oem_name[8];
+ __u8 res_zero[53];
+} bpb64_t;
+
+/* EXFAT EXTEND BIOS parameter block (56 bytes) */
+typedef struct {
+ __le64 vol_offset;
+ __le64 vol_length;
+ __le32 fat_offset;
+ __le32 fat_length;
+ __le32 clu_offset;
+ __le32 clu_count;
+ __le32 root_cluster;
+ __le32 vol_serial;
+ __u8 fs_version[2];
+ __le16 vol_flags;
+ __u8 sect_size_bits;
+ __u8 sect_per_clus_bits;
+ __u8 num_fats;
+ __u8 phy_drv_no;
+ __u8 perc_in_use;
+ __u8 reserved2[7];
+} bsx64_t;
+
+/* FAT32 PBR (64 bytes) */
+typedef struct {
+ bpb16_t bpb;
+} pbr16_t;
+
+/* FAT32 PBR[BPB+BSX] (96 bytes) */
+typedef struct {
+ bpb32_t bpb;
+ bsx32_t bsx;
+} pbr32_t;
+
+/* EXFAT PBR[BPB+BSX] (120 bytes) */
+typedef struct {
+ bpb64_t bpb;
+ bsx64_t bsx;
+} pbr64_t;
+
+/* Common PBR[Partition Boot Record] (512 bytes) */
+typedef struct {
+ union {
+ __u8 raw[64];
+ bpb16_t f16;
+ bpb32_t f32;
+ bpb64_t f64;
+ } bpb;
+ union {
+ __u8 raw[56];
+ bsx32_t f32;
+ bsx64_t f64;
+ } bsx;
+ __u8 boot_code[390];
+ __le16 signature;
+} pbr_t;
+
+/* FAT32 filesystem information sector (512 bytes) */
+typedef struct {
+ __le32 signature1; // aligned
+ __u8 reserved1[480];
+ __le32 signature2; // aligned
+ __le32 free_cluster; // aligned
+ __le32 next_cluster; // aligned
+ __u8 reserved2[14];
+ __le16 signature3[2];
+} fat32_fsi_t;
+
+/* FAT directory entry (32 bytes) */
+typedef struct {
+ __u8 dummy[32];
+} DENTRY_T;
+
+typedef struct {
+ __u8 name[DOS_NAME_LENGTH]; /* 11 chars */
+ __u8 attr;
+ __u8 lcase;
+ __u8 create_time_ms;
+ __le16 create_time; // aligned
+ __le16 create_date; // aligned
+ __le16 access_date; // aligned
+ __le16 start_clu_hi; // aligned
+ __le16 modify_time; // aligned
+ __le16 modify_date; // aligned
+ __le16 start_clu_lo; // aligned
+ __le32 size; // aligned
+} DOS_DENTRY_T;
+
+/* FAT extended directory entry (32 bytes) */
+typedef struct {
+ __u8 order;
+ __u8 unicode_0_4[10];
+ __u8 attr;
+ __u8 sysid;
+ __u8 checksum;
+ __le16 unicode_5_10[6]; // aligned
+ __le16 start_clu; // aligned
+ __le16 unicode_11_12[2]; // aligned
+} EXT_DENTRY_T;
+
+/* EXFAT file directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 num_ext;
+ __le16 checksum; // aligned
+ __le16 attr; // aligned
+ __le16 reserved1;
+ __le16 create_time; // aligned
+ __le16 create_date; // aligned
+ __le16 modify_time; // aligned
+ __le16 modify_date; // aligned
+ __le16 access_time; // aligned
+ __le16 access_date; // aligned
+ __u8 create_time_ms;
+ __u8 modify_time_ms;
+ __u8 create_tz;
+ __u8 modify_tz;
+ __u8 access_tz;
+ __u8 reserved2[7];
+} FILE_DENTRY_T;
+
+/* EXFAT stream extension directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __u8 reserved1;
+ __u8 name_len;
+ __le16 name_hash; // aligned
+ __le16 reserved2;
+ __le64 valid_size; // aligned
+ __le32 reserved3; // aligned
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} STRM_DENTRY_T;
+
+/* EXFAT file name directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __le16 unicode_0_14[15]; // aligned
+} NAME_DENTRY_T;
+
+/* EXFAT allocation bitmap directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 flags;
+ __u8 reserved[18];
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} BMAP_DENTRY_T;
+
+/* EXFAT up-case table directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 reserved1[3];
+ __le32 checksum; // aligned
+ __u8 reserved2[12];
+ __le32 start_clu; // aligned
+ __le64 size; // aligned
+} CASE_DENTRY_T;
+
+/* EXFAT volume label directory entry (32 bytes) */
+typedef struct {
+ __u8 type;
+ __u8 label_len;
+ __le16 unicode_0_10[11]; // aligned
+ __u8 reserved[8];
+} VOLM_DENTRY_T;
+
+#endif /* _SDFAT_FS_H */
diff --git a/fs/sdfat/statistics.c b/fs/sdfat/statistics.c
new file mode 100644
index 000000000000..099d9a358a76
--- /dev/null
+++ b/fs/sdfat/statistics.c
@@ -0,0 +1,281 @@
+#include "sdfat.h"
+
+#define SDFAT_VF_CLUS_MAX 7 /* 512 Byte ~ 32 KByte */
+#define SDFAT_EF_CLUS_MAX 17 /* 512 Byte ~ 32 MByte */
+
+enum {
+ SDFAT_MNT_FAT12,
+ SDFAT_MNT_FAT16,
+ SDFAT_MNT_FAT32,
+ SDFAT_MNT_EXFAT,
+ SDFAT_MNT_RO,
+ SDFAT_MNT_MAX
+};
+
+enum {
+ SDFAT_OP_EXFAT_MNT,
+ SDFAT_OP_MKDIR,
+ SDFAT_OP_CREATE,
+ SDFAT_OP_READ,
+ SDFAT_OP_WRITE,
+ SDFAT_OP_TRUNC,
+ SDFAT_OP_MAX
+};
+
+enum {
+ SDFAT_VOL_4G,
+ SDFAT_VOL_8G,
+ SDFAT_VOL_16G,
+ SDFAT_VOL_32G,
+ SDFAT_VOL_64G,
+ SDFAT_VOL_128G,
+ SDFAT_VOL_256G,
+ SDFAT_VOL_512G,
+ SDFAT_VOL_XTB,
+ SDFAT_VOL_MAX
+};
+
+static struct sdfat_statistics {
+ u32 clus_vfat[SDFAT_VF_CLUS_MAX];
+ u32 clus_exfat[SDFAT_EF_CLUS_MAX];
+ u32 mnt_cnt[SDFAT_MNT_MAX];
+ u32 nofat_op[SDFAT_OP_MAX];
+ u32 vol_size[SDFAT_VOL_MAX];
+} statistics;
+
+static struct kset *sdfat_statistics_kset;
+
+static ssize_t vfat_cl_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"VCL_512B_I\":\"%u\","
+ "\"VCL_1K_I\":\"%u\",\"VCL_2K_I\":\"%u\","
+ "\"VCL_4K_I\":\"%u\",\"VCL_8K_I\":\"%u\","
+ "\"VCL_16K_I\":\"%u\",\"VCL_32K_I\":\"%u\"\n",
+ statistics.clus_vfat[0], statistics.clus_vfat[1],
+ statistics.clus_vfat[2], statistics.clus_vfat[3],
+ statistics.clus_vfat[4], statistics.clus_vfat[5],
+ statistics.clus_vfat[6]);
+}
+
+static ssize_t exfat_cl_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"ECL_512B_I\":\"%u\","
+ "\"ECL_1K_I\":\"%u\",\"ECL_2K_I\":\"%u\","
+ "\"ECL_4K_I\":\"%u\",\"ECL_8K_I\":\"%u\","
+ "\"ECL_16K_I\":\"%u\",\"ECL_32K_I\":\"%u\","
+ "\"ECL_64K_I\":\"%u\",\"ECL_128K_I\":\"%u\","
+ "\"ECL_256K_I\":\"%u\",\"ECL_512K_I\":\"%u\","
+ "\"ECL_1M_I\":\"%u\",\"ECL_2M_I\":\"%u\","
+ "\"ECL_4M_I\":\"%u\",\"ECL_8M_I\":\"%u\","
+ "\"ECL_16M_I\":\"%u\",\"ECL_32M_I\":\"%u\"\n",
+ statistics.clus_exfat[0], statistics.clus_exfat[1],
+ statistics.clus_exfat[2], statistics.clus_exfat[3],
+ statistics.clus_exfat[4], statistics.clus_exfat[5],
+ statistics.clus_exfat[6], statistics.clus_exfat[7],
+ statistics.clus_exfat[8], statistics.clus_exfat[9],
+ statistics.clus_exfat[10], statistics.clus_exfat[11],
+ statistics.clus_exfat[12], statistics.clus_exfat[13],
+ statistics.clus_exfat[14], statistics.clus_exfat[15],
+ statistics.clus_exfat[16]);
+}
+
+static ssize_t mount_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"FAT12_MNT_I\":\"%u\","
+ "\"FAT16_MNT_I\":\"%u\",\"FAT32_MNT_I\":\"%u\","
+ "\"EXFAT_MNT_I\":\"%u\",\"RO_MNT_I\":\"%u\"\n",
+ statistics.mnt_cnt[SDFAT_MNT_FAT12],
+ statistics.mnt_cnt[SDFAT_MNT_FAT16],
+ statistics.mnt_cnt[SDFAT_MNT_FAT32],
+ statistics.mnt_cnt[SDFAT_MNT_EXFAT],
+ statistics.mnt_cnt[SDFAT_MNT_RO]);
+}
+
+static ssize_t nofat_op_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"NOFAT_MOUNT_I\":\"%u\","
+ "\"NOFAT_MKDIR_I\":\"%u\",\"NOFAT_CREATE_I\":\"%u\","
+ "\"NOFAT_READ_I\":\"%u\",\"NOFAT_WRITE_I\":\"%u\","
+ "\"NOFAT_TRUNC_I\":\"%u\"\n",
+ statistics.nofat_op[SDFAT_OP_EXFAT_MNT],
+ statistics.nofat_op[SDFAT_OP_MKDIR],
+ statistics.nofat_op[SDFAT_OP_CREATE],
+ statistics.nofat_op[SDFAT_OP_READ],
+ statistics.nofat_op[SDFAT_OP_WRITE],
+ statistics.nofat_op[SDFAT_OP_TRUNC]);
+}
+
+static ssize_t vol_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "\"VOL_4G_I\":\"%u\","
+ "\"VOL_8G_I\":\"%u\",\"VOL_16G_I\":\"%u\","
+ "\"VOL_32G_I\":\"%u\",\"VOL_64G_I\":\"%u\","
+ "\"VOL_128G_I\":\"%u\",\"VOL_256G_I\":\"%u\","
+ "\"VOL_512G_I\":\"%u\",\"VOL_XTB_I\":\"%u\"\n",
+ statistics.vol_size[SDFAT_VOL_4G],
+ statistics.vol_size[SDFAT_VOL_8G],
+ statistics.vol_size[SDFAT_VOL_16G],
+ statistics.vol_size[SDFAT_VOL_32G],
+ statistics.vol_size[SDFAT_VOL_64G],
+ statistics.vol_size[SDFAT_VOL_128G],
+ statistics.vol_size[SDFAT_VOL_256G],
+ statistics.vol_size[SDFAT_VOL_512G],
+ statistics.vol_size[SDFAT_VOL_XTB]);
+}
+
+static struct kobj_attribute vfat_cl_attr = __ATTR_RO(vfat_cl);
+static struct kobj_attribute exfat_cl_attr = __ATTR_RO(exfat_cl);
+static struct kobj_attribute mount_attr = __ATTR_RO(mount);
+static struct kobj_attribute nofat_op_attr = __ATTR_RO(nofat_op);
+static struct kobj_attribute vol_size_attr = __ATTR_RO(vol_size);
+
+static struct attribute *attributes_statistics[] = {
+ &vfat_cl_attr.attr,
+ &exfat_cl_attr.attr,
+ &mount_attr.attr,
+ &nofat_op_attr.attr,
+ &vol_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group_statistics = {
+ .attrs = attributes_statistics,
+};
+
+int sdfat_statistics_init(struct kset *sdfat_kset)
+{
+ int err;
+
+ sdfat_statistics_kset = kset_create_and_add("statistics", NULL, &sdfat_kset->kobj);
+ if (!sdfat_statistics_kset) {
+ pr_err("[SDFAT] failed to create sdfat statistics kobj\n");
+ return -ENOMEM;
+ }
+
+ err = sysfs_create_group(&sdfat_statistics_kset->kobj, &attr_group_statistics);
+ if (err) {
+ pr_err("[SDFAT] failed to create sdfat statistics attributes\n");
+ kset_unregister(sdfat_statistics_kset);
+ sdfat_statistics_kset = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+void sdfat_statistics_uninit(void)
+{
+ if (sdfat_statistics_kset) {
+ sysfs_remove_group(&sdfat_statistics_kset->kobj, &attr_group_statistics);
+ kset_unregister(sdfat_statistics_kset);
+ sdfat_statistics_kset = NULL;
+ }
+ memset(&statistics, 0, sizeof(struct sdfat_statistics));
+}
+
+void sdfat_statistics_set_mnt(FS_INFO_T *fsi)
+{
+ if (fsi->vol_type == EXFAT) {
+ statistics.mnt_cnt[SDFAT_MNT_EXFAT]++;
+ statistics.nofat_op[SDFAT_OP_EXFAT_MNT] = 1;
+ if (fsi->sect_per_clus_bits < SDFAT_EF_CLUS_MAX)
+ statistics.clus_exfat[fsi->sect_per_clus_bits]++;
+ else
+ statistics.clus_exfat[SDFAT_EF_CLUS_MAX - 1]++;
+ return;
+ }
+
+ if (fsi->vol_type == FAT32)
+ statistics.mnt_cnt[SDFAT_MNT_FAT32]++;
+ else if (fsi->vol_type == FAT16)
+ statistics.mnt_cnt[SDFAT_MNT_FAT16]++;
+ else if (fsi->vol_type == FAT12)
+ statistics.mnt_cnt[SDFAT_MNT_FAT12]++;
+
+ if (fsi->sect_per_clus_bits < SDFAT_VF_CLUS_MAX)
+ statistics.clus_vfat[fsi->sect_per_clus_bits]++;
+ else
+ statistics.clus_vfat[SDFAT_VF_CLUS_MAX - 1]++;
+}
+
+void sdfat_statistics_set_mnt_ro(void)
+{
+ statistics.mnt_cnt[SDFAT_MNT_RO]++;
+}
+
+void sdfat_statistics_set_mkdir(u8 flags)
+{
+ if (flags != 0x03)
+ return;
+ statistics.nofat_op[SDFAT_OP_MKDIR] = 1;
+}
+
+void sdfat_statistics_set_create(u8 flags)
+{
+ if (flags != 0x03)
+ return;
+ statistics.nofat_op[SDFAT_OP_CREATE] = 1;
+}
+
+/* flags : file or dir flags, 0x03 means no fat-chain.
+ * clu_offset : file or dir logical cluster offset
+ * create : BMAP_ADD_CLUSTER or not
+ *
+ * A file or dir with BMAP_ADD_CLUSTER counts as a no-fat-chain write
+ * when it has the 0x03 flag and two or more clusters.
+ * Without BMAP_ADD_CLUSTER it counts as a no-fat-chain read
+ * under the same conditions.
+ */
+void sdfat_statistics_set_rw(u8 flags, u32 clu_offset, s32 create)
+{
+ if ((flags == 0x03) && (clu_offset > 1)) {
+ if (create)
+ statistics.nofat_op[SDFAT_OP_WRITE] = 1;
+ else
+ statistics.nofat_op[SDFAT_OP_READ] = 1;
+ }
+}
+
+/* flags : file or dir flags, 0x03 means no fat-chain.
+ * clu : cluster chain
+ *
+ * Set no fat-chain trunc when file or dir have 0x03 flag
+ * and two or more clusters.
+ */
+void sdfat_statistics_set_trunc(u8 flags, CHAIN_T *clu)
+{
+ if ((flags == 0x03) && (clu->size > 1))
+ statistics.nofat_op[SDFAT_OP_TRUNC] = 1;
+}
+
+void sdfat_statistics_set_vol_size(struct super_block *sb)
+{
+ u64 vol_size;
+ FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
+
+ vol_size = (u64)fsi->num_sectors << sb->s_blocksize_bits;
+
+ if (vol_size <= ((u64)1 << 32))
+ statistics.vol_size[SDFAT_VOL_4G]++;
+ else if (vol_size <= ((u64)1 << 33))
+ statistics.vol_size[SDFAT_VOL_8G]++;
+ else if (vol_size <= ((u64)1 << 34))
+ statistics.vol_size[SDFAT_VOL_16G]++;
+ else if (vol_size <= ((u64)1 << 35))
+ statistics.vol_size[SDFAT_VOL_32G]++;
+ else if (vol_size <= ((u64)1 << 36))
+ statistics.vol_size[SDFAT_VOL_64G]++;
+ else if (vol_size <= ((u64)1 << 37))
+ statistics.vol_size[SDFAT_VOL_128G]++;
+ else if (vol_size <= ((u64)1 << 38))
+ statistics.vol_size[SDFAT_VOL_256G]++;
+ else if (vol_size <= ((u64)1 << 39))
+ statistics.vol_size[SDFAT_VOL_512G]++;
+ else
+ statistics.vol_size[SDFAT_VOL_XTB]++;
+}
diff --git a/fs/sdfat/upcase.h b/fs/sdfat/upcase.h
new file mode 100644
index 000000000000..386772c57f8d
--- /dev/null
+++ b/fs/sdfat/upcase.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _UPCASE_H
+#define _UPCASE_H
+
+/* Upcase tabel macro */
+#define SDFAT_NUM_UPCASE 2918
+#define HIGH_INDEX_BIT (8)
+#define HIGH_INDEX_MASK (0xFF00)
+#define LOW_INDEX_BIT (16-HIGH_INDEX_BIT)
+#define UTBL_ROW_COUNT (1<<LOW_INDEX_BIT)
+#define UTBL_COL_COUNT (1<<HIGH_INDEX_BIT)
+
+static inline u16 get_col_index(u16 i)
+{
+ return i >> LOW_INDEX_BIT;
+}
+static inline u16 get_row_index(u16 i)
+{
+ return i & ~HIGH_INDEX_MASK;
+}
+
+
+static const u8 uni_def_upcase[SDFAT_NUM_UPCASE<<1] = {
+ 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00,
+ 0x08, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x0B, 0x00, 0x0C, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0F, 0x00,
+ 0x10, 0x00, 0x11, 0x00, 0x12, 0x00, 0x13, 0x00, 0x14, 0x00, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00,
+ 0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1B, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1E, 0x00, 0x1F, 0x00,
+ 0x20, 0x00, 0x21, 0x00, 0x22, 0x00, 0x23, 0x00, 0x24, 0x00, 0x25, 0x00, 0x26, 0x00, 0x27, 0x00,
+ 0x28, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2B, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2E, 0x00, 0x2F, 0x00,
+ 0x30, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x34, 0x00, 0x35, 0x00, 0x36, 0x00, 0x37, 0x00,
+ 0x38, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3B, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3E, 0x00, 0x3F, 0x00,
+ 0x40, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+ 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+ 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+ 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5B, 0x00, 0x5C, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x5F, 0x00,
+ 0x60, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+ 0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+ 0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+ 0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x7B, 0x00, 0x7C, 0x00, 0x7D, 0x00, 0x7E, 0x00, 0x7F, 0x00,
+ 0x80, 0x00, 0x81, 0x00, 0x82, 0x00, 0x83, 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00,
+ 0x88, 0x00, 0x89, 0x00, 0x8A, 0x00, 0x8B, 0x00, 0x8C, 0x00, 0x8D, 0x00, 0x8E, 0x00, 0x8F, 0x00,
+ 0x90, 0x00, 0x91, 0x00, 0x92, 0x00, 0x93, 0x00, 0x94, 0x00, 0x95, 0x00, 0x96, 0x00, 0x97, 0x00,
+ 0x98, 0x00, 0x99, 0x00, 0x9A, 0x00, 0x9B, 0x00, 0x9C, 0x00, 0x9D, 0x00, 0x9E, 0x00, 0x9F, 0x00,
+ 0xA0, 0x00, 0xA1, 0x00, 0xA2, 0x00, 0xA3, 0x00, 0xA4, 0x00, 0xA5, 0x00, 0xA6, 0x00, 0xA7, 0x00,
+ 0xA8, 0x00, 0xA9, 0x00, 0xAA, 0x00, 0xAB, 0x00, 0xAC, 0x00, 0xAD, 0x00, 0xAE, 0x00, 0xAF, 0x00,
+ 0xB0, 0x00, 0xB1, 0x00, 0xB2, 0x00, 0xB3, 0x00, 0xB4, 0x00, 0xB5, 0x00, 0xB6, 0x00, 0xB7, 0x00,
+ 0xB8, 0x00, 0xB9, 0x00, 0xBA, 0x00, 0xBB, 0x00, 0xBC, 0x00, 0xBD, 0x00, 0xBE, 0x00, 0xBF, 0x00,
+ 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+ 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+ 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xD7, 0x00,
+ 0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0xDF, 0x00,
+ 0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+ 0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+ 0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xF7, 0x00,
+ 0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0x78, 0x01,
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x04, 0x01, 0x04, 0x01, 0x06, 0x01, 0x06, 0x01,
+ 0x08, 0x01, 0x08, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x0C, 0x01, 0x0C, 0x01, 0x0E, 0x01, 0x0E, 0x01,
+ 0x10, 0x01, 0x10, 0x01, 0x12, 0x01, 0x12, 0x01, 0x14, 0x01, 0x14, 0x01, 0x16, 0x01, 0x16, 0x01,
+ 0x18, 0x01, 0x18, 0x01, 0x1A, 0x01, 0x1A, 0x01, 0x1C, 0x01, 0x1C, 0x01, 0x1E, 0x01, 0x1E, 0x01,
+ 0x20, 0x01, 0x20, 0x01, 0x22, 0x01, 0x22, 0x01, 0x24, 0x01, 0x24, 0x01, 0x26, 0x01, 0x26, 0x01,
+ 0x28, 0x01, 0x28, 0x01, 0x2A, 0x01, 0x2A, 0x01, 0x2C, 0x01, 0x2C, 0x01, 0x2E, 0x01, 0x2E, 0x01,
+ 0x30, 0x01, 0x31, 0x01, 0x32, 0x01, 0x32, 0x01, 0x34, 0x01, 0x34, 0x01, 0x36, 0x01, 0x36, 0x01,
+ 0x38, 0x01, 0x39, 0x01, 0x39, 0x01, 0x3B, 0x01, 0x3B, 0x01, 0x3D, 0x01, 0x3D, 0x01, 0x3F, 0x01,
+ 0x3F, 0x01, 0x41, 0x01, 0x41, 0x01, 0x43, 0x01, 0x43, 0x01, 0x45, 0x01, 0x45, 0x01, 0x47, 0x01,
+ 0x47, 0x01, 0x49, 0x01, 0x4A, 0x01, 0x4A, 0x01, 0x4C, 0x01, 0x4C, 0x01, 0x4E, 0x01, 0x4E, 0x01,
+ 0x50, 0x01, 0x50, 0x01, 0x52, 0x01, 0x52, 0x01, 0x54, 0x01, 0x54, 0x01, 0x56, 0x01, 0x56, 0x01,
+ 0x58, 0x01, 0x58, 0x01, 0x5A, 0x01, 0x5A, 0x01, 0x5C, 0x01, 0x5C, 0x01, 0x5E, 0x01, 0x5E, 0x01,
+ 0x60, 0x01, 0x60, 0x01, 0x62, 0x01, 0x62, 0x01, 0x64, 0x01, 0x64, 0x01, 0x66, 0x01, 0x66, 0x01,
+ 0x68, 0x01, 0x68, 0x01, 0x6A, 0x01, 0x6A, 0x01, 0x6C, 0x01, 0x6C, 0x01, 0x6E, 0x01, 0x6E, 0x01,
+ 0x70, 0x01, 0x70, 0x01, 0x72, 0x01, 0x72, 0x01, 0x74, 0x01, 0x74, 0x01, 0x76, 0x01, 0x76, 0x01,
+ 0x78, 0x01, 0x79, 0x01, 0x79, 0x01, 0x7B, 0x01, 0x7B, 0x01, 0x7D, 0x01, 0x7D, 0x01, 0x7F, 0x01,
+ 0x43, 0x02, 0x81, 0x01, 0x82, 0x01, 0x82, 0x01, 0x84, 0x01, 0x84, 0x01, 0x86, 0x01, 0x87, 0x01,
+ 0x87, 0x01, 0x89, 0x01, 0x8A, 0x01, 0x8B, 0x01, 0x8B, 0x01, 0x8D, 0x01, 0x8E, 0x01, 0x8F, 0x01,
+ 0x90, 0x01, 0x91, 0x01, 0x91, 0x01, 0x93, 0x01, 0x94, 0x01, 0xF6, 0x01, 0x96, 0x01, 0x97, 0x01,
+ 0x98, 0x01, 0x98, 0x01, 0x3D, 0x02, 0x9B, 0x01, 0x9C, 0x01, 0x9D, 0x01, 0x20, 0x02, 0x9F, 0x01,
+ 0xA0, 0x01, 0xA0, 0x01, 0xA2, 0x01, 0xA2, 0x01, 0xA4, 0x01, 0xA4, 0x01, 0xA6, 0x01, 0xA7, 0x01,
+ 0xA7, 0x01, 0xA9, 0x01, 0xAA, 0x01, 0xAB, 0x01, 0xAC, 0x01, 0xAC, 0x01, 0xAE, 0x01, 0xAF, 0x01,
+ 0xAF, 0x01, 0xB1, 0x01, 0xB2, 0x01, 0xB3, 0x01, 0xB3, 0x01, 0xB5, 0x01, 0xB5, 0x01, 0xB7, 0x01,
+ 0xB8, 0x01, 0xB8, 0x01, 0xBA, 0x01, 0xBB, 0x01, 0xBC, 0x01, 0xBC, 0x01, 0xBE, 0x01, 0xF7, 0x01,
+ 0xC0, 0x01, 0xC1, 0x01, 0xC2, 0x01, 0xC3, 0x01, 0xC4, 0x01, 0xC5, 0x01, 0xC4, 0x01, 0xC7, 0x01,
+ 0xC8, 0x01, 0xC7, 0x01, 0xCA, 0x01, 0xCB, 0x01, 0xCA, 0x01, 0xCD, 0x01, 0xCD, 0x01, 0xCF, 0x01,
+ 0xCF, 0x01, 0xD1, 0x01, 0xD1, 0x01, 0xD3, 0x01, 0xD3, 0x01, 0xD5, 0x01, 0xD5, 0x01, 0xD7, 0x01,
+ 0xD7, 0x01, 0xD9, 0x01, 0xD9, 0x01, 0xDB, 0x01, 0xDB, 0x01, 0x8E, 0x01, 0xDE, 0x01, 0xDE, 0x01,
+ 0xE0, 0x01, 0xE0, 0x01, 0xE2, 0x01, 0xE2, 0x01, 0xE4, 0x01, 0xE4, 0x01, 0xE6, 0x01, 0xE6, 0x01,
+ 0xE8, 0x01, 0xE8, 0x01, 0xEA, 0x01, 0xEA, 0x01, 0xEC, 0x01, 0xEC, 0x01, 0xEE, 0x01, 0xEE, 0x01,
+ 0xF0, 0x01, 0xF1, 0x01, 0xF2, 0x01, 0xF1, 0x01, 0xF4, 0x01, 0xF4, 0x01, 0xF6, 0x01, 0xF7, 0x01,
+ 0xF8, 0x01, 0xF8, 0x01, 0xFA, 0x01, 0xFA, 0x01, 0xFC, 0x01, 0xFC, 0x01, 0xFE, 0x01, 0xFE, 0x01,
+ 0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x04, 0x02, 0x04, 0x02, 0x06, 0x02, 0x06, 0x02,
+ 0x08, 0x02, 0x08, 0x02, 0x0A, 0x02, 0x0A, 0x02, 0x0C, 0x02, 0x0C, 0x02, 0x0E, 0x02, 0x0E, 0x02,
+ 0x10, 0x02, 0x10, 0x02, 0x12, 0x02, 0x12, 0x02, 0x14, 0x02, 0x14, 0x02, 0x16, 0x02, 0x16, 0x02,
+ 0x18, 0x02, 0x18, 0x02, 0x1A, 0x02, 0x1A, 0x02, 0x1C, 0x02, 0x1C, 0x02, 0x1E, 0x02, 0x1E, 0x02,
+ 0x20, 0x02, 0x21, 0x02, 0x22, 0x02, 0x22, 0x02, 0x24, 0x02, 0x24, 0x02, 0x26, 0x02, 0x26, 0x02,
+ 0x28, 0x02, 0x28, 0x02, 0x2A, 0x02, 0x2A, 0x02, 0x2C, 0x02, 0x2C, 0x02, 0x2E, 0x02, 0x2E, 0x02,
+ 0x30, 0x02, 0x30, 0x02, 0x32, 0x02, 0x32, 0x02, 0x34, 0x02, 0x35, 0x02, 0x36, 0x02, 0x37, 0x02,
+ 0x38, 0x02, 0x39, 0x02, 0x65, 0x2C, 0x3B, 0x02, 0x3B, 0x02, 0x3D, 0x02, 0x66, 0x2C, 0x3F, 0x02,
+ 0x40, 0x02, 0x41, 0x02, 0x41, 0x02, 0x43, 0x02, 0x44, 0x02, 0x45, 0x02, 0x46, 0x02, 0x46, 0x02,
+ 0x48, 0x02, 0x48, 0x02, 0x4A, 0x02, 0x4A, 0x02, 0x4C, 0x02, 0x4C, 0x02, 0x4E, 0x02, 0x4E, 0x02,
+ 0x50, 0x02, 0x51, 0x02, 0x52, 0x02, 0x81, 0x01, 0x86, 0x01, 0x55, 0x02, 0x89, 0x01, 0x8A, 0x01,
+ 0x58, 0x02, 0x8F, 0x01, 0x5A, 0x02, 0x90, 0x01, 0x5C, 0x02, 0x5D, 0x02, 0x5E, 0x02, 0x5F, 0x02,
+ 0x93, 0x01, 0x61, 0x02, 0x62, 0x02, 0x94, 0x01, 0x64, 0x02, 0x65, 0x02, 0x66, 0x02, 0x67, 0x02,
+ 0x97, 0x01, 0x96, 0x01, 0x6A, 0x02, 0x62, 0x2C, 0x6C, 0x02, 0x6D, 0x02, 0x6E, 0x02, 0x9C, 0x01,
+ 0x70, 0x02, 0x71, 0x02, 0x9D, 0x01, 0x73, 0x02, 0x74, 0x02, 0x9F, 0x01, 0x76, 0x02, 0x77, 0x02,
+ 0x78, 0x02, 0x79, 0x02, 0x7A, 0x02, 0x7B, 0x02, 0x7C, 0x02, 0x64, 0x2C, 0x7E, 0x02, 0x7F, 0x02,
+ 0xA6, 0x01, 0x81, 0x02, 0x82, 0x02, 0xA9, 0x01, 0x84, 0x02, 0x85, 0x02, 0x86, 0x02, 0x87, 0x02,
+ 0xAE, 0x01, 0x44, 0x02, 0xB1, 0x01, 0xB2, 0x01, 0x45, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02,
+ 0x90, 0x02, 0x91, 0x02, 0xB7, 0x01, 0x93, 0x02, 0x94, 0x02, 0x95, 0x02, 0x96, 0x02, 0x97, 0x02,
+ 0x98, 0x02, 0x99, 0x02, 0x9A, 0x02, 0x9B, 0x02, 0x9C, 0x02, 0x9D, 0x02, 0x9E, 0x02, 0x9F, 0x02,
+ 0xA0, 0x02, 0xA1, 0x02, 0xA2, 0x02, 0xA3, 0x02, 0xA4, 0x02, 0xA5, 0x02, 0xA6, 0x02, 0xA7, 0x02,
+ 0xA8, 0x02, 0xA9, 0x02, 0xAA, 0x02, 0xAB, 0x02, 0xAC, 0x02, 0xAD, 0x02, 0xAE, 0x02, 0xAF, 0x02,
+ 0xB0, 0x02, 0xB1, 0x02, 0xB2, 0x02, 0xB3, 0x02, 0xB4, 0x02, 0xB5, 0x02, 0xB6, 0x02, 0xB7, 0x02,
+ 0xB8, 0x02, 0xB9, 0x02, 0xBA, 0x02, 0xBB, 0x02, 0xBC, 0x02, 0xBD, 0x02, 0xBE, 0x02, 0xBF, 0x02,
+ 0xC0, 0x02, 0xC1, 0x02, 0xC2, 0x02, 0xC3, 0x02, 0xC4, 0x02, 0xC5, 0x02, 0xC6, 0x02, 0xC7, 0x02,
+ 0xC8, 0x02, 0xC9, 0x02, 0xCA, 0x02, 0xCB, 0x02, 0xCC, 0x02, 0xCD, 0x02, 0xCE, 0x02, 0xCF, 0x02,
+ 0xD0, 0x02, 0xD1, 0x02, 0xD2, 0x02, 0xD3, 0x02, 0xD4, 0x02, 0xD5, 0x02, 0xD6, 0x02, 0xD7, 0x02,
+ 0xD8, 0x02, 0xD9, 0x02, 0xDA, 0x02, 0xDB, 0x02, 0xDC, 0x02, 0xDD, 0x02, 0xDE, 0x02, 0xDF, 0x02,
+ 0xE0, 0x02, 0xE1, 0x02, 0xE2, 0x02, 0xE3, 0x02, 0xE4, 0x02, 0xE5, 0x02, 0xE6, 0x02, 0xE7, 0x02,
+ 0xE8, 0x02, 0xE9, 0x02, 0xEA, 0x02, 0xEB, 0x02, 0xEC, 0x02, 0xED, 0x02, 0xEE, 0x02, 0xEF, 0x02,
+ 0xF0, 0x02, 0xF1, 0x02, 0xF2, 0x02, 0xF3, 0x02, 0xF4, 0x02, 0xF5, 0x02, 0xF6, 0x02, 0xF7, 0x02,
+ 0xF8, 0x02, 0xF9, 0x02, 0xFA, 0x02, 0xFB, 0x02, 0xFC, 0x02, 0xFD, 0x02, 0xFE, 0x02, 0xFF, 0x02,
+ 0x00, 0x03, 0x01, 0x03, 0x02, 0x03, 0x03, 0x03, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x07, 0x03,
+ 0x08, 0x03, 0x09, 0x03, 0x0A, 0x03, 0x0B, 0x03, 0x0C, 0x03, 0x0D, 0x03, 0x0E, 0x03, 0x0F, 0x03,
+ 0x10, 0x03, 0x11, 0x03, 0x12, 0x03, 0x13, 0x03, 0x14, 0x03, 0x15, 0x03, 0x16, 0x03, 0x17, 0x03,
+ 0x18, 0x03, 0x19, 0x03, 0x1A, 0x03, 0x1B, 0x03, 0x1C, 0x03, 0x1D, 0x03, 0x1E, 0x03, 0x1F, 0x03,
+ 0x20, 0x03, 0x21, 0x03, 0x22, 0x03, 0x23, 0x03, 0x24, 0x03, 0x25, 0x03, 0x26, 0x03, 0x27, 0x03,
+ 0x28, 0x03, 0x29, 0x03, 0x2A, 0x03, 0x2B, 0x03, 0x2C, 0x03, 0x2D, 0x03, 0x2E, 0x03, 0x2F, 0x03,
+ 0x30, 0x03, 0x31, 0x03, 0x32, 0x03, 0x33, 0x03, 0x34, 0x03, 0x35, 0x03, 0x36, 0x03, 0x37, 0x03,
+ 0x38, 0x03, 0x39, 0x03, 0x3A, 0x03, 0x3B, 0x03, 0x3C, 0x03, 0x3D, 0x03, 0x3E, 0x03, 0x3F, 0x03,
+ 0x40, 0x03, 0x41, 0x03, 0x42, 0x03, 0x43, 0x03, 0x44, 0x03, 0x45, 0x03, 0x46, 0x03, 0x47, 0x03,
+ 0x48, 0x03, 0x49, 0x03, 0x4A, 0x03, 0x4B, 0x03, 0x4C, 0x03, 0x4D, 0x03, 0x4E, 0x03, 0x4F, 0x03,
+ 0x50, 0x03, 0x51, 0x03, 0x52, 0x03, 0x53, 0x03, 0x54, 0x03, 0x55, 0x03, 0x56, 0x03, 0x57, 0x03,
+ 0x58, 0x03, 0x59, 0x03, 0x5A, 0x03, 0x5B, 0x03, 0x5C, 0x03, 0x5D, 0x03, 0x5E, 0x03, 0x5F, 0x03,
+ 0x60, 0x03, 0x61, 0x03, 0x62, 0x03, 0x63, 0x03, 0x64, 0x03, 0x65, 0x03, 0x66, 0x03, 0x67, 0x03,
+ 0x68, 0x03, 0x69, 0x03, 0x6A, 0x03, 0x6B, 0x03, 0x6C, 0x03, 0x6D, 0x03, 0x6E, 0x03, 0x6F, 0x03,
+ 0x70, 0x03, 0x71, 0x03, 0x72, 0x03, 0x73, 0x03, 0x74, 0x03, 0x75, 0x03, 0x76, 0x03, 0x77, 0x03,
+ 0x78, 0x03, 0x79, 0x03, 0x7A, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03, 0x7E, 0x03, 0x7F, 0x03,
+ 0x80, 0x03, 0x81, 0x03, 0x82, 0x03, 0x83, 0x03, 0x84, 0x03, 0x85, 0x03, 0x86, 0x03, 0x87, 0x03,
+ 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03, 0x8B, 0x03, 0x8C, 0x03, 0x8D, 0x03, 0x8E, 0x03, 0x8F, 0x03,
+ 0x90, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+ 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+ 0xA0, 0x03, 0xA1, 0x03, 0xA2, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+ 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x86, 0x03, 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03,
+ 0xB0, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+ 0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+ 0xA0, 0x03, 0xA1, 0x03, 0xA3, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+ 0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x8C, 0x03, 0x8E, 0x03, 0x8F, 0x03, 0xCF, 0x03,
+ 0xD0, 0x03, 0xD1, 0x03, 0xD2, 0x03, 0xD3, 0x03, 0xD4, 0x03, 0xD5, 0x03, 0xD6, 0x03, 0xD7, 0x03,
+ 0xD8, 0x03, 0xD8, 0x03, 0xDA, 0x03, 0xDA, 0x03, 0xDC, 0x03, 0xDC, 0x03, 0xDE, 0x03, 0xDE, 0x03,
+ 0xE0, 0x03, 0xE0, 0x03, 0xE2, 0x03, 0xE2, 0x03, 0xE4, 0x03, 0xE4, 0x03, 0xE6, 0x03, 0xE6, 0x03,
+ 0xE8, 0x03, 0xE8, 0x03, 0xEA, 0x03, 0xEA, 0x03, 0xEC, 0x03, 0xEC, 0x03, 0xEE, 0x03, 0xEE, 0x03,
+ 0xF0, 0x03, 0xF1, 0x03, 0xF9, 0x03, 0xF3, 0x03, 0xF4, 0x03, 0xF5, 0x03, 0xF6, 0x03, 0xF7, 0x03,
+ 0xF7, 0x03, 0xF9, 0x03, 0xFA, 0x03, 0xFA, 0x03, 0xFC, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03,
+ 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+ 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+ 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+ 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+ 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+ 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+ 0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+ 0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+ 0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+ 0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+ 0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+ 0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+ 0x60, 0x04, 0x60, 0x04, 0x62, 0x04, 0x62, 0x04, 0x64, 0x04, 0x64, 0x04, 0x66, 0x04, 0x66, 0x04,
+ 0x68, 0x04, 0x68, 0x04, 0x6A, 0x04, 0x6A, 0x04, 0x6C, 0x04, 0x6C, 0x04, 0x6E, 0x04, 0x6E, 0x04,
+ 0x70, 0x04, 0x70, 0x04, 0x72, 0x04, 0x72, 0x04, 0x74, 0x04, 0x74, 0x04, 0x76, 0x04, 0x76, 0x04,
+ 0x78, 0x04, 0x78, 0x04, 0x7A, 0x04, 0x7A, 0x04, 0x7C, 0x04, 0x7C, 0x04, 0x7E, 0x04, 0x7E, 0x04,
+ 0x80, 0x04, 0x80, 0x04, 0x82, 0x04, 0x83, 0x04, 0x84, 0x04, 0x85, 0x04, 0x86, 0x04, 0x87, 0x04,
+ 0x88, 0x04, 0x89, 0x04, 0x8A, 0x04, 0x8A, 0x04, 0x8C, 0x04, 0x8C, 0x04, 0x8E, 0x04, 0x8E, 0x04,
+ 0x90, 0x04, 0x90, 0x04, 0x92, 0x04, 0x92, 0x04, 0x94, 0x04, 0x94, 0x04, 0x96, 0x04, 0x96, 0x04,
+ 0x98, 0x04, 0x98, 0x04, 0x9A, 0x04, 0x9A, 0x04, 0x9C, 0x04, 0x9C, 0x04, 0x9E, 0x04, 0x9E, 0x04,
+ 0xA0, 0x04, 0xA0, 0x04, 0xA2, 0x04, 0xA2, 0x04, 0xA4, 0x04, 0xA4, 0x04, 0xA6, 0x04, 0xA6, 0x04,
+ 0xA8, 0x04, 0xA8, 0x04, 0xAA, 0x04, 0xAA, 0x04, 0xAC, 0x04, 0xAC, 0x04, 0xAE, 0x04, 0xAE, 0x04,
+ 0xB0, 0x04, 0xB0, 0x04, 0xB2, 0x04, 0xB2, 0x04, 0xB4, 0x04, 0xB4, 0x04, 0xB6, 0x04, 0xB6, 0x04,
+ 0xB8, 0x04, 0xB8, 0x04, 0xBA, 0x04, 0xBA, 0x04, 0xBC, 0x04, 0xBC, 0x04, 0xBE, 0x04, 0xBE, 0x04,
+ 0xC0, 0x04, 0xC1, 0x04, 0xC1, 0x04, 0xC3, 0x04, 0xC3, 0x04, 0xC5, 0x04, 0xC5, 0x04, 0xC7, 0x04,
+ 0xC7, 0x04, 0xC9, 0x04, 0xC9, 0x04, 0xCB, 0x04, 0xCB, 0x04, 0xCD, 0x04, 0xCD, 0x04, 0xC0, 0x04,
+ 0xD0, 0x04, 0xD0, 0x04, 0xD2, 0x04, 0xD2, 0x04, 0xD4, 0x04, 0xD4, 0x04, 0xD6, 0x04, 0xD6, 0x04,
+ 0xD8, 0x04, 0xD8, 0x04, 0xDA, 0x04, 0xDA, 0x04, 0xDC, 0x04, 0xDC, 0x04, 0xDE, 0x04, 0xDE, 0x04,
+ 0xE0, 0x04, 0xE0, 0x04, 0xE2, 0x04, 0xE2, 0x04, 0xE4, 0x04, 0xE4, 0x04, 0xE6, 0x04, 0xE6, 0x04,
+ 0xE8, 0x04, 0xE8, 0x04, 0xEA, 0x04, 0xEA, 0x04, 0xEC, 0x04, 0xEC, 0x04, 0xEE, 0x04, 0xEE, 0x04,
+ 0xF0, 0x04, 0xF0, 0x04, 0xF2, 0x04, 0xF2, 0x04, 0xF4, 0x04, 0xF4, 0x04, 0xF6, 0x04, 0xF6, 0x04,
+ 0xF8, 0x04, 0xF8, 0x04, 0xFA, 0x04, 0xFA, 0x04, 0xFC, 0x04, 0xFC, 0x04, 0xFE, 0x04, 0xFE, 0x04,
+ 0x00, 0x05, 0x00, 0x05, 0x02, 0x05, 0x02, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05,
+ 0x08, 0x05, 0x08, 0x05, 0x0A, 0x05, 0x0A, 0x05, 0x0C, 0x05, 0x0C, 0x05, 0x0E, 0x05, 0x0E, 0x05,
+ 0x10, 0x05, 0x10, 0x05, 0x12, 0x05, 0x12, 0x05, 0x14, 0x05, 0x15, 0x05, 0x16, 0x05, 0x17, 0x05,
+ 0x18, 0x05, 0x19, 0x05, 0x1A, 0x05, 0x1B, 0x05, 0x1C, 0x05, 0x1D, 0x05, 0x1E, 0x05, 0x1F, 0x05,
+ 0x20, 0x05, 0x21, 0x05, 0x22, 0x05, 0x23, 0x05, 0x24, 0x05, 0x25, 0x05, 0x26, 0x05, 0x27, 0x05,
+ 0x28, 0x05, 0x29, 0x05, 0x2A, 0x05, 0x2B, 0x05, 0x2C, 0x05, 0x2D, 0x05, 0x2E, 0x05, 0x2F, 0x05,
+ 0x30, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+ 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+ 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+ 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+ 0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0x57, 0x05,
+ 0x58, 0x05, 0x59, 0x05, 0x5A, 0x05, 0x5B, 0x05, 0x5C, 0x05, 0x5D, 0x05, 0x5E, 0x05, 0x5F, 0x05,
+ 0x60, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+ 0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+ 0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+ 0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+ 0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0xFF, 0xFF,
+ 0xF6, 0x17, 0x63, 0x2C, 0x7E, 0x1D, 0x7F, 0x1D, 0x80, 0x1D, 0x81, 0x1D, 0x82, 0x1D, 0x83, 0x1D,
+ 0x84, 0x1D, 0x85, 0x1D, 0x86, 0x1D, 0x87, 0x1D, 0x88, 0x1D, 0x89, 0x1D, 0x8A, 0x1D, 0x8B, 0x1D,
+ 0x8C, 0x1D, 0x8D, 0x1D, 0x8E, 0x1D, 0x8F, 0x1D, 0x90, 0x1D, 0x91, 0x1D, 0x92, 0x1D, 0x93, 0x1D,
+ 0x94, 0x1D, 0x95, 0x1D, 0x96, 0x1D, 0x97, 0x1D, 0x98, 0x1D, 0x99, 0x1D, 0x9A, 0x1D, 0x9B, 0x1D,
+ 0x9C, 0x1D, 0x9D, 0x1D, 0x9E, 0x1D, 0x9F, 0x1D, 0xA0, 0x1D, 0xA1, 0x1D, 0xA2, 0x1D, 0xA3, 0x1D,
+ 0xA4, 0x1D, 0xA5, 0x1D, 0xA6, 0x1D, 0xA7, 0x1D, 0xA8, 0x1D, 0xA9, 0x1D, 0xAA, 0x1D, 0xAB, 0x1D,
+ 0xAC, 0x1D, 0xAD, 0x1D, 0xAE, 0x1D, 0xAF, 0x1D, 0xB0, 0x1D, 0xB1, 0x1D, 0xB2, 0x1D, 0xB3, 0x1D,
+ 0xB4, 0x1D, 0xB5, 0x1D, 0xB6, 0x1D, 0xB7, 0x1D, 0xB8, 0x1D, 0xB9, 0x1D, 0xBA, 0x1D, 0xBB, 0x1D,
+ 0xBC, 0x1D, 0xBD, 0x1D, 0xBE, 0x1D, 0xBF, 0x1D, 0xC0, 0x1D, 0xC1, 0x1D, 0xC2, 0x1D, 0xC3, 0x1D,
+ 0xC4, 0x1D, 0xC5, 0x1D, 0xC6, 0x1D, 0xC7, 0x1D, 0xC8, 0x1D, 0xC9, 0x1D, 0xCA, 0x1D, 0xCB, 0x1D,
+ 0xCC, 0x1D, 0xCD, 0x1D, 0xCE, 0x1D, 0xCF, 0x1D, 0xD0, 0x1D, 0xD1, 0x1D, 0xD2, 0x1D, 0xD3, 0x1D,
+ 0xD4, 0x1D, 0xD5, 0x1D, 0xD6, 0x1D, 0xD7, 0x1D, 0xD8, 0x1D, 0xD9, 0x1D, 0xDA, 0x1D, 0xDB, 0x1D,
+ 0xDC, 0x1D, 0xDD, 0x1D, 0xDE, 0x1D, 0xDF, 0x1D, 0xE0, 0x1D, 0xE1, 0x1D, 0xE2, 0x1D, 0xE3, 0x1D,
+ 0xE4, 0x1D, 0xE5, 0x1D, 0xE6, 0x1D, 0xE7, 0x1D, 0xE8, 0x1D, 0xE9, 0x1D, 0xEA, 0x1D, 0xEB, 0x1D,
+ 0xEC, 0x1D, 0xED, 0x1D, 0xEE, 0x1D, 0xEF, 0x1D, 0xF0, 0x1D, 0xF1, 0x1D, 0xF2, 0x1D, 0xF3, 0x1D,
+ 0xF4, 0x1D, 0xF5, 0x1D, 0xF6, 0x1D, 0xF7, 0x1D, 0xF8, 0x1D, 0xF9, 0x1D, 0xFA, 0x1D, 0xFB, 0x1D,
+ 0xFC, 0x1D, 0xFD, 0x1D, 0xFE, 0x1D, 0xFF, 0x1D, 0x00, 0x1E, 0x00, 0x1E, 0x02, 0x1E, 0x02, 0x1E,
+ 0x04, 0x1E, 0x04, 0x1E, 0x06, 0x1E, 0x06, 0x1E, 0x08, 0x1E, 0x08, 0x1E, 0x0A, 0x1E, 0x0A, 0x1E,
+ 0x0C, 0x1E, 0x0C, 0x1E, 0x0E, 0x1E, 0x0E, 0x1E, 0x10, 0x1E, 0x10, 0x1E, 0x12, 0x1E, 0x12, 0x1E,
+ 0x14, 0x1E, 0x14, 0x1E, 0x16, 0x1E, 0x16, 0x1E, 0x18, 0x1E, 0x18, 0x1E, 0x1A, 0x1E, 0x1A, 0x1E,
+ 0x1C, 0x1E, 0x1C, 0x1E, 0x1E, 0x1E, 0x1E, 0x1E, 0x20, 0x1E, 0x20, 0x1E, 0x22, 0x1E, 0x22, 0x1E,
+ 0x24, 0x1E, 0x24, 0x1E, 0x26, 0x1E, 0x26, 0x1E, 0x28, 0x1E, 0x28, 0x1E, 0x2A, 0x1E, 0x2A, 0x1E,
+ 0x2C, 0x1E, 0x2C, 0x1E, 0x2E, 0x1E, 0x2E, 0x1E, 0x30, 0x1E, 0x30, 0x1E, 0x32, 0x1E, 0x32, 0x1E,
+ 0x34, 0x1E, 0x34, 0x1E, 0x36, 0x1E, 0x36, 0x1E, 0x38, 0x1E, 0x38, 0x1E, 0x3A, 0x1E, 0x3A, 0x1E,
+ 0x3C, 0x1E, 0x3C, 0x1E, 0x3E, 0x1E, 0x3E, 0x1E, 0x40, 0x1E, 0x40, 0x1E, 0x42, 0x1E, 0x42, 0x1E,
+ 0x44, 0x1E, 0x44, 0x1E, 0x46, 0x1E, 0x46, 0x1E, 0x48, 0x1E, 0x48, 0x1E, 0x4A, 0x1E, 0x4A, 0x1E,
+ 0x4C, 0x1E, 0x4C, 0x1E, 0x4E, 0x1E, 0x4E, 0x1E, 0x50, 0x1E, 0x50, 0x1E, 0x52, 0x1E, 0x52, 0x1E,
+ 0x54, 0x1E, 0x54, 0x1E, 0x56, 0x1E, 0x56, 0x1E, 0x58, 0x1E, 0x58, 0x1E, 0x5A, 0x1E, 0x5A, 0x1E,
+ 0x5C, 0x1E, 0x5C, 0x1E, 0x5E, 0x1E, 0x5E, 0x1E, 0x60, 0x1E, 0x60, 0x1E, 0x62, 0x1E, 0x62, 0x1E,
+ 0x64, 0x1E, 0x64, 0x1E, 0x66, 0x1E, 0x66, 0x1E, 0x68, 0x1E, 0x68, 0x1E, 0x6A, 0x1E, 0x6A, 0x1E,
+ 0x6C, 0x1E, 0x6C, 0x1E, 0x6E, 0x1E, 0x6E, 0x1E, 0x70, 0x1E, 0x70, 0x1E, 0x72, 0x1E, 0x72, 0x1E,
+ 0x74, 0x1E, 0x74, 0x1E, 0x76, 0x1E, 0x76, 0x1E, 0x78, 0x1E, 0x78, 0x1E, 0x7A, 0x1E, 0x7A, 0x1E,
+ 0x7C, 0x1E, 0x7C, 0x1E, 0x7E, 0x1E, 0x7E, 0x1E, 0x80, 0x1E, 0x80, 0x1E, 0x82, 0x1E, 0x82, 0x1E,
+ 0x84, 0x1E, 0x84, 0x1E, 0x86, 0x1E, 0x86, 0x1E, 0x88, 0x1E, 0x88, 0x1E, 0x8A, 0x1E, 0x8A, 0x1E,
+ 0x8C, 0x1E, 0x8C, 0x1E, 0x8E, 0x1E, 0x8E, 0x1E, 0x90, 0x1E, 0x90, 0x1E, 0x92, 0x1E, 0x92, 0x1E,
+ 0x94, 0x1E, 0x94, 0x1E, 0x96, 0x1E, 0x97, 0x1E, 0x98, 0x1E, 0x99, 0x1E, 0x9A, 0x1E, 0x9B, 0x1E,
+ 0x9C, 0x1E, 0x9D, 0x1E, 0x9E, 0x1E, 0x9F, 0x1E, 0xA0, 0x1E, 0xA0, 0x1E, 0xA2, 0x1E, 0xA2, 0x1E,
+ 0xA4, 0x1E, 0xA4, 0x1E, 0xA6, 0x1E, 0xA6, 0x1E, 0xA8, 0x1E, 0xA8, 0x1E, 0xAA, 0x1E, 0xAA, 0x1E,
+ 0xAC, 0x1E, 0xAC, 0x1E, 0xAE, 0x1E, 0xAE, 0x1E, 0xB0, 0x1E, 0xB0, 0x1E, 0xB2, 0x1E, 0xB2, 0x1E,
+ 0xB4, 0x1E, 0xB4, 0x1E, 0xB6, 0x1E, 0xB6, 0x1E, 0xB8, 0x1E, 0xB8, 0x1E, 0xBA, 0x1E, 0xBA, 0x1E,
+ 0xBC, 0x1E, 0xBC, 0x1E, 0xBE, 0x1E, 0xBE, 0x1E, 0xC0, 0x1E, 0xC0, 0x1E, 0xC2, 0x1E, 0xC2, 0x1E,
+ 0xC4, 0x1E, 0xC4, 0x1E, 0xC6, 0x1E, 0xC6, 0x1E, 0xC8, 0x1E, 0xC8, 0x1E, 0xCA, 0x1E, 0xCA, 0x1E,
+ 0xCC, 0x1E, 0xCC, 0x1E, 0xCE, 0x1E, 0xCE, 0x1E, 0xD0, 0x1E, 0xD0, 0x1E, 0xD2, 0x1E, 0xD2, 0x1E,
+ 0xD4, 0x1E, 0xD4, 0x1E, 0xD6, 0x1E, 0xD6, 0x1E, 0xD8, 0x1E, 0xD8, 0x1E, 0xDA, 0x1E, 0xDA, 0x1E,
+ 0xDC, 0x1E, 0xDC, 0x1E, 0xDE, 0x1E, 0xDE, 0x1E, 0xE0, 0x1E, 0xE0, 0x1E, 0xE2, 0x1E, 0xE2, 0x1E,
+ 0xE4, 0x1E, 0xE4, 0x1E, 0xE6, 0x1E, 0xE6, 0x1E, 0xE8, 0x1E, 0xE8, 0x1E, 0xEA, 0x1E, 0xEA, 0x1E,
+ 0xEC, 0x1E, 0xEC, 0x1E, 0xEE, 0x1E, 0xEE, 0x1E, 0xF0, 0x1E, 0xF0, 0x1E, 0xF2, 0x1E, 0xF2, 0x1E,
+ 0xF4, 0x1E, 0xF4, 0x1E, 0xF6, 0x1E, 0xF6, 0x1E, 0xF8, 0x1E, 0xF8, 0x1E, 0xFA, 0x1E, 0xFB, 0x1E,
+ 0xFC, 0x1E, 0xFD, 0x1E, 0xFE, 0x1E, 0xFF, 0x1E, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+ 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+ 0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+ 0x1C, 0x1F, 0x1D, 0x1F, 0x16, 0x1F, 0x17, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+ 0x1C, 0x1F, 0x1D, 0x1F, 0x1E, 0x1F, 0x1F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+ 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+ 0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+ 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+ 0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+ 0x4C, 0x1F, 0x4D, 0x1F, 0x46, 0x1F, 0x47, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+ 0x4C, 0x1F, 0x4D, 0x1F, 0x4E, 0x1F, 0x4F, 0x1F, 0x50, 0x1F, 0x59, 0x1F, 0x52, 0x1F, 0x5B, 0x1F,
+ 0x54, 0x1F, 0x5D, 0x1F, 0x56, 0x1F, 0x5F, 0x1F, 0x58, 0x1F, 0x59, 0x1F, 0x5A, 0x1F, 0x5B, 0x1F,
+ 0x5C, 0x1F, 0x5D, 0x1F, 0x5E, 0x1F, 0x5F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+ 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+ 0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F,
+ 0xCA, 0x1F, 0xCB, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+ 0xFA, 0x1F, 0xFB, 0x1F, 0x7E, 0x1F, 0x7F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+ 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+ 0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+ 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+ 0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+ 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+ 0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xB2, 0x1F, 0xBC, 0x1F,
+ 0xB4, 0x1F, 0xB5, 0x1F, 0xB6, 0x1F, 0xB7, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F,
+ 0xBC, 0x1F, 0xBD, 0x1F, 0xBE, 0x1F, 0xBF, 0x1F, 0xC0, 0x1F, 0xC1, 0x1F, 0xC2, 0x1F, 0xC3, 0x1F,
+ 0xC4, 0x1F, 0xC5, 0x1F, 0xC6, 0x1F, 0xC7, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F, 0xCA, 0x1F, 0xCB, 0x1F,
+ 0xC3, 0x1F, 0xCD, 0x1F, 0xCE, 0x1F, 0xCF, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xD2, 0x1F, 0xD3, 0x1F,
+ 0xD4, 0x1F, 0xD5, 0x1F, 0xD6, 0x1F, 0xD7, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F,
+ 0xDC, 0x1F, 0xDD, 0x1F, 0xDE, 0x1F, 0xDF, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xE2, 0x1F, 0xE3, 0x1F,
+ 0xE4, 0x1F, 0xEC, 0x1F, 0xE6, 0x1F, 0xE7, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+ 0xEC, 0x1F, 0xED, 0x1F, 0xEE, 0x1F, 0xEF, 0x1F, 0xF0, 0x1F, 0xF1, 0x1F, 0xF2, 0x1F, 0xF3, 0x1F,
+ 0xF4, 0x1F, 0xF5, 0x1F, 0xF6, 0x1F, 0xF7, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xFA, 0x1F, 0xFB, 0x1F,
+ 0xF3, 0x1F, 0xFD, 0x1F, 0xFE, 0x1F, 0xFF, 0x1F, 0x00, 0x20, 0x01, 0x20, 0x02, 0x20, 0x03, 0x20,
+ 0x04, 0x20, 0x05, 0x20, 0x06, 0x20, 0x07, 0x20, 0x08, 0x20, 0x09, 0x20, 0x0A, 0x20, 0x0B, 0x20,
+ 0x0C, 0x20, 0x0D, 0x20, 0x0E, 0x20, 0x0F, 0x20, 0x10, 0x20, 0x11, 0x20, 0x12, 0x20, 0x13, 0x20,
+ 0x14, 0x20, 0x15, 0x20, 0x16, 0x20, 0x17, 0x20, 0x18, 0x20, 0x19, 0x20, 0x1A, 0x20, 0x1B, 0x20,
+ 0x1C, 0x20, 0x1D, 0x20, 0x1E, 0x20, 0x1F, 0x20, 0x20, 0x20, 0x21, 0x20, 0x22, 0x20, 0x23, 0x20,
+ 0x24, 0x20, 0x25, 0x20, 0x26, 0x20, 0x27, 0x20, 0x28, 0x20, 0x29, 0x20, 0x2A, 0x20, 0x2B, 0x20,
+ 0x2C, 0x20, 0x2D, 0x20, 0x2E, 0x20, 0x2F, 0x20, 0x30, 0x20, 0x31, 0x20, 0x32, 0x20, 0x33, 0x20,
+ 0x34, 0x20, 0x35, 0x20, 0x36, 0x20, 0x37, 0x20, 0x38, 0x20, 0x39, 0x20, 0x3A, 0x20, 0x3B, 0x20,
+ 0x3C, 0x20, 0x3D, 0x20, 0x3E, 0x20, 0x3F, 0x20, 0x40, 0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20,
+ 0x44, 0x20, 0x45, 0x20, 0x46, 0x20, 0x47, 0x20, 0x48, 0x20, 0x49, 0x20, 0x4A, 0x20, 0x4B, 0x20,
+ 0x4C, 0x20, 0x4D, 0x20, 0x4E, 0x20, 0x4F, 0x20, 0x50, 0x20, 0x51, 0x20, 0x52, 0x20, 0x53, 0x20,
+ 0x54, 0x20, 0x55, 0x20, 0x56, 0x20, 0x57, 0x20, 0x58, 0x20, 0x59, 0x20, 0x5A, 0x20, 0x5B, 0x20,
+ 0x5C, 0x20, 0x5D, 0x20, 0x5E, 0x20, 0x5F, 0x20, 0x60, 0x20, 0x61, 0x20, 0x62, 0x20, 0x63, 0x20,
+ 0x64, 0x20, 0x65, 0x20, 0x66, 0x20, 0x67, 0x20, 0x68, 0x20, 0x69, 0x20, 0x6A, 0x20, 0x6B, 0x20,
+ 0x6C, 0x20, 0x6D, 0x20, 0x6E, 0x20, 0x6F, 0x20, 0x70, 0x20, 0x71, 0x20, 0x72, 0x20, 0x73, 0x20,
+ 0x74, 0x20, 0x75, 0x20, 0x76, 0x20, 0x77, 0x20, 0x78, 0x20, 0x79, 0x20, 0x7A, 0x20, 0x7B, 0x20,
+ 0x7C, 0x20, 0x7D, 0x20, 0x7E, 0x20, 0x7F, 0x20, 0x80, 0x20, 0x81, 0x20, 0x82, 0x20, 0x83, 0x20,
+ 0x84, 0x20, 0x85, 0x20, 0x86, 0x20, 0x87, 0x20, 0x88, 0x20, 0x89, 0x20, 0x8A, 0x20, 0x8B, 0x20,
+ 0x8C, 0x20, 0x8D, 0x20, 0x8E, 0x20, 0x8F, 0x20, 0x90, 0x20, 0x91, 0x20, 0x92, 0x20, 0x93, 0x20,
+ 0x94, 0x20, 0x95, 0x20, 0x96, 0x20, 0x97, 0x20, 0x98, 0x20, 0x99, 0x20, 0x9A, 0x20, 0x9B, 0x20,
+ 0x9C, 0x20, 0x9D, 0x20, 0x9E, 0x20, 0x9F, 0x20, 0xA0, 0x20, 0xA1, 0x20, 0xA2, 0x20, 0xA3, 0x20,
+ 0xA4, 0x20, 0xA5, 0x20, 0xA6, 0x20, 0xA7, 0x20, 0xA8, 0x20, 0xA9, 0x20, 0xAA, 0x20, 0xAB, 0x20,
+ 0xAC, 0x20, 0xAD, 0x20, 0xAE, 0x20, 0xAF, 0x20, 0xB0, 0x20, 0xB1, 0x20, 0xB2, 0x20, 0xB3, 0x20,
+ 0xB4, 0x20, 0xB5, 0x20, 0xB6, 0x20, 0xB7, 0x20, 0xB8, 0x20, 0xB9, 0x20, 0xBA, 0x20, 0xBB, 0x20,
+ 0xBC, 0x20, 0xBD, 0x20, 0xBE, 0x20, 0xBF, 0x20, 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20,
+ 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20, 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20,
+ 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20, 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20,
+ 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20, 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20,
+ 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20, 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20,
+ 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20, 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20,
+ 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20, 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
+ 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20, 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20,
+ 0xFC, 0x20, 0xFD, 0x20, 0xFE, 0x20, 0xFF, 0x20, 0x00, 0x21, 0x01, 0x21, 0x02, 0x21, 0x03, 0x21,
+ 0x04, 0x21, 0x05, 0x21, 0x06, 0x21, 0x07, 0x21, 0x08, 0x21, 0x09, 0x21, 0x0A, 0x21, 0x0B, 0x21,
+ 0x0C, 0x21, 0x0D, 0x21, 0x0E, 0x21, 0x0F, 0x21, 0x10, 0x21, 0x11, 0x21, 0x12, 0x21, 0x13, 0x21,
+ 0x14, 0x21, 0x15, 0x21, 0x16, 0x21, 0x17, 0x21, 0x18, 0x21, 0x19, 0x21, 0x1A, 0x21, 0x1B, 0x21,
+ 0x1C, 0x21, 0x1D, 0x21, 0x1E, 0x21, 0x1F, 0x21, 0x20, 0x21, 0x21, 0x21, 0x22, 0x21, 0x23, 0x21,
+ 0x24, 0x21, 0x25, 0x21, 0x26, 0x21, 0x27, 0x21, 0x28, 0x21, 0x29, 0x21, 0x2A, 0x21, 0x2B, 0x21,
+ 0x2C, 0x21, 0x2D, 0x21, 0x2E, 0x21, 0x2F, 0x21, 0x30, 0x21, 0x31, 0x21, 0x32, 0x21, 0x33, 0x21,
+ 0x34, 0x21, 0x35, 0x21, 0x36, 0x21, 0x37, 0x21, 0x38, 0x21, 0x39, 0x21, 0x3A, 0x21, 0x3B, 0x21,
+ 0x3C, 0x21, 0x3D, 0x21, 0x3E, 0x21, 0x3F, 0x21, 0x40, 0x21, 0x41, 0x21, 0x42, 0x21, 0x43, 0x21,
+ 0x44, 0x21, 0x45, 0x21, 0x46, 0x21, 0x47, 0x21, 0x48, 0x21, 0x49, 0x21, 0x4A, 0x21, 0x4B, 0x21,
+ 0x4C, 0x21, 0x4D, 0x21, 0x32, 0x21, 0x4F, 0x21, 0x50, 0x21, 0x51, 0x21, 0x52, 0x21, 0x53, 0x21,
+ 0x54, 0x21, 0x55, 0x21, 0x56, 0x21, 0x57, 0x21, 0x58, 0x21, 0x59, 0x21, 0x5A, 0x21, 0x5B, 0x21,
+ 0x5C, 0x21, 0x5D, 0x21, 0x5E, 0x21, 0x5F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+ 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+ 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+ 0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+ 0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x80, 0x21, 0x81, 0x21, 0x82, 0x21, 0x83, 0x21,
+ 0x83, 0x21, 0xFF, 0xFF, 0x4B, 0x03, 0xB6, 0x24, 0xB7, 0x24, 0xB8, 0x24, 0xB9, 0x24, 0xBA, 0x24,
+ 0xBB, 0x24, 0xBC, 0x24, 0xBD, 0x24, 0xBE, 0x24, 0xBF, 0x24, 0xC0, 0x24, 0xC1, 0x24, 0xC2, 0x24,
+ 0xC3, 0x24, 0xC4, 0x24, 0xC5, 0x24, 0xC6, 0x24, 0xC7, 0x24, 0xC8, 0x24, 0xC9, 0x24, 0xCA, 0x24,
+ 0xCB, 0x24, 0xCC, 0x24, 0xCD, 0x24, 0xCE, 0x24, 0xCF, 0x24, 0xFF, 0xFF, 0x46, 0x07, 0x00, 0x2C,
+ 0x01, 0x2C, 0x02, 0x2C, 0x03, 0x2C, 0x04, 0x2C, 0x05, 0x2C, 0x06, 0x2C, 0x07, 0x2C, 0x08, 0x2C,
+ 0x09, 0x2C, 0x0A, 0x2C, 0x0B, 0x2C, 0x0C, 0x2C, 0x0D, 0x2C, 0x0E, 0x2C, 0x0F, 0x2C, 0x10, 0x2C,
+ 0x11, 0x2C, 0x12, 0x2C, 0x13, 0x2C, 0x14, 0x2C, 0x15, 0x2C, 0x16, 0x2C, 0x17, 0x2C, 0x18, 0x2C,
+ 0x19, 0x2C, 0x1A, 0x2C, 0x1B, 0x2C, 0x1C, 0x2C, 0x1D, 0x2C, 0x1E, 0x2C, 0x1F, 0x2C, 0x20, 0x2C,
+ 0x21, 0x2C, 0x22, 0x2C, 0x23, 0x2C, 0x24, 0x2C, 0x25, 0x2C, 0x26, 0x2C, 0x27, 0x2C, 0x28, 0x2C,
+ 0x29, 0x2C, 0x2A, 0x2C, 0x2B, 0x2C, 0x2C, 0x2C, 0x2D, 0x2C, 0x2E, 0x2C, 0x5F, 0x2C, 0x60, 0x2C,
+ 0x60, 0x2C, 0x62, 0x2C, 0x63, 0x2C, 0x64, 0x2C, 0x65, 0x2C, 0x66, 0x2C, 0x67, 0x2C, 0x67, 0x2C,
+ 0x69, 0x2C, 0x69, 0x2C, 0x6B, 0x2C, 0x6B, 0x2C, 0x6D, 0x2C, 0x6E, 0x2C, 0x6F, 0x2C, 0x70, 0x2C,
+ 0x71, 0x2C, 0x72, 0x2C, 0x73, 0x2C, 0x74, 0x2C, 0x75, 0x2C, 0x75, 0x2C, 0x77, 0x2C, 0x78, 0x2C,
+ 0x79, 0x2C, 0x7A, 0x2C, 0x7B, 0x2C, 0x7C, 0x2C, 0x7D, 0x2C, 0x7E, 0x2C, 0x7F, 0x2C, 0x80, 0x2C,
+ 0x80, 0x2C, 0x82, 0x2C, 0x82, 0x2C, 0x84, 0x2C, 0x84, 0x2C, 0x86, 0x2C, 0x86, 0x2C, 0x88, 0x2C,
+ 0x88, 0x2C, 0x8A, 0x2C, 0x8A, 0x2C, 0x8C, 0x2C, 0x8C, 0x2C, 0x8E, 0x2C, 0x8E, 0x2C, 0x90, 0x2C,
+ 0x90, 0x2C, 0x92, 0x2C, 0x92, 0x2C, 0x94, 0x2C, 0x94, 0x2C, 0x96, 0x2C, 0x96, 0x2C, 0x98, 0x2C,
+ 0x98, 0x2C, 0x9A, 0x2C, 0x9A, 0x2C, 0x9C, 0x2C, 0x9C, 0x2C, 0x9E, 0x2C, 0x9E, 0x2C, 0xA0, 0x2C,
+ 0xA0, 0x2C, 0xA2, 0x2C, 0xA2, 0x2C, 0xA4, 0x2C, 0xA4, 0x2C, 0xA6, 0x2C, 0xA6, 0x2C, 0xA8, 0x2C,
+ 0xA8, 0x2C, 0xAA, 0x2C, 0xAA, 0x2C, 0xAC, 0x2C, 0xAC, 0x2C, 0xAE, 0x2C, 0xAE, 0x2C, 0xB0, 0x2C,
+ 0xB0, 0x2C, 0xB2, 0x2C, 0xB2, 0x2C, 0xB4, 0x2C, 0xB4, 0x2C, 0xB6, 0x2C, 0xB6, 0x2C, 0xB8, 0x2C,
+ 0xB8, 0x2C, 0xBA, 0x2C, 0xBA, 0x2C, 0xBC, 0x2C, 0xBC, 0x2C, 0xBE, 0x2C, 0xBE, 0x2C, 0xC0, 0x2C,
+ 0xC0, 0x2C, 0xC2, 0x2C, 0xC2, 0x2C, 0xC4, 0x2C, 0xC4, 0x2C, 0xC6, 0x2C, 0xC6, 0x2C, 0xC8, 0x2C,
+ 0xC8, 0x2C, 0xCA, 0x2C, 0xCA, 0x2C, 0xCC, 0x2C, 0xCC, 0x2C, 0xCE, 0x2C, 0xCE, 0x2C, 0xD0, 0x2C,
+ 0xD0, 0x2C, 0xD2, 0x2C, 0xD2, 0x2C, 0xD4, 0x2C, 0xD4, 0x2C, 0xD6, 0x2C, 0xD6, 0x2C, 0xD8, 0x2C,
+ 0xD8, 0x2C, 0xDA, 0x2C, 0xDA, 0x2C, 0xDC, 0x2C, 0xDC, 0x2C, 0xDE, 0x2C, 0xDE, 0x2C, 0xE0, 0x2C,
+ 0xE0, 0x2C, 0xE2, 0x2C, 0xE2, 0x2C, 0xE4, 0x2C, 0xE5, 0x2C, 0xE6, 0x2C, 0xE7, 0x2C, 0xE8, 0x2C,
+ 0xE9, 0x2C, 0xEA, 0x2C, 0xEB, 0x2C, 0xEC, 0x2C, 0xED, 0x2C, 0xEE, 0x2C, 0xEF, 0x2C, 0xF0, 0x2C,
+ 0xF1, 0x2C, 0xF2, 0x2C, 0xF3, 0x2C, 0xF4, 0x2C, 0xF5, 0x2C, 0xF6, 0x2C, 0xF7, 0x2C, 0xF8, 0x2C,
+ 0xF9, 0x2C, 0xFA, 0x2C, 0xFB, 0x2C, 0xFC, 0x2C, 0xFD, 0x2C, 0xFE, 0x2C, 0xFF, 0x2C, 0xA0, 0x10,
+ 0xA1, 0x10, 0xA2, 0x10, 0xA3, 0x10, 0xA4, 0x10, 0xA5, 0x10, 0xA6, 0x10, 0xA7, 0x10, 0xA8, 0x10,
+ 0xA9, 0x10, 0xAA, 0x10, 0xAB, 0x10, 0xAC, 0x10, 0xAD, 0x10, 0xAE, 0x10, 0xAF, 0x10, 0xB0, 0x10,
+ 0xB1, 0x10, 0xB2, 0x10, 0xB3, 0x10, 0xB4, 0x10, 0xB5, 0x10, 0xB6, 0x10, 0xB7, 0x10, 0xB8, 0x10,
+ 0xB9, 0x10, 0xBA, 0x10, 0xBB, 0x10, 0xBC, 0x10, 0xBD, 0x10, 0xBE, 0x10, 0xBF, 0x10, 0xC0, 0x10,
+ 0xC1, 0x10, 0xC2, 0x10, 0xC3, 0x10, 0xC4, 0x10, 0xC5, 0x10, 0xFF, 0xFF, 0x1B, 0xD2, 0x21, 0xFF,
+ 0x22, 0xFF, 0x23, 0xFF, 0x24, 0xFF, 0x25, 0xFF, 0x26, 0xFF, 0x27, 0xFF, 0x28, 0xFF, 0x29, 0xFF,
+ 0x2A, 0xFF, 0x2B, 0xFF, 0x2C, 0xFF, 0x2D, 0xFF, 0x2E, 0xFF, 0x2F, 0xFF, 0x30, 0xFF, 0x31, 0xFF,
+ 0x32, 0xFF, 0x33, 0xFF, 0x34, 0xFF, 0x35, 0xFF, 0x36, 0xFF, 0x37, 0xFF, 0x38, 0xFF, 0x39, 0xFF,
+ 0x3A, 0xFF, 0x5B, 0xFF, 0x5C, 0xFF, 0x5D, 0xFF, 0x5E, 0xFF, 0x5F, 0xFF, 0x60, 0xFF, 0x61, 0xFF,
+ 0x62, 0xFF, 0x63, 0xFF, 0x64, 0xFF, 0x65, 0xFF, 0x66, 0xFF, 0x67, 0xFF, 0x68, 0xFF, 0x69, 0xFF,
+ 0x6A, 0xFF, 0x6B, 0xFF, 0x6C, 0xFF, 0x6D, 0xFF, 0x6E, 0xFF, 0x6F, 0xFF, 0x70, 0xFF, 0x71, 0xFF,
+ 0x72, 0xFF, 0x73, 0xFF, 0x74, 0xFF, 0x75, 0xFF, 0x76, 0xFF, 0x77, 0xFF, 0x78, 0xFF, 0x79, 0xFF,
+ 0x7A, 0xFF, 0x7B, 0xFF, 0x7C, 0xFF, 0x7D, 0xFF, 0x7E, 0xFF, 0x7F, 0xFF, 0x80, 0xFF, 0x81, 0xFF,
+ 0x82, 0xFF, 0x83, 0xFF, 0x84, 0xFF, 0x85, 0xFF, 0x86, 0xFF, 0x87, 0xFF, 0x88, 0xFF, 0x89, 0xFF,
+ 0x8A, 0xFF, 0x8B, 0xFF, 0x8C, 0xFF, 0x8D, 0xFF, 0x8E, 0xFF, 0x8F, 0xFF, 0x90, 0xFF, 0x91, 0xFF,
+ 0x92, 0xFF, 0x93, 0xFF, 0x94, 0xFF, 0x95, 0xFF, 0x96, 0xFF, 0x97, 0xFF, 0x98, 0xFF, 0x99, 0xFF,
+ 0x9A, 0xFF, 0x9B, 0xFF, 0x9C, 0xFF, 0x9D, 0xFF, 0x9E, 0xFF, 0x9F, 0xFF, 0xA0, 0xFF, 0xA1, 0xFF,
+ 0xA2, 0xFF, 0xA3, 0xFF, 0xA4, 0xFF, 0xA5, 0xFF, 0xA6, 0xFF, 0xA7, 0xFF, 0xA8, 0xFF, 0xA9, 0xFF,
+ 0xAA, 0xFF, 0xAB, 0xFF, 0xAC, 0xFF, 0xAD, 0xFF, 0xAE, 0xFF, 0xAF, 0xFF, 0xB0, 0xFF, 0xB1, 0xFF,
+ 0xB2, 0xFF, 0xB3, 0xFF, 0xB4, 0xFF, 0xB5, 0xFF, 0xB6, 0xFF, 0xB7, 0xFF, 0xB8, 0xFF, 0xB9, 0xFF,
+ 0xBA, 0xFF, 0xBB, 0xFF, 0xBC, 0xFF, 0xBD, 0xFF, 0xBE, 0xFF, 0xBF, 0xFF, 0xC0, 0xFF, 0xC1, 0xFF,
+ 0xC2, 0xFF, 0xC3, 0xFF, 0xC4, 0xFF, 0xC5, 0xFF, 0xC6, 0xFF, 0xC7, 0xFF, 0xC8, 0xFF, 0xC9, 0xFF,
+ 0xCA, 0xFF, 0xCB, 0xFF, 0xCC, 0xFF, 0xCD, 0xFF, 0xCE, 0xFF, 0xCF, 0xFF, 0xD0, 0xFF, 0xD1, 0xFF,
+ 0xD2, 0xFF, 0xD3, 0xFF, 0xD4, 0xFF, 0xD5, 0xFF, 0xD6, 0xFF, 0xD7, 0xFF, 0xD8, 0xFF, 0xD9, 0xFF,
+ 0xDA, 0xFF, 0xDB, 0xFF, 0xDC, 0xFF, 0xDD, 0xFF, 0xDE, 0xFF, 0xDF, 0xFF, 0xE0, 0xFF, 0xE1, 0xFF,
+ 0xE2, 0xFF, 0xE3, 0xFF, 0xE4, 0xFF, 0xE5, 0xFF, 0xE6, 0xFF, 0xE7, 0xFF, 0xE8, 0xFF, 0xE9, 0xFF,
+ 0xEA, 0xFF, 0xEB, 0xFF, 0xEC, 0xFF, 0xED, 0xFF, 0xEE, 0xFF, 0xEF, 0xFF, 0xF0, 0xFF, 0xF1, 0xFF,
+ 0xF2, 0xFF, 0xF3, 0xFF, 0xF4, 0xFF, 0xF5, 0xFF, 0xF6, 0xFF, 0xF7, 0xFF, 0xF8, 0xFF, 0xF9, 0xFF,
+ 0xFA, 0xFF, 0xFB, 0xFF, 0xFC, 0xFF, 0xFD, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF
+};
+
+#endif /* _UPCASE_H */
diff --git a/fs/sdfat/version.h b/fs/sdfat/version.h
new file mode 100644
index 000000000000..44e44e03d847
--- /dev/null
+++ b/fs/sdfat/version.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : version.h */
+/* PURPOSE : sdFAT File Manager */
+/* */
+/************************************************************************/
+#define SDFAT_VERSION "2.3.0-lineage"
diff --git a/fs/sdfat/xattr.c b/fs/sdfat/xattr.c
new file mode 100644
index 000000000000..40bb850711be
--- /dev/null
+++ b/fs/sdfat/xattr.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/************************************************************************/
+/* */
+/* PROJECT : exFAT & FAT12/16/32 File System */
+/* FILE : xattr.c */
+/* PURPOSE : sdFAT code for supporting xattr(Extended File Attributes) */
+/* */
+/*----------------------------------------------------------------------*/
+/* NOTES */
+/* */
+/* */
+/************************************************************************/
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include <linux/dcache.h>
+#include "sdfat.h"
+
+#ifndef CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL
+#define CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL ("undefined")
+#endif
+
+static const char default_xattr[] = CONFIG_SDFAT_VIRTUAL_XATTR_SELINUX_LABEL;
+
+static int can_support(const char *name)
+{
+ if (!name || strcmp(name, "security.selinux"))
+ return -1;
+ return 0;
+}
+
+ssize_t sdfat_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ return 0;
+}
+
+
+/*************************************************************************
+ * INNER FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+static int __sdfat_xattr_check_support(const char *name)
+{
+ if (can_support(name))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+ssize_t __sdfat_getxattr(const char *name, void *value, size_t size)
+{
+ if (can_support(name))
+ return -EOPNOTSUPP;
+
+ if ((size > strlen(default_xattr)+1) && value)
+ strcpy(value, default_xattr);
+
+ return strlen(default_xattr);
+}
+
+
+/*************************************************************************
+ * FUNCTIONS WHICH HAS KERNEL VERSION DEPENDENCY
+ *************************************************************************/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
+static int sdfat_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return __sdfat_getxattr(name, buffer, size);
+}
+
+static int sdfat_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+const struct xattr_handler sdfat_xattr_handler = {
+ .prefix = "", /* match anything */
+ .get = sdfat_xattr_get,
+ .set = sdfat_xattr_set,
+};
+
+const struct xattr_handler *sdfat_xattr_handlers[] = {
+ &sdfat_xattr_handler,
+ NULL
+};
+
+void setup_sdfat_xattr_handler(struct super_block *sb)
+{
+ sb->s_xattr = sdfat_xattr_handlers;
+}
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
+int sdfat_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+ssize_t sdfat_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
+{
+ return __sdfat_getxattr(name, value, size);
+}
+
+int sdfat_removexattr(struct dentry *dentry, const char *name)
+{
+ return __sdfat_xattr_check_support(name);
+}
+
+void setup_sdfat_xattr_handler(struct super_block *sb)
+{
+ /* DO NOTHING */
+}
+#endif
diff --git a/fs/select.c b/fs/select.c
index 3d38808dbcb6..f7667f2c6d0d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -239,7 +239,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
set_current_state(state);
if (!pwq->triggered)
- rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+ rc = freezable_schedule_hrtimeout_range(expires, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 6dc4296eed62..0004df800c88 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -28,6 +29,9 @@ static void *seq_buf_alloc(unsigned long size)
void *buf;
gfp_t gfp = GFP_KERNEL;
+ if (unlikely(size > MAX_RW_COUNT))
+ return NULL;
+
/*
* For high order allocations, use __GFP_NORETRY to avoid oom-killing -
* it's better to fall back to vmalloc() than to kill things. For small
@@ -679,11 +683,11 @@ EXPORT_SYMBOL(seq_puts);
/*
* A helper routine for putting decimal numbers without rich format of printf().
* only 'unsigned long long' is supported.
- * This routine will put one byte delimiter + number into seq_file.
+ * This routine will put strlen(delimiter) + number into seq_file.
* This routine is very quick when you show lots of numbers.
* In usual cases, it will be better to use seq_printf(). It's easier to read.
*/
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
unsigned long long num)
{
int len;
@@ -691,8 +695,15 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
goto overflow;
- if (delimiter)
- m->buf[m->count++] = delimiter;
+ len = strlen(delimiter);
+ if (m->count + len >= m->size)
+ goto overflow;
+
+ memcpy(m->buf + m->count, delimiter, len);
+ m->count += len;
+
+ if (m->count + 1 >= m->size)
+ goto overflow;
if (num < 10) {
m->buf[m->count++] = num + '0';
@@ -702,6 +713,7 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
len = num_to_str(m->buf + m->count, m->size - m->count, num);
if (!len)
goto overflow;
+
m->count += len;
return;
@@ -710,19 +722,42 @@ overflow:
}
EXPORT_SYMBOL(seq_put_decimal_ull);
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num)
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
{
+ int len;
+
+ if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
+ goto overflow;
+
+ len = strlen(delimiter);
+ if (m->count + len >= m->size)
+ goto overflow;
+
+ memcpy(m->buf + m->count, delimiter, len);
+ m->count += len;
+
+ if (m->count + 2 >= m->size)
+ goto overflow;
+
if (num < 0) {
- if (m->count + 3 >= m->size) {
- seq_set_overflow(m);
- return;
- }
- if (delimiter)
- m->buf[m->count++] = delimiter;
+ m->buf[m->count++] = '-';
num = -num;
- delimiter = '-';
}
- seq_put_decimal_ull(m, delimiter, num);
+
+ if (num < 10) {
+ m->buf[m->count++] = num + '0';
+ return;
+ }
+
+ len = num_to_str(m->buf + m->count, m->size - m->count, num);
+ if (!len)
+ goto overflow;
+
+ m->count += len;
+ return;
+
+overflow:
+ seq_set_overflow(m);
}
EXPORT_SYMBOL(seq_put_decimal_ll);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 270221fcef42..9c5fa0ab5e0f 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -34,17 +34,7 @@
void signalfd_cleanup(struct sighand_struct *sighand)
{
- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
- /*
- * The lockless check can race with remove_wait_queue() in progress,
- * but in this case its caller should run under rcu_read_lock() and
- * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
- */
- if (likely(!waitqueue_active(wqh)))
- return;
-
- /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
- wake_up_poll(wqh, POLLHUP | POLLFREE);
+ wake_up_pollfree(&sighand->signalfd_wqh);
}
struct signalfd_ctx {
diff --git a/fs/splice.c b/fs/splice.c
index 8398974e1538..0562b990d64e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -183,83 +183,42 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
unsigned int spd_pages = spd->nr_pages;
- int ret, do_wakeup, page_nr;
+ int ret = 0, page_nr = 0;
if (!spd_pages)
return 0;
- ret = 0;
- do_wakeup = 0;
- page_nr = 0;
-
- pipe_lock(pipe);
-
- for (;;) {
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- break;
- }
-
- if (pipe->nrbufs < pipe->buffers) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = spd->pages[page_nr];
- buf->offset = spd->partial[page_nr].offset;
- buf->len = spd->partial[page_nr].len;
- buf->private = spd->partial[page_nr].private;
- buf->ops = spd->ops;
- buf->flags = 0;
- if (spd->flags & SPLICE_F_GIFT)
- buf->flags |= PIPE_BUF_FLAG_GIFT;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ goto out;
+ }
- if (pipe->files)
- do_wakeup = 1;
+ while (pipe->nrbufs < pipe->buffers) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
- if (!--spd->nr_pages)
- break;
- if (pipe->nrbufs < pipe->buffers)
- continue;
+ buf->page = spd->pages[page_nr];
+ buf->offset = spd->partial[page_nr].offset;
+ buf->len = spd->partial[page_nr].len;
+ buf->private = spd->partial[page_nr].private;
+ buf->ops = spd->ops;
+ buf->flags = 0;
+ if (spd->flags & SPLICE_F_GIFT)
+ buf->flags |= PIPE_BUF_FLAG_GIFT;
- break;
- }
-
- if (spd->flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
+ if (!--spd->nr_pages)
break;
- }
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- do_wakeup = 0;
- }
-
- pipe->waiting_writers++;
- pipe_wait(pipe);
- pipe->waiting_writers--;
}
- pipe_unlock(pipe);
-
- if (do_wakeup)
- wakeup_pipe_readers(pipe);
+ if (!ret)
+ ret = -EAGAIN;
+out:
while (page_nr < spd_pages)
spd->spd_release(spd, page_nr++);
@@ -1342,6 +1301,25 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
}
EXPORT_SYMBOL(do_splice_direct);
+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+ for (;;) {
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ return -EPIPE;
+ }
+ if (pipe->nrbufs != pipe->buffers)
+ return 0;
+ if (flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ pipe->waiting_writers++;
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+}
+
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -1424,8 +1402,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
offset = in->f_pos;
}
- ret = do_splice_to(in, &offset, opipe, len, flags);
-
+ pipe_lock(opipe);
+ ret = wait_for_space(opipe, flags);
+ if (!ret)
+ ret = do_splice_to(in, &offset, opipe, len, flags);
+ pipe_unlock(opipe);
+ if (ret > 0)
+ wakeup_pipe_readers(opipe);
if (!off_in)
in->f_pos = offset;
else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1420,32 @@ static long do_splice(struct file *in, loff_t __user *off_in,
return -EINVAL;
}
-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
- unsigned int nr_vecs, struct page **pages,
- struct partial_page *partial, bool aligned,
+static int get_iovec_page_array(struct iov_iter *from,
+ struct page **pages,
+ struct partial_page *partial,
unsigned int pipe_buffers)
{
- int buffers = 0, error = 0;
-
- while (nr_vecs) {
- unsigned long off, npages;
- struct iovec entry;
- void __user *base;
- size_t len;
- int i;
-
- error = -EFAULT;
- if (copy_from_user(&entry, iov, sizeof(entry)))
- break;
-
- base = entry.iov_base;
- len = entry.iov_len;
-
- /*
- * Sanity check this iovec. 0 read succeeds.
- */
- error = 0;
- if (unlikely(!len))
- break;
- error = -EFAULT;
- if (!access_ok(VERIFY_READ, base, len))
- break;
-
- /*
- * Get this base offset and number of pages, then map
- * in the user pages.
- */
- off = (unsigned long) base & ~PAGE_MASK;
-
- /*
- * If asked for alignment, the offset must be zero and the
- * length a multiple of the PAGE_SIZE.
- */
- error = -EINVAL;
- if (aligned && (off || len & ~PAGE_MASK))
- break;
-
- npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (npages > pipe_buffers - buffers)
- npages = pipe_buffers - buffers;
-
- error = get_user_pages_fast((unsigned long)base, npages,
- 0, &pages[buffers]);
-
- if (unlikely(error <= 0))
- break;
-
- /*
- * Fill this contiguous range into the partial page map.
- */
- for (i = 0; i < error; i++) {
- const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
- partial[buffers].offset = off;
- partial[buffers].len = plen;
-
- off = 0;
- len -= plen;
+ int buffers = 0;
+ while (iov_iter_count(from)) {
+ ssize_t copied;
+ size_t start;
+
+ copied = iov_iter_get_pages(from, pages + buffers, ~0UL,
+ pipe_buffers - buffers, &start);
+ if (copied <= 0)
+ return buffers ? buffers : copied;
+
+ iov_iter_advance(from, copied);
+ while (copied) {
+ int size = min_t(int, copied, PAGE_SIZE - start);
+ partial[buffers].offset = start;
+ partial[buffers].len = size;
+ copied -= size;
+ start = 0;
buffers++;
}
-
- /*
- * We didn't complete this iov, stop here since it probably
- * means we have to move some of this into a pipe to
- * be able to continue.
- */
- if (len)
- break;
-
- /*
- * Don't continue if we mapped fewer pages than we asked for,
- * or if we mapped the max number of pages that we have
- * room for.
- */
- if (error < npages || buffers == pipe_buffers)
- break;
-
- nr_vecs--;
- iov++;
}
-
- if (buffers)
- return buffers;
-
- return error;
+ return buffers;
}
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,10 +1499,13 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter from;
struct page *pages[PIPE_DEF_BUFFERS];
struct partial_page partial[PIPE_DEF_BUFFERS];
struct splice_pipe_desc spd = {
@@ -1610,18 +1522,32 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
if (!pipe)
return -EBADF;
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
+ ret = import_iovec(WRITE, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &from);
+ if (ret < 0)
+ return ret;
- spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
- spd.partial, false,
- spd.nr_pages_max);
- if (spd.nr_pages <= 0)
- ret = spd.nr_pages;
- else
- ret = splice_to_pipe(pipe, &spd);
+ if (splice_grow_spd(pipe, &spd)) {
+ kfree(iov);
+ return -ENOMEM;
+ }
+ pipe_lock(pipe);
+ ret = wait_for_space(pipe, flags);
+ if (!ret) {
+ spd.nr_pages = get_iovec_page_array(&from, spd.pages,
+ spd.partial,
+ spd.nr_pages_max);
+ if (spd.nr_pages <= 0)
+ ret = spd.nr_pages;
+ else
+ ret = splice_to_pipe(pipe, &spd);
+ }
+ pipe_unlock(pipe);
+ if (ret > 0)
+ wakeup_pipe_readers(pipe);
splice_shrink_spd(&spd);
+ kfree(iov);
return ret;
}
@@ -1876,7 +1802,11 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
*obuf = *ibuf;
/*
@@ -1948,7 +1878,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 8073b6532cf0..1d406a2094a5 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num)
struct squashfs_sb_info *msblk = sb->s_fs_info;
int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1);
int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1);
- u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+ u64 start;
__le64 ino;
int err;
TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);
+ if (ino_num == 0 || (ino_num - 1) >= msblk->inodes)
+ return -EINVAL;
+
+ start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+
err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino));
if (err < 0)
return err;
@@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
u64 lookup_table_start, u64 next_table, unsigned int inodes)
{
unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
+ unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_inode_lookup_table, length %d\n", length);
@@ -134,20 +142,41 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
if (inodes == 0)
return ERR_PTR(-EINVAL);
- /* length bytes should not extend into the next table - this check
- * also traps instances where lookup_table_start is incorrectly larger
- * than the next table start
+ /*
+ * The computed size of the lookup table (length bytes) should exactly
+ * match the table start and end points
*/
- if (lookup_table_start + length > next_table)
+ if (length != (next_table - lookup_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, lookup_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first inode lookup table metadata block,
- * this should be less than lookup_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed inode lookup blocks. Each entry should be
+ * less than the next (i.e. table[0] < table[1]), and the difference
+ * between them should be SQUASHFS_METADATA_SIZE or less.
+ * table[indexes - 1] should be less than lookup_table_start, and
+ * again the difference should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end
+ || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= lookup_table_start ||
+ (lookup_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 1ec7bae2751d..979da17cbbf3 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -224,11 +224,11 @@ failure:
* If the skip factor is limited in this way then the file will use multiple
* slots.
*/
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
{
- int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+ u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
* SQUASHFS_META_INDEXES);
- return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+ return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d38ea3dab951..d2e15baab537 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_ID_BLOCK(index);
int offset = SQUASHFS_ID_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->id_table[block]);
+ u64 start_block;
__le32 disk_id;
int err;
+ if (index >= msblk->ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->id_table[block]);
+
err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset,
sizeof(disk_id));
if (err < 0)
@@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
u64 id_table_start, u64 next_table, unsigned short no_ids)
{
unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
+ unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_id_index_table, length %d\n", length);
@@ -80,20 +88,38 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
return ERR_PTR(-EINVAL);
/*
- * length bytes should not extend into the next table - this check
- * also traps instances where id_table_start is incorrectly larger
- * than the next table start
+ * The computed size of the index table (length bytes) should exactly
+ * match the table start and end points
*/
- if (id_table_start + length > next_table)
+ if (length != (next_table - id_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, id_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first id lookup table metadata block, this
- * should be less than id_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than id_table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= id_table_start || (id_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index e66486366f02..2fd1262cc1bd 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -30,6 +30,7 @@
/* size of metadata (inode and directory) blocks */
#define SQUASHFS_METADATA_SIZE 8192
+#define SQUASHFS_BLOCK_OFFSET 2
/* default size of block device I/O */
#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index ef69c31947bf..5234c19a0eab 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -77,5 +77,6 @@ struct squashfs_sb_info {
unsigned int inodes;
unsigned int fragments;
int xattr_ids;
+ unsigned int ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 93aa3e23c845..44500dcf1805 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -177,6 +177,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
msblk->fragments = le32_to_cpu(sblk->fragments);
+ msblk->ids = le16_to_cpu(sblk->no_ids);
flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
@@ -188,7 +189,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
TRACE("Number of fragments %d\n", msblk->fragments);
- TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
+ TRACE("Number of ids %d\n", msblk->ids);
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
TRACE("sblk->fragment_table_start %llx\n",
@@ -245,8 +246,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
allocate_id_index_table:
/* Allocate and read id index table */
msblk->id_table = squashfs_read_id_index_table(sb,
- le64_to_cpu(sblk->id_table_start), next_table,
- le16_to_cpu(sblk->no_ids));
+ le64_to_cpu(sblk->id_table_start), next_table, msblk->ids);
if (IS_ERR(msblk->id_table)) {
ERROR("unable to read id index table\n");
err = PTR_ERR(msblk->id_table);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index c83f5d9ec125..30b3aaa08b62 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
{
+ struct squashfs_xattr_id_table *id_table;
+
+ id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ if (IS_ERR(id_table))
+ return (__le64 *) id_table;
+
+ *xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
+ kfree(id_table);
+
ERROR("Xattrs in filesystem, these will be ignored\n");
- *xattr_table_start = start;
return ERR_PTR(-ENOTSUPP);
}
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index c89607d690c4..7f718d2bf357 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_XATTR_BLOCK(index);
int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+ u64 start_block;
struct squashfs_xattr_id id;
int err;
+ if (index >= msblk->xattr_ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+
err = squashfs_read_metadata(sb, &id, &start_block, &offset,
sizeof(id));
if (err < 0)
@@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
/*
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
-__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
+__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
u64 *xattr_table_start, int *xattr_ids)
{
- unsigned int len;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ unsigned int len, indexes;
struct squashfs_xattr_id_table *id_table;
+ __le64 *table;
+ u64 start, end;
+ int n;
- id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ id_table = squashfs_read_table(sb, table_start, sizeof(*id_table));
if (IS_ERR(id_table))
return (__le64 *) id_table;
@@ -83,13 +92,54 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
if (*xattr_ids == 0)
return ERR_PTR(-EINVAL);
- /* xattr_table should be less than start */
- if (*xattr_table_start >= start)
+ len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids);
+
+ /*
+ * The computed size of the index table (len bytes) should exactly
+ * match the table start and end points
+ */
+ start = table_start + sizeof(*id_table);
+ end = msblk->bytes_used;
+
+ if (len != (end - start))
return ERR_PTR(-EINVAL);
- len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ table = squashfs_read_table(sb, start, len);
+ if (IS_ERR(table))
+ return table;
+
+ /* table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed xattr id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than table_start, and again the difference
+ * shouls be SQUASHFS_METADATA_SIZE or less.
+ *
+ * Finally xattr_table_start should be less than table[0].
+ */
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= table_start || (table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- TRACE("In read_xattr_index_table, length %d\n", len);
+ if (*xattr_table_start >= le64_to_cpu(table[0])) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- return squashfs_read_table(sb, start + sizeof(*id_table), len);
+ return table;
}
diff --git a/fs/super.c b/fs/super.c
index 689ec96c43f8..55cd8843b49a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,6 +33,7 @@
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
+#include <linux/user_namespace.h>
#include "internal.h"
@@ -175,6 +176,7 @@ static void destroy_super(struct super_block *s)
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
WARN_ON(!list_empty(&s->s_mounts));
+ put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
kfree(s->s_options);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -184,11 +186,13 @@ static void destroy_super(struct super_block *s)
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
* @flags: the mount flags
+ * @user_ns: User namespace for the super_block
*
* Allocates and initializes a new &struct super_block. alloc_super()
* returns a pointer new superblock or %NULL if allocation had failed.
*/
-static struct super_block *alloc_super(struct file_system_type *type, int flags)
+static struct super_block *alloc_super(struct file_system_type *type, int flags,
+ struct user_namespace *user_ns)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static const struct super_operations default_op;
@@ -198,6 +202,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
return NULL;
INIT_LIST_HEAD(&s->s_mounts);
+ s->s_user_ns = get_user_ns(user_ns);
if (security_sb_alloc(s))
goto fail;
@@ -453,29 +458,42 @@ void generic_shutdown_super(struct super_block *sb)
EXPORT_SYMBOL(generic_shutdown_super);
/**
- * sget - find or create a superblock
+ * sget_userns - find or create a superblock
* @type: filesystem type superblock should belong to
* @test: comparison callback
* @set: setup callback
* @flags: mount flags
+ * @user_ns: User namespace for the super_block
* @data: argument to each of them
*/
-struct super_block *sget(struct file_system_type *type,
+struct super_block *sget_userns(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
- int flags,
+ int flags, struct user_namespace *user_ns,
void *data)
{
struct super_block *s = NULL;
struct super_block *old;
int err;
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
retry:
spin_lock(&sb_lock);
if (test) {
hlist_for_each_entry(old, &type->fs_supers, s_instances) {
if (!test(old, data))
continue;
+ if (user_ns != old->s_user_ns) {
+ spin_unlock(&sb_lock);
+ if (s) {
+ up_write(&s->s_umount);
+ destroy_super(s);
+ }
+ return ERR_PTR(-EBUSY);
+ }
if (!grab_super(old))
goto retry;
if (s) {
@@ -488,7 +506,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags);
+ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -515,6 +533,38 @@ retry:
return s;
}
+EXPORT_SYMBOL(sget_userns);
+
+/**
+ * sget - find or create a superblock
+ * @type: filesystem type superblock should belong to
+ * @test: comparison callback
+ * @set: setup callback
+ * @flags: mount flags
+ * @data: argument to each of them
+ */
+struct super_block *sget(struct file_system_type *type,
+ int (*test)(struct super_block *,void *),
+ int (*set)(struct super_block *,void *),
+ int flags,
+ void *data)
+{
+ struct user_namespace *user_ns = current_user_ns();
+
+ /* We don't yet pass the user namespace of the parent
+ * mount through to here so always use &init_user_ns
+ * until that changes.
+ */
+ if (flags & MS_SUBMOUNT)
+ user_ns = &init_user_ns;
+
+ /* Ensure the requestor has permissions over the target filesystem */
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return sget_userns(type, test, set, flags, user_ns, data);
+}
+
EXPORT_SYMBOL(sget);
void drop_super(struct super_block *sb)
@@ -947,12 +997,20 @@ static int ns_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
- void *data, int (*fill_super)(struct super_block *, void *, int))
+struct dentry *mount_ns(struct file_system_type *fs_type,
+ int flags, void *data, void *ns, struct user_namespace *user_ns,
+ int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *sb;
- sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
+ /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+ * over the namespace.
+ */
+ if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
+ user_ns, ns);
if (IS_ERR(sb))
return ERR_CAST(sb);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 94374e435025..2b67bda2021b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock);
void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
- char *buf, *path = NULL;
+ char *buf;
buf = kzalloc(PATH_MAX, GFP_KERNEL);
if (buf)
- path = kernfs_path(parent, buf, PATH_MAX);
+ kernfs_path(parent, buf, PATH_MAX);
WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
- path, name);
+ buf, name);
kfree(buf);
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 666986b95c5d..300cdbdc8494 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
#include "sysfs.h"
#include "../kernfs/kernfs-internal.h"
@@ -549,3 +550,57 @@ void sysfs_remove_bin_file(struct kobject *kobj,
kernfs_remove_by_name(kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
+
+/**
+ * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @fmt: format
+ * @...: optional arguments to @fmt
+ *
+ *
+ * Returns number of characters written to @buf.
+ */
+int sysfs_emit(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf),
+ "invalid sysfs_emit: buf:%p\n", buf))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf, PAGE_SIZE, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit);
+
+/**
+ * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @at: offset in @buf to start write in bytes
+ * @at must be >= 0 && < PAGE_SIZE
+ * @fmt: format
+ * @...: optional arguments to @fmt
+ *
+ *
+ * Returns number of characters written starting at &@buf[@at].
+ */
+int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE,
+ "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit_at);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 1327a02ec778..147b72349d3b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -44,6 +44,8 @@ struct timerfd_ctx {
bool might_cancel;
};
+static atomic_t instance_count = ATOMIC_INIT(0);
+
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);
@@ -387,6 +389,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
+ char task_comm_buf[TASK_COMM_LEN];
+ char file_name_buf[32];
+ int instance;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -400,6 +405,11 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
+ if (!capable(CAP_WAKE_ALARM) &&
+ (clockid == CLOCK_REALTIME_ALARM ||
+ clockid == CLOCK_BOOTTIME_ALARM))
+ return -EPERM;
+
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -418,7 +428,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
- ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+ instance = atomic_inc_return(&instance_count);
+ get_task_comm(task_comm_buf, current);
+ snprintf(file_name_buf, sizeof(file_name_buf), "[timerfd%d_%.*s]",
+ instance, (int)sizeof(task_comm_buf), task_comm_buf);
+
+ ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
if (ufd < 0)
kfree(ctx);
@@ -444,6 +459,11 @@ static int do_timerfd_settime(int ufd, int flags,
return ret;
ctx = f.file->private_data;
+ if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+ fdput(f);
+ return -EPERM;
+ }
+
timerfd_setup_cancel(ctx, flags);
/*
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index c66f2423e1f5..2914220e975c 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -162,6 +162,77 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};
+static void change_gid(struct dentry *dentry, kgid_t gid)
+{
+ if (!dentry->d_inode)
+ return;
+ dentry->d_inode->i_gid = gid;
+}
+
+/*
+ * Taken from d_walk, but without the need for handling renames.
+ * Nothing can be renamed while walking the list, as tracefs
+ * does not support renames. This is only called when mounting
+ * or remounting the file system, to set all the files to
+ * the given gid.
+ */
+static void set_gid(struct dentry *parent, kgid_t gid)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
+
+ change_gid(this_parent, gid);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+ change_gid(dentry, gid);
+
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ rcu_read_lock();
+ascend:
+ if (this_parent != parent) {
+ struct dentry *child = this_parent;
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+ rcu_read_unlock();
+ goto resume;
+ }
+ rcu_read_unlock();
+ spin_unlock(&this_parent->d_lock);
+ return;
+}
+
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -194,6 +265,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
if (!gid_valid(gid))
return -EINVAL;
opts->gid = gid;
+ set_gid(tracefs_mount->mnt_root, gid);
break;
case Opt_mode:
if (match_octal(&args[0], &option))
@@ -411,6 +483,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
return end_creating(dentry);
@@ -429,9 +503,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
if (unlikely(!inode))
return failed_creating(dentry);
- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ /* Do not set bits for OTH */
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
inode->i_op = ops;
inode->i_fop = &simple_dir_operations;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 595ca0debe11..09134a13a39c 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1125,6 +1125,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
err = PTR_ERR(dent);
if (err == -ENOENT)
break;
+ kfree(pdent);
return err;
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b895af7d8d80..e6d0a7df341d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -782,8 +782,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
if (page_offset > end_index)
break;
- page = find_or_create_page(mapping, page_offset,
- GFP_NOFS | __GFP_COLD);
+ page = pagecache_get_page(mapping, page_offset,
+ FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
+ GFP_NOFS | __GFP_COLD);
if (!page)
break;
if (!PageUptodate(page))
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 97be41215332..99caaae01cab 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -237,7 +237,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
int offs, int quiet, int must_chk_crc)
{
- int err = -EINVAL, type, node_len;
+ int err = -EINVAL, type, node_len, dump_node = 1;
uint32_t crc, node_crc, magic;
const struct ubifs_ch *ch = buf;
@@ -290,10 +290,22 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
out_len:
if (!quiet)
ubifs_err(c, "bad node length %d", node_len);
+ if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ)
+ dump_node = 0;
out:
if (!quiet) {
ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
- ubifs_dump_node(c, buf);
+ if (dump_node) {
+ ubifs_dump_node(c, buf);
+ } else {
+ int safe_len = min3(node_len, c->leb_size - offs,
+ (int)UBIFS_MAX_DATA_NODE_SZ);
+ pr_err("\tprevent out-of-bounds memory access\n");
+ pr_err("\ttruncated data node length %d\n", safe_len);
+ pr_err("\tcorrupted data node:\n");
+ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+ buf, safe_len, 0);
+ }
dump_stack();
}
return err;
@@ -319,7 +331,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(pad >= 0 && !(pad & 7));
+ ubifs_assert(pad >= 0);
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -709,6 +721,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* write-buffer.
*/
memcpy(wbuf->buf + wbuf->used, buf, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+ }
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -801,13 +817,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
}
spin_lock(&wbuf->lock);
- if (aligned_len)
+ if (aligned_len) {
/*
* And now we have what's left and what does not take whole
* max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+ }
+ }
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7968b7a5e787..2b35d1dd665d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1695,7 +1695,6 @@ out:
kthread_stop(c->bgt);
c->bgt = NULL;
}
- free_wbufs(c);
kfree(c->write_reserve_buf);
c->write_reserve_buf = NULL;
vfree(c->ileb_buf);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3876448ec0dc..b709c51c9f9e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -140,21 +140,24 @@ void udf_evict_inode(struct inode *inode)
struct udf_inode_info *iinfo = UDF_I(inode);
int want_delete = 0;
- if (!inode->i_nlink && !is_bad_inode(inode)) {
- want_delete = 1;
- udf_setsize(inode, 0);
- udf_update_inode(inode, IS_SYNC(inode));
+ if (!is_bad_inode(inode)) {
+ if (!inode->i_nlink) {
+ want_delete = 1;
+ udf_setsize(inode, 0);
+ udf_update_inode(inode, IS_SYNC(inode));
+ }
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
+ inode->i_size != iinfo->i_lenExtents) {
+ udf_warn(inode->i_sb,
+ "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+ inode->i_ino, inode->i_mode,
+ (unsigned long long)inode->i_size,
+ (unsigned long long)iinfo->i_lenExtents);
+ }
}
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
clear_inode(inode);
- if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
- inode->i_size != iinfo->i_lenExtents) {
- udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
- inode->i_ino, inode->i_mode,
- (unsigned long long)inode->i_size,
- (unsigned long long)iinfo->i_lenExtents);
- }
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
udf_clear_extent_cache(inode);
@@ -257,10 +260,6 @@ int udf_expand_file_adinicb(struct inode *inode)
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- struct writeback_control udf_wbc = {
- .sync_mode = WB_SYNC_NONE,
- .nr_to_write = 1,
- };
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
if (!iinfo->i_lenAlloc) {
@@ -304,8 +303,10 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ set_page_dirty(page);
+ unlock_page(page);
up_write(&iinfo->i_data_sem);
- err = inode->i_data.a_ops->writepage(page, &udf_wbc);
+ err = filemap_fdatawrite(inode->i_mapping);
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
@@ -317,6 +318,7 @@ int udf_expand_file_adinicb(struct inode *inode)
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
+ iinfo->i_lenAlloc = inode->i_size;
up_write(&iinfo->i_data_sem);
}
page_cache_release(page);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 71d1c25f360d..8c7f9ea251e5 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -175,13 +175,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c97b5a8d1e24..af6fd442b9d8 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -947,6 +947,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
iinfo->i_location.partitionReferenceNum,
0);
epos.bh = udf_tgetblk(sb, block);
+ if (unlikely(!epos.bh)) {
+ err = -ENOMEM;
+ goto out_no_entry;
+ }
lock_buffer(epos.bh);
memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 159977ec8e54..710f1b8fad9b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1390,6 +1390,12 @@ static int udf_load_sparable_map(struct super_block *sb,
(int)spm->numSparingTables);
return -EIO;
}
+ if (le32_to_cpu(spm->sizeSparingTable) > sb->s_blocksize) {
+ udf_err(sb, "error loading logical volume descriptor: "
+ "Too big sparing table size (%u)\n",
+ le32_to_cpu(spm->sizeSparingTable));
+ return -EIO;
+ }
for (i = 0; i < spm->numSparingTables; i++) {
loc = le32_to_cpu(spm->locSparingTable[i]);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 10f364490833..be68b48de1cc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -99,7 +99,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct inode *inode;
- if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
+ if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg)
return ERR_PTR(-ESTALE);
inode = ufs_iget(sb, ino);
diff --git a/fs/xattr.c b/fs/xattr.c
index 7444fb1b3484..828a919eb4b3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -735,6 +735,8 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!buffer) {
for_each_xattr_handler(handlers, handler) {
+ if (!handler->list)
+ continue;
size += handler->list(handler, dentry, NULL, 0,
NULL, 0);
}
@@ -742,6 +744,8 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
char *buf = buffer;
for_each_xattr_handler(handlers, handler) {
+ if (!handler->list)
+ continue;
size = handler->list(handler, dentry, buf, buffer_size,
NULL, 0);
if (size > buffer_size)
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 01a5ecfedfcf..4539ff4d351f 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -514,8 +514,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
ASSERT(ifp->if_flags & XFS_IFINLINE);
}
xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
- hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
- hdr->count = 0;
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ memset(hdr, 0, sizeof(*hdr));
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -779,9 +779,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
ASSERT(blkno == 0);
error = xfs_attr3_leaf_create(args, blkno, &bp);
if (error) {
- error = xfs_da_shrink_inode(args, 0, bp);
- bp = NULL;
- if (error)
+ /* xfs_attr3_leaf_create may not have instantiated a block */
+ if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
goto out;
xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
@@ -1327,7 +1326,9 @@ xfs_attr3_leaf_add_work(
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr->freemap[i].base == tmp) {
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
- ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+ ichdr->freemap[i].size -=
+ min_t(uint16_t, ichdr->freemap[i].size,
+ sizeof(xfs_attr_leaf_entry_t));
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d98ba57ef01a..c167bdaafc50 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -793,6 +793,8 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index adbc1f59969a..d8cdab4bfd30 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -135,6 +135,46 @@ xfs_inode_free(
}
/*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * then check we didn't find a free inode.
+ *
+ * Returns:
+ * 0 if the inode free state matches the lookup context
+ * -ENOENT if the inode is free and we are not allocating
+ * -EFSCORRUPTED if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+ struct xfs_inode *ip,
+ int flags)
+{
+ if (flags & XFS_IGET_CREATE) {
+ /* should be a free inode */
+ if (ip->i_d.di_mode != 0) {
+ xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+ ip->i_ino, ip->i_d.di_mode);
+ return -EFSCORRUPTED;
+ }
+
+ if (ip->i_d.di_nblocks != 0) {
+ xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+ ip->i_ino);
+ return -EFSCORRUPTED;
+ }
+ return 0;
+ }
+
+ /* should be an allocated inode */
+ if (ip->i_d.di_mode == 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+/*
* Check the validity of the inode we just found it the cache
*/
static int
@@ -183,12 +223,12 @@ xfs_iget_cache_hit(
}
/*
- * If lookup is racing with unlink return an error immediately.
+ * Check the inode free state is valid. This also detects lookup
+ * racing with unlinks.
*/
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- error = -ENOENT;
+ error = xfs_iget_check_free_state(ip, flags);
+ if (error)
goto out_error;
- }
/*
* If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -298,10 +338,14 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- error = -ENOENT;
+
+ /*
+ * Check the inode free state is valid. This also detects lookup
+ * racing with unlinks.
+ */
+ error = xfs_iget_check_free_state(ip, flags);
+ if (error)
goto out_destroy;
- }
/*
* Preload the radix tree so we can insert safely under the
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index e4a4f82ea13f..2e4f78d68b05 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -729,7 +729,8 @@ xfs_ioc_space(
flags |= XFS_PREALLOC_CLEAR;
if (bf->l_start > XFS_ISIZE(ip)) {
error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
+ bf->l_start - XFS_ISIZE(ip),
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1a05d8ae327d..e7372cef5ac3 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -346,6 +346,7 @@ xfs_compat_attrlist_by_handle(
{
int error;
attrlist_cursor_kern_t *cursor;
+ compat_xfs_fsop_attrlist_handlereq_t __user *p = arg;
compat_xfs_fsop_attrlist_handlereq_t al_hreq;
struct dentry *dentry;
char *kbuf;
@@ -380,6 +381,11 @@ xfs_compat_attrlist_by_handle(
if (error)
goto out_kfree;
+ if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
+ error = -EFAULT;
+ goto out_kfree;
+ }
+
if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
error = -EFAULT;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0cdf0..d70a004378d8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -770,7 +770,7 @@ xfs_setattr_size(
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 73b725f965eb..065aa4752607 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1503,6 +1503,8 @@ out_free_iclog:
if (iclog->ic_bp)
xfs_buf_free(iclog->ic_bp);
kmem_free(iclog);
+ if (prev_iclog == log->l_iclog)
+ break;
}
spinlock_destroy(&log->l_icloglock);
xfs_buf_free(log->l_xbuf);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index dc6221942b85..ab66ea0a72bf 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -162,7 +162,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 7795e0d01382..4b94db85b2a0 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -214,6 +214,9 @@ xfs_fs_rm_xquota(
if (XFS_IS_QUOTA_ON(mp))
return -EINVAL;
+ if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA))
+ return -EINVAL;
+
if (uflags & FS_USER_QUOTA)
flags |= XFS_DQ_USER;
if (uflags & FS_GROUP_QUOTA)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ab1bac6a3a1c..acadeaf72674 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -256,6 +256,9 @@ xfs_rtallocate_extent_block(
end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
i <= end;
i++) {
+ /* Make sure we don't scan off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
@@ -447,6 +450,14 @@ xfs_rtallocate_extent_near(
*/
if (bno >= mp->m_sb.sb_rextents)
bno = mp->m_sb.sb_rextents - 1;
+
+ /* Make sure we don't run off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ if (maxlen < minlen) {
+ *rtblock = NULLRTBLOCK;
+ return 0;
+ }
+
/*
* Try the exact allocation first.
*/
@@ -1006,10 +1017,13 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
@@ -1017,9 +1031,12 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
@@ -1225,13 +1242,11 @@ xfs_rtmount_inodes(
xfs_sb_t *sbp;
sbp = &mp->m_sb;
- if (sbp->sb_rbmino == NULLFSINO)
- return 0;
error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
if (error)
return error;
ASSERT(mp->m_rbmip != NULL);
- ASSERT(sbp->sb_rsumino != NULLFSINO);
+
error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
if (error) {
IRELE(mp->m_rbmip);
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index be692e59938d..c457b010c623 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -44,9 +44,11 @@ xfs_sysfs_init(
struct xfs_kobj *parent_kobj,
const char *name)
{
+ struct kobject *parent;
+
+ parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype,
- &parent_kobj->kobject, "%s", name);
+ return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
}
static inline void
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a047e..bb8de2dddabe 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -662,7 +662,7 @@ xfs_trans_dqresv(
}
}
if (ninos > 0) {
- total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
+ total_count = dqp->q_res_icount + ninos;
timer = be32_to_cpu(dqp->q_core.d_itimer);
warns = be16_to_cpu(dqp->q_core.d_iwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;