From 0dc1ba24f7fff659725eecbba2c9ad679a0954cd Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 Apr 2011 17:23:20 -0700 Subject: SELINUX: Make selinux cache VFS RCU walks safe Now that the security modules can decide whether they support the dcache RCU walk or not it's possible to make selinux a bit more RCU friendly. The SELinux AVC and security server access decision code is RCU safe. A specific piece of the LSM audit code may not be RCU safe. This patch makes the VFS RCU walk retry if it would hit the non RCU safe chunk of code. It will normally just work under RCU. This is done simply by passing the VFS RCU state as a flag down into the avc_audit() code and returning ECHILD there if it would have an issue. Based-on-patch-by: Andi Kleen Signed-off-by: Eric Paris --- security/selinux/avc.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 9da6420e2056..1d027e29ce8d 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -471,6 +471,7 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) * @avd: access vector decisions * @result: result from avc_has_perm_noaudit * @a: auxiliary audit data + * @flags: VFS walk flags * * Audit the granting or denial of permissions in accordance * with the policy. This function is typically called by @@ -481,9 +482,10 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ -void avc_audit(u32 ssid, u32 tsid, +int avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct common_audit_data *a) + struct av_decision *avd, int result, struct common_audit_data *a, + unsigned flags) { struct common_audit_data stack_data; u32 denied, audited; @@ -515,11 +517,24 @@ void avc_audit(u32 ssid, u32 tsid, else audited = requested & avd->auditallow; if (!audited) - return; + return 0; + if (!a) { a = &stack_data; COMMON_AUDIT_DATA_INIT(a, NONE); } + + /* + * When in a RCU walk do the audit on the RCU retry. This is because + * the collection of the dname in an inode audit message is not RCU + * safe. Note this may drop some audits when the situation changes + * during retry. However this is logically just as if the operation + * happened a little later. + */ + if ((a->type == LSM_AUDIT_DATA_FS) && + (flags & IPERM_FLAG_RCU)) + return -ECHILD; + a->selinux_audit_data.tclass = tclass; a->selinux_audit_data.requested = requested; a->selinux_audit_data.ssid = ssid; @@ -529,6 +544,7 @@ void avc_audit(u32 ssid, u32 tsid, a->lsm_pre_audit = avc_audit_pre_callback; a->lsm_post_audit = avc_audit_post_callback; common_lsm_audit(a); + return 0; } /** @@ -793,6 +809,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass * @auditdata: auxiliary audit data + * @flags: VFS walk flags * * Check the AVC to determine whether the @requested permissions are granted * for the SID pair (@ssid, @tsid), interpreting the permissions @@ -802,14 +819,19 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ -int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct common_audit_data *auditdata) +int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct common_audit_data *auditdata, + unsigned flags) { struct av_decision avd; - int rc; + int rc, rc2; rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); - avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); + + rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, + flags); + if (rc2) + return rc2; return rc; } -- cgit v1.2.3 From f48b7399840b453e7282b523f535561fe9638a2d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 25 Apr 2011 12:54:27 -0400 Subject: LSM: split LSM_AUDIT_DATA_FS into _PATH and _INODE The lsm common audit code has wacky contortions making sure which pieces of information are set based on if it was given a path, dentry, or inode. Split this into path and inode to get rid of some of the code complexity. Signed-off-by: Eric Paris Acked-by: Casey Schaufler --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 1d027e29ce8d..ce742f1778e1 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -531,7 +531,7 @@ int avc_audit(u32 ssid, u32 tsid, * during retry. However this is logically just as if the operation * happened a little later. */ - if ((a->type == LSM_AUDIT_DATA_FS) && + if ((a->type == LSM_AUDIT_DATA_INODE) && (flags & IPERM_FLAG_RCU)) return -ECHILD; -- cgit v1.2.3 From 9ade0cf440a1e5800dc68eef2e77b8d9d83a6dff Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 25 Apr 2011 16:26:29 -0400 Subject: SELINUX: Make selinux cache VFS RCU walks safe Now that the security modules can decide whether they support the dcache RCU walk or not it's possible to make selinux a bit more RCU friendly. The SELinux AVC and security server access decision code is RCU safe. A specific piece of the LSM audit code may not be RCU safe. This patch makes the VFS RCU walk retry if it would hit the non RCU safe chunk of code. It will normally just work under RCU. This is done simply by passing the VFS RCU state as a flag down into the avc_audit() code and returning ECHILD there if it would have an issue. Based-on-patch-by: Andi Kleen Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds --- security/selinux/avc.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 9da6420e2056..1d027e29ce8d 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -471,6 +471,7 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) * @avd: access vector decisions * @result: result from avc_has_perm_noaudit * @a: auxiliary audit data + * @flags: VFS walk flags * * Audit the granting or denial of permissions in accordance * with the policy. This function is typically called by @@ -481,9 +482,10 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ -void avc_audit(u32 ssid, u32 tsid, +int avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct common_audit_data *a) + struct av_decision *avd, int result, struct common_audit_data *a, + unsigned flags) { struct common_audit_data stack_data; u32 denied, audited; @@ -515,11 +517,24 @@ void avc_audit(u32 ssid, u32 tsid, else audited = requested & avd->auditallow; if (!audited) - return; + return 0; + if (!a) { a = &stack_data; COMMON_AUDIT_DATA_INIT(a, NONE); } + + /* + * When in a RCU walk do the audit on the RCU retry. This is because + * the collection of the dname in an inode audit message is not RCU + * safe. Note this may drop some audits when the situation changes + * during retry. However this is logically just as if the operation + * happened a little later. + */ + if ((a->type == LSM_AUDIT_DATA_FS) && + (flags & IPERM_FLAG_RCU)) + return -ECHILD; + a->selinux_audit_data.tclass = tclass; a->selinux_audit_data.requested = requested; a->selinux_audit_data.ssid = ssid; @@ -529,6 +544,7 @@ void avc_audit(u32 ssid, u32 tsid, a->lsm_pre_audit = avc_audit_pre_callback; a->lsm_post_audit = avc_audit_post_callback; common_lsm_audit(a); + return 0; } /** @@ -793,6 +809,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass * @auditdata: auxiliary audit data + * @flags: VFS walk flags * * Check the AVC to determine whether the @requested permissions are granted * for the SID pair (@ssid, @tsid), interpreting the permissions @@ -802,14 +819,19 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ -int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct common_audit_data *auditdata) +int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct common_audit_data *auditdata, + unsigned flags) { struct av_decision avd; - int rc; + int rc, rc2; rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); - avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); + + rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, + flags); + if (rc2) + return rc2; return rc; } -- cgit v1.2.3 From 044aea9b83614948c98564000db07d1d32b2d29b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 18:59:47 -0700 Subject: selinux: de-crapify avc cache stat code generation You can turn off the avc cache stats, but distributions seem to not do that (perhaps because several performance tuning how-to's talk about the avc cache statistics). Which is sad, because the code it generates is truly horrendous, with the statistics update being sandwitched between get_cpu/put_cpu which in turn causes preemption disables etc. We're talking ten+ instructions just to increment a per-cpu variable in some pretty hot code. Fix the craziness by just using 'this_cpu_inc()' instead. Suddenly we only need a single 'inc' instruction to increment the statistics. This is quite noticeable in the incredibly hot avc_has_perm_noaudit() function (which triggers all the statistics by virtue of doing an avc_lookup() call). Signed-off-by: Linus Torvalds --- security/selinux/avc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 1d027e29ce8d..5971e30e8239 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -38,11 +38,7 @@ #define AVC_CACHE_RECLAIM 16 #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS -#define avc_cache_stats_incr(field) \ -do { \ - per_cpu(avc_cache_stats, get_cpu()).field++; \ - put_cpu(); \ -} while (0) +#define avc_cache_stats_incr(field) this_cpu_inc(avc_cache_stats.field) #else #define avc_cache_stats_incr(field) do {} while (0) #endif -- cgit v1.2.3 From 257313b2a87795e07a0bdf58d0fffbdba8b31051 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 21:22:53 -0700 Subject: selinux: avoid unnecessary avc cache stat hit count There is no point in counting hits - we can calculate it from the number of lookups and misses. This makes the avc statistics a bit smaller, and makes the code generation better too. Signed-off-by: Linus Torvalds --- security/selinux/avc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 5971e30e8239..3d2715fd35ea 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -343,11 +343,10 @@ static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) node = avc_search_node(ssid, tsid, tclass); if (node) - avc_cache_stats_incr(hits); - else - avc_cache_stats_incr(misses); + return node; - return node; + avc_cache_stats_incr(misses); + return NULL; } static int avc_latest_notif_update(int seqno, int is_insert) @@ -765,7 +764,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, rcu_read_lock(); node = avc_lookup(ssid, tsid, tclass); - if (!node) { + if (unlikely(!node)) { rcu_read_unlock(); if (in_avd) -- cgit v1.2.3 From f01e1af445fac107e91d62a2d59dd535f633810b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 24 May 2011 13:48:51 -0700 Subject: selinux: don't pass in NULL avd to avc_has_perm_noaudit Right now security_get_user_sids() will pass in a NULL avd pointer to avc_has_perm_noaudit(), which then forces that function to have a dummy entry for that case and just generally test it. Don't do it. The normal callers all pass a real avd pointer, and this helper function is incredibly hot. So don't make avc_has_perm_noaudit() do conditional stuff that isn't needed for the common case. This also avoids some duplicated stack space. Signed-off-by: Linus Torvalds --- security/selinux/avc.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'security/selinux/avc.c') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index fcb89cb0f223..d515b2128a4e 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -752,10 +752,9 @@ int avc_ss_reset(u32 seqno) int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, - struct av_decision *in_avd) + struct av_decision *avd) { struct avc_node *node; - struct av_decision avd_entry, *avd; int rc = 0; u32 denied; @@ -766,18 +765,11 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, node = avc_lookup(ssid, tsid, tclass); if (unlikely(!node)) { rcu_read_unlock(); - - if (in_avd) - avd = in_avd; - else - avd = &avd_entry; - security_compute_av(ssid, tsid, tclass, avd); rcu_read_lock(); node = avc_insert(ssid, tsid, tclass, avd); } else { - if (in_avd) - memcpy(in_avd, &node->ae.avd, sizeof(*in_avd)); + memcpy(avd, &node->ae.avd, sizeof(*avd)); avd = &node->ae.avd; } -- cgit v1.2.3