Skip to content

Commit 69b4573

Browse files
Andi KleenAl Viro
Andi Kleen
authored and
Al Viro
committed
Cache xattr security drop check for write v2
Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. We only do the lookup once per file and then if there is no xattr cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in followon patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. add file system helper. Cc: [email protected] Cc: [email protected] Cc: [email protected] Cc: [email protected] Cc: Serge E. Hallyn <[email protected]> Signed-off-by: Andi Kleen <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent d76ee18 commit 69b4573

File tree

4 files changed

+37
-4
lines changed

4 files changed

+37
-4
lines changed

fs/attr.c

+7
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
175175
return -EPERM;
176176
}
177177

178+
if ((ia_valid & ATTR_MODE)) {
179+
mode_t amode = attr->ia_mode;
180+
/* Flag setting protected by i_mutex */
181+
if (is_sxid(amode))
182+
inode->i_flags &= ~S_NOSEC;
183+
}
184+
178185
now = current_fs_time(inode->i_sb);
179186

180187
attr->ia_ctime = now;

fs/xattr.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,19 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
9191
{
9292
struct inode *inode = dentry->d_inode;
9393
int error = -EOPNOTSUPP;
94+
int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
95+
XATTR_SECURITY_PREFIX_LEN);
9496

97+
if (issec)
98+
inode->i_flags &= ~S_NOSEC;
9599
if (inode->i_op->setxattr) {
96100
error = inode->i_op->setxattr(dentry, name, value, size, flags);
97101
if (!error) {
98102
fsnotify_xattr(dentry);
99103
security_inode_post_setxattr(dentry, name, value,
100104
size, flags);
101105
}
102-
} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
103-
XATTR_SECURITY_PREFIX_LEN)) {
106+
} else if (issec) {
104107
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
105108
error = security_inode_setsecurity(inode, suffix, value,
106109
size, flags);

include/linux/fs.h

+13
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ struct inodes_stat_t {
237237
#define S_PRIVATE 512 /* Inode is fs-internal */
238238
#define S_IMA 1024 /* Inode has an associated IMA struct */
239239
#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
240+
#define S_NOSEC 4096 /* no suid or xattr security attributes */
240241

241242
/*
242243
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -273,6 +274,7 @@ struct inodes_stat_t {
273274
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
274275
#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
275276
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
277+
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
276278

277279
/* the read-only stuff doesn't really belong here, but any other place is
278280
probably as bad and I don't want to create yet another include file. */
@@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf);
25822584
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
25832585
(flag & __FMODE_NONOTIFY)))
25842586

2587+
static inline int is_sxid(mode_t mode)
2588+
{
2589+
return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
2590+
}
2591+
2592+
static inline void inode_has_no_xattr(struct inode *inode)
2593+
{
2594+
if (!is_sxid(inode->i_mode))
2595+
inode->i_flags |= S_NOSEC;
2596+
}
2597+
25852598
#endif /* __KERNEL__ */
25862599
#endif /* _LINUX_FS_H */

mm/filemap.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill)
19821982
int file_remove_suid(struct file *file)
19831983
{
19841984
struct dentry *dentry = file->f_path.dentry;
1985-
int killsuid = should_remove_suid(dentry);
1986-
int killpriv = security_inode_need_killpriv(dentry);
1985+
struct inode *inode = dentry->d_inode;
1986+
int killsuid;
1987+
int killpriv;
19871988
int error = 0;
19881989

1990+
/* Fast path for nothing security related */
1991+
if (IS_NOSEC(inode))
1992+
return 0;
1993+
1994+
killsuid = should_remove_suid(dentry);
1995+
killpriv = security_inode_need_killpriv(dentry);
1996+
19891997
if (killpriv < 0)
19901998
return killpriv;
19911999
if (killpriv)
19922000
error = security_inode_killpriv(dentry);
19932001
if (!error && killsuid)
19942002
error = __remove_suid(dentry, killsuid);
2003+
if (!error)
2004+
inode->i_flags |= S_NOSEC;
19952005

19962006
return error;
19972007
}

0 commit comments

Comments
 (0)