Skip to content

Commit 3ddcd05

Browse files
committed
vfs: optimize inode cache access patterns
The inode structure layout is largely random, and some of the vfs paths really do care. The path lookup in particular is already quite D$ intensive, and profiles show that accessing the 'inode->i_op->xyz' fields is quite costly. We already optimized the dcache to not unnecessarily load the d_op structure for members that are often NULL using the DCACHE_OP_xyz bits in dentry->d_flags, and this does something very similar for the inode ops that are used during pathname lookup. It also re-orders the fields so that the fields accessed by 'stat' are together at the beginning of the inode structure, and roughly in the order accessed. The effect of this seems to be in the 1-2% range for an empty kernel "make -j" run (which is fairly kernel-intensive, mostly in filename lookup), so it's visible. The numbers are fairly noisy, though, and likely depend a lot on exact microarchitecture. So there's more tuning to be done. Signed-off-by: Linus Torvalds <[email protected]>
1 parent 830c0f0 commit 3ddcd05

File tree

4 files changed

+106
-34
lines changed

4 files changed

+106
-34
lines changed

fs/inode.c

+1
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
143143
inode->i_op = &empty_iops;
144144
inode->i_fop = &empty_fops;
145145
inode->i_nlink = 1;
146+
inode->i_opflags = 0;
146147
inode->i_uid = 0;
147148
inode->i_gid = 0;
148149
atomic_set(&inode->i_writecount, 0);

fs/namei.c

+66-10
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,26 @@ int generic_permission(struct inode *inode, int mask)
308308
return -EACCES;
309309
}
310310

311+
/*
312+
* We _really_ want to just do "generic_permission()" without
313+
* even looking at the inode->i_op values. So we keep a cache
314+
* flag in inode->i_opflags, that says "this has not special
315+
* permission function, use the fast case".
316+
*/
317+
static inline int do_inode_permission(struct inode *inode, int mask)
318+
{
319+
if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
320+
if (likely(inode->i_op->permission))
321+
return inode->i_op->permission(inode, mask);
322+
323+
/* This gets set once for the inode lifetime */
324+
spin_lock(&inode->i_lock);
325+
inode->i_opflags |= IOP_FASTPERM;
326+
spin_unlock(&inode->i_lock);
327+
}
328+
return generic_permission(inode, mask);
329+
}
330+
311331
/**
312332
* inode_permission - check for access rights to a given inode
313333
* @inode: inode to check permission on
@@ -322,7 +342,7 @@ int inode_permission(struct inode *inode, int mask)
322342
{
323343
int retval;
324344

325-
if (mask & MAY_WRITE) {
345+
if (unlikely(mask & MAY_WRITE)) {
326346
umode_t mode = inode->i_mode;
327347

328348
/*
@@ -339,11 +359,7 @@ int inode_permission(struct inode *inode, int mask)
339359
return -EACCES;
340360
}
341361

342-
if (inode->i_op->permission)
343-
retval = inode->i_op->permission(inode, mask);
344-
else
345-
retval = generic_permission(inode, mask);
346-
362+
retval = do_inode_permission(inode, mask);
347363
if (retval)
348364
return retval;
349365

@@ -1245,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd)
12451261
}
12461262
}
12471263

1264+
/*
1265+
* Do we need to follow links? We _really_ want to be able
1266+
* to do this check without having to look at inode->i_op,
1267+
* so we keep a cache of "no, this doesn't need follow_link"
1268+
* for the common case.
1269+
*/
1270+
static inline int do_follow_link(struct inode *inode, int follow)
1271+
{
1272+
if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1273+
if (likely(inode->i_op->follow_link))
1274+
return follow;
1275+
1276+
/* This gets set once for the inode lifetime */
1277+
spin_lock(&inode->i_lock);
1278+
inode->i_opflags |= IOP_NOFOLLOW;
1279+
spin_unlock(&inode->i_lock);
1280+
}
1281+
return 0;
1282+
}
1283+
12481284
static inline int walk_component(struct nameidata *nd, struct path *path,
12491285
struct qstr *name, int type, int follow)
12501286
{
@@ -1267,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
12671303
terminate_walk(nd);
12681304
return -ENOENT;
12691305
}
1270-
if (unlikely(inode->i_op->follow_link) && follow) {
1306+
if (do_follow_link(inode, follow)) {
12711307
if (nd->flags & LOOKUP_RCU) {
12721308
if (unlikely(unlazy_walk(nd, path->dentry))) {
12731309
terminate_walk(nd);
@@ -1319,6 +1355,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
13191355
return res;
13201356
}
13211357

1358+
/*
1359+
* We really don't want to look at inode->i_op->lookup
1360+
* when we don't have to. So we keep a cache bit in
1361+
* the inode ->i_opflags field that says "yes, we can
1362+
* do lookup on this inode".
1363+
*/
1364+
static inline int can_lookup(struct inode *inode)
1365+
{
1366+
if (likely(inode->i_opflags & IOP_LOOKUP))
1367+
return 1;
1368+
if (likely(!inode->i_op->lookup))
1369+
return 0;
1370+
1371+
/* We do this once for the lifetime of the inode */
1372+
spin_lock(&inode->i_lock);
1373+
inode->i_opflags |= IOP_LOOKUP;
1374+
spin_unlock(&inode->i_lock);
1375+
return 1;
1376+
}
1377+
13221378
/*
13231379
* Name resolution.
13241380
* This is the basic name resolution function, turning a pathname into
@@ -1398,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
13981454
if (err)
13991455
return err;
14001456
}
1457+
if (can_lookup(nd->inode))
1458+
continue;
14011459
err = -ENOTDIR;
1402-
if (!nd->inode->i_op->lookup)
1403-
break;
1404-
continue;
1460+
break;
14051461
/* here ends the main loop */
14061462

14071463
last_component:

fs/stat.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
2727
stat->uid = inode->i_uid;
2828
stat->gid = inode->i_gid;
2929
stat->rdev = inode->i_rdev;
30+
stat->size = i_size_read(inode);
3031
stat->atime = inode->i_atime;
3132
stat->mtime = inode->i_mtime;
3233
stat->ctime = inode->i_ctime;
33-
stat->size = i_size_read(inode);
34-
stat->blocks = inode->i_blocks;
3534
stat->blksize = (1 << inode->i_blkbits);
35+
stat->blocks = inode->i_blocks;
3636
}
3737

3838
EXPORT_SYMBOL(generic_fillattr);

include/linux/fs.h

+37-22
Original file line numberDiff line numberDiff line change
@@ -738,22 +738,54 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
738738
struct posix_acl;
739739
#define ACL_NOT_CACHED ((void *)(-1))
740740

741+
#define IOP_FASTPERM 0x0001
742+
#define IOP_LOOKUP 0x0002
743+
#define IOP_NOFOLLOW 0x0004
744+
745+
/*
746+
* Keep mostly read-only and often accessed (especially for
747+
* the RCU path lookup and 'stat' data) fields at the beginning
748+
* of the 'struct inode'
749+
*/
741750
struct inode {
742-
/* RCU path lookup touches following: */
743751
umode_t i_mode;
752+
unsigned short i_opflags;
744753
uid_t i_uid;
745754
gid_t i_gid;
755+
unsigned int i_flags;
756+
757+
#ifdef CONFIG_FS_POSIX_ACL
758+
struct posix_acl *i_acl;
759+
struct posix_acl *i_default_acl;
760+
#endif
761+
746762
const struct inode_operations *i_op;
747763
struct super_block *i_sb;
764+
struct address_space *i_mapping;
748765

749-
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
750-
unsigned int i_flags;
751-
unsigned long i_state;
752766
#ifdef CONFIG_SECURITY
753767
void *i_security;
754768
#endif
755-
struct mutex i_mutex;
756769

770+
/* Stat data, not accessed from path walking */
771+
unsigned long i_ino;
772+
unsigned int i_nlink;
773+
dev_t i_rdev;
774+
loff_t i_size;
775+
struct timespec i_atime;
776+
struct timespec i_mtime;
777+
struct timespec i_ctime;
778+
unsigned int i_blkbits;
779+
blkcnt_t i_blocks;
780+
781+
#ifdef __NEED_I_SIZE_ORDERED
782+
seqcount_t i_size_seqcount;
783+
#endif
784+
785+
/* Misc */
786+
unsigned long i_state;
787+
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
788+
struct mutex i_mutex;
757789

758790
unsigned long dirtied_when; /* jiffies of first dirtying */
759791

@@ -765,25 +797,12 @@ struct inode {
765797
struct list_head i_dentry;
766798
struct rcu_head i_rcu;
767799
};
768-
unsigned long i_ino;
769800
atomic_t i_count;
770-
unsigned int i_nlink;
771-
dev_t i_rdev;
772-
unsigned int i_blkbits;
773801
u64 i_version;
774-
loff_t i_size;
775-
#ifdef __NEED_I_SIZE_ORDERED
776-
seqcount_t i_size_seqcount;
777-
#endif
778-
struct timespec i_atime;
779-
struct timespec i_mtime;
780-
struct timespec i_ctime;
781-
blkcnt_t i_blocks;
782802
unsigned short i_bytes;
783803
atomic_t i_dio_count;
784804
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
785805
struct file_lock *i_flock;
786-
struct address_space *i_mapping;
787806
struct address_space i_data;
788807
#ifdef CONFIG_QUOTA
789808
struct dquot *i_dquot[MAXQUOTAS];
@@ -806,10 +825,6 @@ struct inode {
806825
atomic_t i_readcount; /* struct files open RO */
807826
#endif
808827
atomic_t i_writecount;
809-
#ifdef CONFIG_FS_POSIX_ACL
810-
struct posix_acl *i_acl;
811-
struct posix_acl *i_default_acl;
812-
#endif
813828
void *i_private; /* fs or device private pointer */
814829
};
815830

0 commit comments

Comments
 (0)