While path walking, follow_mount() and follow_down() take dcache_lock
for serialisation. vfsmount-related operations also use dcache_lock
for all updates. I think we can use a separate lock for the
vfsmount-related work and thereby improve path walking.
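
For reference, here is the current follow_mount(), reconstructed from
the removed lines in the fs/namei.c hunk below; every mountpoint
crossing takes and drops dcache_lock, the same lock that d_lookup()
and dput() already fight over:

static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
{
	int res = 0;
	while (d_mountpoint(*dentry)) {
		struct vfsmount *mounted;
		spin_lock(&dcache_lock);
		mounted = lookup_mnt(*mnt, *dentry);
		if (!mounted) {
			spin_unlock(&dcache_lock);
			break;
		}
		*mnt = mntget(mounted);
		spin_unlock(&dcache_lock);
		dput(*dentry);
		mntput(mounted->mnt_parent);
		*dentry = dget(mounted->mnt_root);
		res = 1;
	}
	return res;
}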

The following two patches do exactly that. The first replaces
dcache_lock with a new vfsmount_lock in namespace.c; the lock is
local to namespace.c and is not required outside it. The second patch
uses RCU to make lookup_mnt() lock-free. The patches are quite simple
and straightforward.
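
The lookup_mnt-rcu.patch itself is not included in this mail, but the
lock-free lookup is roughly of this shape (a sketch, not the exact
patch; it assumes the write side uses list_add_rcu()/list_del_rcu()
on mnt_hash under vfsmount_lock and defers freeing a vfsmount past an
RCU grace period, so that mntget() on a found entry is safe):

struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp;
	struct vfsmount *p, *found = NULL;

	rcu_read_lock();
	list_for_each_rcu(tmp, head) {
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			/* safe only because the vfsmount cannot be
			 * freed until a grace period has elapsed */
			found = mntget(p);
			break;
		}
	}
	rcu_read_unlock();
	return found;
}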

The lockmeter results show reduced contention and fewer lock
acquisitions for dcache_lock while running dcachebench* on a 4-way
SMP box:

SPINLOCKS         HOLD            WAIT
UTIL  CON    MEAN(  MAX )   MEAN(  MAX )(% CPU)     TOTAL NOWAIT SPIN RJECT  NAME

baselkm-2569:
20.7% 20.9%  0.5us( 146us)  2.9us( 144us)(0.81%)  31590840 79.1% 20.9%    0%  dcache_lock
mntlkm-2569:
14.3% 13.6%  0.4us( 170us)  2.9us( 187us)(0.42%)  23071746 86.4% 13.6%    0%  dcache_lock

We get more than 8% improvement on the 4-way SMP box and about 47%
improvement on the 16-way NUMAQ while running dcachebench*.

		Average (usecs/iteration)	Std. Deviation 
		(lower is better)
4-way SMP
2.5.69		15739.3				470.90
2.5.69-mnt	14459.6				298.51

16-way NUMAQ	
2.5.69		120426.5			363.78
2.5.69-mnt	 63225.8			427.60
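
Working from the table, that is (15739.3 - 14459.6) / 15739.3, about
8.1%, on the 4-way box, and (120426.5 - 63225.8) / 120426.5, about
47.5%, on the 16-way NUMAQ.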

*dcachebench is a microbenchmark written by Bill Hartner and is available at
http://www-124.ibm.com/developerworks/opensource/linuxperf/dcachebench/dcachebench.html

vfsmount_lock.patch
-------------------
- Patch replacing dcache_lock with the new vfsmount_lock for all
  mount-related operations. This removes the need to take dcache_lock
  while doing follow_mount or follow_down operations in path walking.
  It also converts is_subdir() to retry on the rename_lock seqlock,
  since fs/proc/base.c no longer calls it under dcache_lock (see the
  seqlock sketch below).
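
The is_subdir() change follows the standard seqlock reader pattern. A
minimal sketch, where d_ancestor_walk() is a hypothetical stand-in for
the inline d_parent loop in the actual hunk:

static int is_subdir_sketch(struct dentry *new_dentry,
			    struct dentry *old_dentry)
{
	unsigned long seq;
	int result;

	do {
		/* snapshot rename_lock's sequence, then do the
		 * lock-free walk up the d_parent chain */
		seq = read_seqbegin(&rename_lock);
		result = d_ancestor_walk(new_dentry, old_dentry);
	} while (read_seqretry(&rename_lock, seq)); /* redo if a rename raced */

	return result;
}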

I re-ran dcachebench with 2.5.70 as the base on a 16-way NUMAQ box.

                        Average (usecs/iteration)       Std. Deviation
                        (lower is better)
16-way NUMAQ
2.5.70                          120710.9        230.67
 + vfsmount_lock.patch           65209.6        242.97
    + lookup_mnt-rcu.patch       64042.3        416.61

So the lock splitting alone (vfsmount_lock.patch) gives almost all of
the benefit; RCU-ifying lookup_mnt() on top buys only another ~2%.



 fs/dcache.c           |   25 +++++++++++--------
 fs/namei.c            |   22 +++++++----------
 fs/namespace.c        |   64 ++++++++++++++++++++++++++++----------------------
 fs/proc/base.c        |    9 +++----
 include/linux/mount.h |    1 
 5 files changed, 68 insertions(+), 53 deletions(-)

diff -puN fs/dcache.c~vfsmount_lock fs/dcache.c
--- 25/fs/dcache.c~vfsmount_lock	2003-06-26 18:37:55.000000000 -0700
+++ 25-akpm/fs/dcache.c	2003-06-26 18:37:55.000000000 -0700
@@ -1451,19 +1451,24 @@ out:
 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
 {
 	int result;
+	unsigned long seq;
 
 	result = 0;
-	for (;;) {
-		if (new_dentry != old_dentry) {
-			struct dentry * parent = new_dentry->d_parent;
-			if (parent == new_dentry)
-				break;
-			new_dentry = parent;
-			continue;
+	do {
+		seq = read_seqbegin(&rename_lock);
+		for (;;) {
+			if (new_dentry != old_dentry) {
+				struct dentry * parent = new_dentry->d_parent;
+				if (parent == new_dentry)
+					break;
+				new_dentry = parent;
+				continue;
+			}
+			result = 1;
+			break;
 		}
-		result = 1;
-		break;
-	}
+	} while (read_seqretry(&rename_lock, seq));
+
 	return result;
 }
 
diff -puN fs/namei.c~vfsmount_lock fs/namei.c
--- 25/fs/namei.c~vfsmount_lock	2003-06-26 18:37:55.000000000 -0700
+++ 25-akpm/fs/namei.c	2003-06-26 18:37:55.000000000 -0700
@@ -434,19 +434,17 @@ int follow_up(struct vfsmount **mnt, str
 	return 1;
 }
 
+/* no need for dcache_lock, as serialization is taken care of
+ * in namespace.c
+ */
 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 {
 	int res = 0;
 	while (d_mountpoint(*dentry)) {
-		struct vfsmount *mounted;
-		spin_lock(&dcache_lock);
-		mounted = lookup_mnt(*mnt, *dentry);
-		if (!mounted) {
-			spin_unlock(&dcache_lock);
+		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+		if (!mounted)
 			break;
-		}
-		*mnt = mntget(mounted);
-		spin_unlock(&dcache_lock);
+		*mnt = mounted;
 		dput(*dentry);
 		mntput(mounted->mnt_parent);
 		*dentry = dget(mounted->mnt_root);
@@ -455,21 +453,21 @@ static int follow_mount(struct vfsmount 
 	return res;
 }
 
+/* no need for dcache_lock, as serialization is taken care of
+ * in namespace.c
+ */
 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 {
 	struct vfsmount *mounted;
 
-	spin_lock(&dcache_lock);
 	mounted = lookup_mnt(*mnt, *dentry);
 	if (mounted) {
-		*mnt = mntget(mounted);
-		spin_unlock(&dcache_lock);
+		*mnt = mounted;
 		dput(*dentry);
 		mntput(mounted->mnt_parent);
 		*dentry = dget(mounted->mnt_root);
 		return 1;
 	}
-	spin_unlock(&dcache_lock);
 	return 0;
 }
 
diff -puN fs/namespace.c~vfsmount_lock fs/namespace.c
--- 25/fs/namespace.c~vfsmount_lock	2003-06-26 18:37:55.000000000 -0700
+++ 25-akpm/fs/namespace.c	2003-06-26 18:37:55.000000000 -0700
@@ -26,6 +26,8 @@
 extern int __init init_rootfs(void);
 extern int __init sysfs_init(void);
 
+/* spinlock for vfsmount related operations, in place of dcache_lock */
+spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 static struct list_head *mount_hashtable;
 static int hash_mask, hash_bits;
 static kmem_cache_t *mnt_cache; 
@@ -66,30 +68,38 @@ void free_vfsmnt(struct vfsmount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/*
+ * lookup_mnt now increments the reference count before returning
+ * the vfsmount struct.
+ */
 struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head * head = mount_hashtable + hash(mnt, dentry);
 	struct list_head * tmp = head;
-	struct vfsmount *p;
+	struct vfsmount *p, *found = NULL;
 
+	spin_lock(&vfsmount_lock);
 	for (;;) {
 		tmp = tmp->next;
 		p = NULL;
 		if (tmp == head)
 			break;
 		p = list_entry(tmp, struct vfsmount, mnt_hash);
-		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
+		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+			found = mntget(p);
 			break;
+		}
 	}
-	return p;
+	spin_unlock(&vfsmount_lock);
+	return found;
 }
 
 static int check_mnt(struct vfsmount *mnt)
 {
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	while (mnt->mnt_parent != mnt)
 		mnt = mnt->mnt_parent;
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	return mnt == current->namespace->root;
 }
 
@@ -263,15 +273,15 @@ void umount_tree(struct vfsmount *mnt)
 		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
 		list_del_init(&mnt->mnt_list);
 		if (mnt->mnt_parent == mnt) {
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		} else {
 			struct nameidata old_nd;
 			detach_mnt(mnt, &old_nd);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 			path_release(&old_nd);
 		}
 		mntput(mnt);
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 	}
 }
 
@@ -324,17 +334,17 @@ static int do_umount(struct vfsmount *mn
 	}
 
 	down_write(&current->namespace->sem);
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 
 	if (atomic_read(&sb->s_active) == 1) {
 		/* last instance - try to be smart */
-		spin_unlock(&dcache_lock);
+		spin_unlock(&vfsmount_lock);
 		lock_kernel();
 		DQUOT_OFF(sb);
 		acct_auto_close(sb);
 		unlock_kernel();
 		security_sb_umount_close(mnt);
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 	}
 	retval = -EBUSY;
 	if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
@@ -342,7 +352,7 @@ static int do_umount(struct vfsmount *mn
 			umount_tree(mnt);
 		retval = 0;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	if (retval)
 		security_sb_umount_busy(mnt);
 	up_write(&current->namespace->sem);
@@ -449,18 +459,18 @@ static struct vfsmount *copy_tree(struct
 			q = clone_mnt(p, p->mnt_root);
 			if (!q)
 				goto Enomem;
-			spin_lock(&dcache_lock);
+			spin_lock(&vfsmount_lock);
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &nd);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		}
 	}
 	return res;
  Enomem:
 	if (res) {
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 		umount_tree(res);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&vfsmount_lock);
 	}
 	return NULL;
 }
@@ -485,7 +495,7 @@ static int graft_tree(struct vfsmount *m
 		goto out_unlock;
 
 	err = -ENOENT;
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
 		struct list_head head;
 
@@ -495,7 +505,7 @@ static int graft_tree(struct vfsmount *m
 		mntget(mnt);
 		err = 0;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 out_unlock:
 	up(&nd->dentry->d_inode->i_sem);
 	if (!err)
@@ -532,9 +542,9 @@ static int do_loopback(struct nameidata 
 	if (mnt) {
 		err = graft_tree(mnt, nd);
 		if (err) {
-			spin_lock(&dcache_lock);
+			spin_lock(&vfsmount_lock);
 			umount_tree(mnt);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		} else
 			mntput(mnt);
 	}
@@ -599,7 +609,7 @@ static int do_move_mount(struct nameidat
 	if (IS_DEADDIR(nd->dentry->d_inode))
 		goto out1;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
 		goto out2;
 
@@ -623,7 +633,7 @@ static int do_move_mount(struct nameidat
 	detach_mnt(old_nd.mnt, &parent_nd);
 	attach_mnt(old_nd.mnt, nd);
 out2:
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 out1:
 	up(&nd->dentry->d_inode->i_sem);
 out:
@@ -804,9 +814,9 @@ int copy_namespace(int flags, struct tas
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 
 	/* Second pass: switch the tsk->fs->* elements */
 	if (fs) {
@@ -1027,7 +1037,7 @@ asmlinkage long sys_pivot_root(const cha
 	if (new_nd.mnt->mnt_root != new_nd.dentry)
 		goto out2; /* not a mountpoint */
 	tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (tmp != new_nd.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
@@ -1044,7 +1054,7 @@ asmlinkage long sys_pivot_root(const cha
 	detach_mnt(user_nd.mnt, &root_parent);
 	attach_mnt(user_nd.mnt, &old_nd);
 	attach_mnt(new_nd.mnt, &root_parent);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
 	error = 0;
@@ -1061,7 +1071,7 @@ out0:
 	unlock_kernel();
 	return error;
 out3:
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	goto out2;
 }
 
diff -puN fs/proc/base.c~vfsmount_lock fs/proc/base.c
--- 25/fs/proc/base.c~vfsmount_lock	2003-06-26 18:37:55.000000000 -0700
+++ 25-akpm/fs/proc/base.c	2003-06-26 18:37:55.000000000 -0700
@@ -307,20 +307,22 @@ static int proc_check_root(struct inode 
 	base = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
 
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	de = root;
 	mnt = vfsmnt;
 
 	while (vfsmnt != our_vfsmnt) {
-		if (vfsmnt == vfsmnt->mnt_parent)
+		if (vfsmnt == vfsmnt->mnt_parent) {
+			spin_unlock(&vfsmount_lock);
 			goto out;
+		}
 		de = vfsmnt->mnt_mountpoint;
 		vfsmnt = vfsmnt->mnt_parent;
 	}
+	spin_unlock(&vfsmount_lock);
 
 	if (!is_subdir(de, base))
 		goto out;
-	spin_unlock(&dcache_lock);
 
 exit:
 	dput(base);
@@ -329,7 +331,6 @@ exit:
 	mntput(mnt);
 	return res;
 out:
-	spin_unlock(&dcache_lock);
 	res = -EACCES;
 	goto exit;
 }
diff -puN include/linux/mount.h~vfsmount_lock include/linux/mount.h
--- 25/include/linux/mount.h~vfsmount_lock	2003-06-26 18:37:55.000000000 -0700
+++ 25-akpm/include/linux/mount.h	2003-06-26 18:37:55.000000000 -0700
@@ -54,6 +54,7 @@ extern void free_vfsmnt(struct vfsmount 
 extern struct vfsmount *alloc_vfsmnt(const char *name);
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
+extern spinlock_t vfsmount_lock;
 
 #endif
 #endif /* _LINUX_MOUNT_H */

_