# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	v2.5.66 -> 1.1011 
#	drivers/video/fbmem.c	1.64    -> 1.65   
#	fs/xfs/linux/xfs_lrw.c	1.17    -> 1.18   
#	drivers/media/video/saa7110.c	1.9     -> 1.10   
#	arch/sparc64/kernel/pci.c	1.27    -> 1.29   
#	       fs/nfs/read.c	1.30    -> 1.32   
#	drivers/char/drm/drm_agpsupport.h	1.15    -> 1.16   
#	drivers/char/ipmi/ipmi_kcs_intf.c	1.3     -> 1.4    
#	 include/pcmcia/ds.h	1.6     -> 1.8    
#	arch/ppc64/kernel/pci.h	1.7     -> 1.8    
#	drivers/pcmcia/cs_internal.h	1.5     -> 1.10   
#	 include/pcmcia/cs.h	1.2     -> 1.3    
#	drivers/i2c/chips/lm75.c	1.7     -> 1.11   
#	  fs/xfs/xfs_mount.h	1.9     -> 1.11   
#	include/asm-x86_64/hdreg.h	1.2     -> 1.3    
#	drivers/media/video/tda9887.c	1.3     -> 1.4    
#	arch/sparc64/boot/Makefile	1.6     -> 1.7    
#	fs/xfs/linux/xfs_linux.h	1.16    -> 1.17   
#	fs/xfs/support/mutex.h	1.1     -> 1.2    
#	include/asm-ppc/pmac_feature.h	1.8     -> 1.9    
#	drivers/media/video/bt856.c	1.7     -> 1.8    
#	drivers/ide/ide-taskfile.c	1.13    -> 1.14   
#	     crypto/digest.c	1.14    -> 1.15   
#	arch/x86_64/kernel/traps.c	1.17    -> 1.18   
#	arch/arm/mach-sa1100/assabet.c	1.18    -> 1.19   
#	arch/x86_64/mm/fault.c	1.10    -> 1.11   
#	include/asm-x86_64/numa.h	1.1     -> 1.2    
#	arch/alpha/kernel/module.c	1.3     -> 1.4    
#	arch/arm/kernel/entry-common.S	1.13    -> 1.14   
#	drivers/pcmcia/sa1100_graphicsclient.c	1.6     -> 1.7    
#	fs/xfs/linux/xfs_vfs.h	1.4     -> 1.7    
#	arch/sparc64/kernel/us3_cpufreq.c	1.5     -> 1.6    
#	   crypto/internal.h	1.16    -> 1.17   
#	arch/arm/mach-sa1100/cpu-sa1110.c	1.19    -> 1.20   
#	drivers/pcmcia/sa1100_freebird.c	1.9     -> 1.10   
#	arch/x86_64/ia32/fpu32.c	1.5     -> 1.6    
#	Documentation/IPMI.txt	1.1     -> 1.2    
#	         mm/fremap.c	1.7     -> 1.8    
#	drivers/i2c/chips/adm1021.c	1.7     -> 1.11   
#	include/net/irda/irlmp.h	1.13    -> 1.14   
#	drivers/net/tulip/de4x5.c	1.23    -> 1.24   
#	net/ipv4/xfrm_input.c	1.8     -> 1.9     net/xfrm/xfrm_input.c (moved)
#	 include/linux/aio.h	1.7     -> 1.8    
#	include/asm-s390/bitops.h	1.9     -> 1.10   
#	fs/xfs/support/atomic.h	1.1     -> 1.2    
#	include/linux/swap.h	1.73    -> 1.76   
#	include/linux/buffer_head.h	1.39    -> 1.40   
#	fs/xfs/support/mrlock.c	1.2     -> 1.3    
#	drivers/i2c/i2c-elektor.c	1.14    -> 1.15   
#	   fs/cramfs/inode.c	1.26    -> 1.27   
#	 drivers/net/3c509.c	1.34    -> 1.35   
#	drivers/usb/net/cdc-ether.c	1.27    -> 1.28   
#	fs/xfs/pagebuf/page_buf.c	1.45    -> 1.47   
#	     fs/xfs/xfs_qm.c	1.9     -> 1.11    fs/xfs/quota/xfs_qm.c (moved)
#	drivers/media/video/saa7185.c	1.9     -> 1.10   
#	net/irda/irnet/irnet_irda.c	1.14    -> 1.16   
#	arch/x86_64/kernel/setup.c	1.10    -> 1.11   
#	include/linux/raid/md_k.h	1.56    -> 1.57   
#	drivers/pcmcia/sa1100_pangolin.c	1.9     -> 1.10   
#	         net/Kconfig	1.7     -> 1.9    
#	net/ipv6/ip6_output.c	1.14    -> 1.15   
#	arch/sparc64/lib/Makefile	1.9     -> 1.10   
#	     mm/page_alloc.c	1.149   -> 1.150  
#	arch/alpha/kernel/pci.c	1.28    -> 1.29   
#	include/asm-ppc64/pci-bridge.h	1.5     -> 1.6    
#	 include/linux/idr.h	1.2     -> 1.3    
#	include/asm-arm/proc-armv/pgtable.h	1.11    -> 1.12   
#	drivers/char/drm/i830_dma.c	1.12    -> 1.13   
#	drivers/usb/host/ohci-hcd.c	1.39    -> 1.40   
#	include/asm-i386/ide.h	1.11    -> 1.12   
#	drivers/char/drm/drm_proc.h	1.8     -> 1.9    
#	arch/ppc/platforms/sleep.S	1.8     -> 1.10    arch/ppc/platforms/pmac_sleep.S (moved)
#	fs/xfs/support/debug.c	1.12    -> 1.13   
#	drivers/pcmcia/i82092aa.h	1.2     -> 1.3    
#	drivers/media/video/bttv-if.c	1.10    -> 1.11   
#	drivers/video/Makefile	1.83    -> 1.84   
#	drivers/i2c/i2c-philips-par.c	1.10    -> 1.11   
#	arch/ppc/platforms/spruce_setup.c	1.9     -> 1.10   
#	drivers/media/video/bt819.c	1.6     -> 1.7    
#	include/asm-sparc64/timer.h	1.2     -> 1.3    
#	arch/ppc64/kernel/chrp_setup.c	1.20    -> 1.21   
#	include/asm-arm/system.h	1.15    -> 1.16   
#	drivers/char/ipmi/ipmi_kcs_sm.c	1.1     -> 1.2    
#	net/irda/irnet/irnet_irda.h	1.5     -> 1.7    
#	         mm/vmscan.c	1.149   -> 1.151  
#	 arch/ppc64/Makefile	1.25    -> 1.26   
#	include/asm-x86_64/kdebug.h	1.2     -> 1.3    
#	arch/sparc64/kernel/pci_iommu.c	1.7     -> 1.8    
#	 arch/sparc/Makefile	1.21    -> 1.22   
#	drivers/i2c/i2c-core.c	1.24    -> 1.29   
#	arch/x86_64/kernel/apic.c	1.14    -> 1.15   
#	fs/xfs/xfs_dquot_item.c	1.2     -> 1.4     fs/xfs/quota/xfs_dquot_item.c (moved)
#	include/asm-i386/uaccess.h	1.20    -> 1.21   
#	drivers/char/drm/r128_cce.c	1.9     -> 1.10   
#	drivers/media/video/saa7134/saa7134-i2c.c	1.6     -> 1.7    
#	 drivers/base/node.c	1.8     -> 1.9    
#	drivers/char/drm/r128_state.c	1.10    -> 1.11   
#	   fs/jfs/jfs_imap.c	1.20    -> 1.21   
#	      net/ipv4/udp.c	1.32    -> 1.33   
#	include/asm-sparc64/pci.h	1.10    -> 1.12   
#	         init/main.c	1.95    -> 1.96   
#	 include/linux/i2c.h	1.16    -> 1.21   
#	drivers/pcmcia/sa1100_badge4.c	1.8     -> 1.9    
#	fs/xfs/xfs_trans_dquot.c	1.3     -> 1.5     fs/xfs/quota/xfs_trans_dquot.c (moved)
#	arch/sparc64/kernel/Makefile	1.23    -> 1.24   
#	drivers/char/drm/radeon_cp.c	1.15    -> 1.16   
#	 fs/xfs/xfs_vfsops.c	1.25    -> 1.27   
#	  drivers/md/raid5.c	1.63    -> 1.65   
#	drivers/pcmcia/pci_socket.h	1.4     -> 1.5    
#	    include/net/ah.h	1.1     -> 1.2    
#	        fs/xfs/xfs.h	1.3     -> 1.4    
#	drivers/video/cyber2000fb.c	1.25    -> 1.26   
#	include/linux/xfrm.h	1.4     -> 1.6    
#	include/asm-ppc/pgtable.h	1.20    -> 1.21   
#	drivers/char/drm/drm_bufs.h	1.9     -> 1.10   
#	 fs/jbd/checkpoint.c	1.7     -> 1.8    
#	drivers/char/drm/drm_ioctl.h	1.8     -> 1.9    
#	  net/ipv4/devinet.c	1.12    -> 1.13   
#	include/net/compat.h	1.1     -> 1.2    
#	drivers/usb/core/message.c	1.23    -> 1.24   
#	   net/ipv4/Makefile	1.15    -> 1.16   
#	net/irda/discovery.c	1.3     -> 1.4    
#	arch/x86_64/kernel/smp.c	1.13    -> 1.14   
#	net/irda/irlap_frame.c	1.10    -> 1.11   
#	   fs/xfs/xfs_bmap.h	1.1     -> 1.2    
#	  include/linux/fs.h	1.222   -> 1.223  
#	 drivers/pcmcia/cs.c	1.16.1.1 -> 1.23   
#	drivers/usb/storage/usb.c	1.57    -> 1.58   
#	include/asm-i386/timer.h	1.5     -> 1.6    
#	arch/sparc64/kernel/devices.c	1.8     -> 1.9    
#	drivers/pcmcia/cardbus.c	1.24    -> 1.25   
#	Documentation/networking/bonding.txt	1.8     -> 1.9    
#	arch/x86_64/kernel/i387.c	1.5     -> 1.6    
#	fs/xfs/linux/xfs_super.h	1.10    -> 1.14   
#	include/net/sctp/sctp.h	1.25    -> 1.26   
#	      net/ipv4/tcp.c	1.36    -> 1.37   
#	include/linux/crypto.h	1.27    -> 1.28   
#	       crypto/proc.c	1.1     -> 1.2    
#	drivers/pcmcia/sa1111_generic.c	1.11    -> 1.13   
#	fs/xfs/linux/xfs_stats.h	1.2     -> 1.3    
#	  drivers/net/mace.c	1.12    -> 1.13   
#	arch/x86_64/ia32/ptrace32.c	1.5     -> 1.6    
#	arch/x86_64/kernel/setup64.c	1.11    -> 1.12   
#	arch/alpha/kernel/smp.c	1.32    -> 1.33   
#	arch/ppc/platforms/pmac_time.c	1.11    -> 1.12   
#	     drivers/md/md.c	1.158   -> 1.160  
#	include/asm-alpha/pgtable.h	1.17    -> 1.18   
#	     fs/xfs/xfs_qm.h	1.1     -> 1.4     fs/xfs/quota/xfs_qm.h (moved)
#	drivers/i2c/busses/i2c-piix4.c	1.7     -> 1.8    
#	net/sunrpc/rpc_pipe.c	1.6     -> 1.7    
#	drivers/char/drm/i830.h	1.5     -> 1.6    
#	arch/x86_64/kernel/ptrace.c	1.11    -> 1.12   
#	drivers/pcmcia/cistpl.c	1.8.1.1 -> 1.12   
#	arch/arm/kernel/entry-armv.S	1.27    -> 1.28   
#	drivers/isdn/hisax/sedlbauer_cs.c	1.6     -> 1.7    
#	  net/irda/af_irda.c	1.37    -> 1.38   
#	  drivers/base/cpu.c	1.7     -> 1.9    
#	drivers/usb/media/ov511.c	1.40    -> 1.41   
#	 fs/xfs/xfs_macros.c	1.3     -> 1.4    
#	include/asm-ppc64/machdep.h	1.13    -> 1.14   
#	drivers/usb/misc/emi26.c	1.6     -> 1.7    
#	drivers/media/video/tvaudio.c	1.15    -> 1.16   
#	       fs/char_dev.c	1.10    -> 1.13   
#	drivers/char/drm/i810.h	1.5     -> 1.6    
#	   net/core/filter.c	1.3     -> 1.4    
#	arch/sparc64/defconfig	1.77    -> 1.78   
#	fs/xfs/linux/xfs_vnode.c	1.11    -> 1.13   
#	drivers/char/drm/i810_dma.c	1.20    -> 1.21   
#	arch/sparc64/kernel/setup.c	1.36    -> 1.37   
#	drivers/ide/ide-iops.c	1.14    -> 1.15   
#	  fs/xfs/xfs_mount.c	1.20    -> 1.23   
#	drivers/char/pcmcia/synclink_cs.c	1.9     -> 1.10   
#	arch/arm/kernel/armksyms.c	1.20    -> 1.21   
#	        net/socket.c	1.45    -> 1.47   
#	arch/x86_64/kernel/entry.S	1.10    -> 1.11   
#	drivers/ieee1394/pcilynx.c	1.25    -> 1.26   
#	drivers/pcmcia/ti113x.h	1.4     -> 1.5    
#	net/ipv4/xfrm_algo.c	1.6     -> 1.8     net/xfrm/xfrm_algo.c (moved)
#	drivers/char/hw_random.c	1.7     -> 1.8    
#	drivers/s390/net/ctcmain.c	1.17    -> 1.18   
#	 drivers/base/base.h	1.22    -> 1.23   
#	drivers/pcmcia/sa1100_pfs168.c	1.7     -> 1.8    
#	drivers/ide/ide-timing.h	1.2     -> 1.3    
#	           fs/exec.c	1.73    -> 1.74   
#	fs/xfs/linux/xfs_globals.h	1.2     -> 1.4    
#	include/asm-ppc64/pci.h	1.4     -> 1.5    
#	   net/sunrpc/clnt.c	1.33    -> 1.34   
#	drivers/usb/storage/transport.c	1.64    -> 1.66   
#	arch/alpha/kernel/sys_nautilus.c	1.11    -> 1.12   
#	drivers/char/drm/i830_drm.h	1.5     -> 1.6    
#	drivers/pcmcia/sa1100_xp860.c	1.7     -> 1.8    
#	drivers/char/drm/mga_state.c	1.12    -> 1.13   
#	      net/ipv6/raw.c	1.19    -> 1.20   
#	arch/x86_64/kernel/head64.c	1.6     -> 1.7    
#	           lib/idr.c	1.2     -> 1.3    
#	       net/netsyms.c	1.56    -> 1.59   
#	drivers/ide/ide-disk.c	1.35    -> 1.36   
#	drivers/media/video/saa7111.c	1.7     -> 1.8    
#	drivers/char/drm/gamma_drv.h	1.5     -> 1.6    
#	include/net/protocol.h	1.7     -> 1.8    
#	 drivers/md/linear.c	1.26    -> 1.29   
#	include/asm-x86_64/pci.h	1.5     -> 1.6    
#	arch/x86_64/boot/compressed/misc.c	1.5     -> 1.6    
#	          mm/shmem.c	1.107   -> 1.109  
#	        net/Makefile	1.21    -> 1.22   
#	drivers/media/video/tuner-3036.c	1.6     -> 1.7    
#	drivers/pcmcia/sa1100_jornada720.c	1.8     -> 1.9    
#	arch/x86_64/ia32/sys_ia32.c	1.25    -> 1.26   
#	arch/x86_64/kernel/pci-gart.c	1.7     -> 1.8    
#	drivers/media/video/tda9875.c	1.10    -> 1.11   
#	arch/ppc64/kernel/irq.c	1.22    -> 1.23   
#	     mm/swap_state.c	1.57    -> 1.58   
#	drivers/media/radio/radio-cadet.c	1.11    -> 1.12   
#	arch/x86_64/ia32/ipc32.c	1.6     -> 1.7    
#	crypto/crypto_null.c	1.1     -> 1.2    
#	drivers/md/multipath.c	1.45    -> 1.47   
#	  fs/xfs/xfs_dquot.c	1.5     -> 1.7     fs/xfs/quota/xfs_dquot.c (moved)
#	include/asm-arm/div64.h	1.1     -> 1.2    
#	            fs/aio.c	1.29    -> 1.30   
#	arch/x86_64/kernel/nmi.c	1.9     -> 1.10   
#	  fs/xfs/xfs_trans.c	1.5     -> 1.6    
#	drivers/usb/usb-skeleton.c	1.29    -> 1.31   
#	net/sunrpc/auth_gss/auth_gss.c	1.4     -> 1.5    
#	drivers/pcmcia/ricoh.h	1.4     -> 1.5    
#	drivers/usb/core/hub.c	1.59    -> 1.61   
#	drivers/video/matrox/i2c-matroxfb.c	1.6     -> 1.7    
#	arch/sparc64/kernel/rtrap.S	1.13    -> 1.14   
#	arch/arm/tools/mach-types	1.25    -> 1.26   
#	drivers/isdn/hisax/elsa_cs.c	1.3     -> 1.4    
#	arch/i386/kernel/smpboot.c	1.54    -> 1.55   
#	drivers/i2c/scx200_acb.c	1.3     -> 1.6    
#	arch/x86_64/kernel/suspend.c	1.2     -> 1.3    
#	drivers/serial/Makefile	1.13    -> 1.14   
#	  fs/jfs/jfs_xtree.c	1.8     -> 1.9    
#	 fs/xfs/xfs_rename.c	1.7     -> 1.8    
#	drivers/pcmcia/sa1100_flexanet.c	1.9     -> 1.10   
#	drivers/i2c/busses/i2c-i801.c	1.7     -> 1.8    
#	drivers/usb/storage/isd200.c	1.28    -> 1.29   
#	drivers/serial/8250_cs.c	1.9     -> 1.10   
#	arch/sparc64/kernel/time.c	1.35    -> 1.36   
#	drivers/scsi/scsi_lib.c	1.75    -> 1.76   
#	drivers/usb/storage/scsiglue.c	1.37    -> 1.40   
#	net/ipv4/xfrm_policy.c	1.21    -> 1.22    net/xfrm/xfrm_policy.c (moved)
#	drivers/net/irda/irda-usb.c	1.35    -> 1.36   
#	include/asm-x86_64/i387.h	1.7     -> 1.8    
#	arch/ppc/platforms/pmac_smp.c	1.9     -> 1.10   
#	drivers/i2c/busses/i2c-ali15x3.c	1.6     -> 1.7    
#	drivers/usb/core/usb.c	1.116   -> 1.117  
#	drivers/char/drm/radeon_drv.h	1.17    -> 1.18   
#	kernel/posix-timers.c	1.9     -> 1.10   
#	 drivers/net/r8169.c	1.7     -> 1.8    
#	drivers/i2c/i2c-proc.c	1.18    -> 1.19   
#	drivers/pcmcia/sa1100_stork.c	1.10    -> 1.11   
#	fs/xfs/linux/xfs_iomap.c	1.6     -> 1.7    
#	drivers/i2c/i2c-algo-bit.c	1.13    -> 1.14   
#	arch/x86_64/ia32/ia32_ioctl.c	1.16    -> 1.17   
#	fs/xfs/linux/xfs_file.c	1.10    -> 1.11   
#	arch/i386/kernel/timers/timer_none.c	1.2     -> 1.3    
#	   crypto/compress.c	1.6     -> 1.7    
#	include/asm-x86_64/suspend.h	1.3     -> 1.4    
#	arch/i386/mm/highmem.c	1.2     -> 1.3    
#	fs/xfs/xfs_bmap_btree.c	1.7     -> 1.8    
#	 fs/jfs/jfs_extent.c	1.7     -> 1.8    
#	  drivers/md/raid1.c	1.57    -> 1.59   
#	fs/xfs/xfs_log_recover.c	1.14    -> 1.15   
#	drivers/usb/image/mdc800.c	1.27    -> 1.28   
#	  net/sunrpc/sched.c	1.23    -> 1.24   
#	net/irda/irlan/irlan_client.c	1.5     -> 1.6    
#	    net/irda/timer.c	1.3     -> 1.4    
#	  drivers/md/raid0.c	1.23    -> 1.25   
#	drivers/char/drm/drm_fops.h	1.8     -> 1.9    
#	arch/ppc/kernel/head.S	1.29    -> 1.30   
#	include/pcmcia/driver_ops.h	1.2     -> 1.3    
#	drivers/i2c/busses/i2c-isa.c	1.1     -> 1.2    
#	arch/x86_64/kernel/process.c	1.15    -> 1.16   
#	 net/ipv6/af_inet6.c	1.23    -> 1.24   
#	 net/ipv6/tcp_ipv6.c	1.43    -> 1.44   
#	drivers/pcmcia/tcic.c	1.16.1.1 -> 1.19   
#	drivers/s390/net/netiucv.c	1.17    -> 1.18   
#	net/ipv4/xfrm_user.c	1.15    -> 1.17    net/xfrm/xfrm_user.c (moved)
#	drivers/pcmcia/hd64465_ss.c	1.11    -> 1.13   
#	 net/sched/sch_csz.c	1.5     -> 1.6    
#	         fs/buffer.c	1.191   -> 1.192  
#	fs/xfs/linux/xfs_behavior.h	1.2     -> 1.3    
#	drivers/pcmcia/sa1100.h	1.9     -> 1.10   
#	    fs/nfsd/export.c	1.74    -> 1.75   
#	drivers/pcmcia/sa1100_trizeps.c	1.9     -> 1.10   
#	drivers/usb/core/hcd.c	1.54    -> 1.55   
#	include/pcmcia/bus_ops.h	1.2     -> 1.3    
#	         lib/Kconfig	1.2     -> 1.3    
#	include/linux/skbuff.h	1.21    -> 1.22   
#	fs/xfs/pagebuf/page_buf.h	1.25    -> 1.26   
#	   fs/xfs/xfs_attr.c	1.4     -> 1.5    
#	drivers/char/tty_io.c	1.67    -> 1.68   
#	arch/x86_64/pci/irq.c	1.6     -> 1.7    
#	include/asm-ppc/keylargo.h	1.7     -> 1.8    
#	 fs/jfs/jfs_txnmgr.c	1.40    -> 1.42   
#	drivers/pcmcia/sa1100_generic.h	1.8     -> 1.9    
#	arch/ppc64/kernel/process.c	1.29    -> 1.30   
#	     net/sctp/ipv6.c	1.26    -> 1.27   
#	         MAINTAINERS	1.131   -> 1.132  
#	 net/sctp/protocol.c	1.37    -> 1.38   
#	drivers/pcmcia/rsrc_mgr.c	1.9.1.1 -> 1.13   
#	drivers/pcmcia/yenta.c	1.19    -> 1.21   
#	include/asm-alpha/fcntl.h	1.2     -> 1.3    
#	        net/compat.c	1.5     -> 1.7    
#	drivers/pcmcia/sa1100_generic.c	1.26    -> 1.30   
#	net/irda/irlap_event.c	1.16    -> 1.18   
#	drivers/media/video/msp3400.c	1.15    -> 1.16   
#	           mm/mmap.c	1.75    -> 1.77   
#	      fs/proc/base.c	1.39    -> 1.40   
#	        mm/highmem.c	1.42    -> 1.43   
#	fs/xfs/linux/xfs_globals.c	1.8     -> 1.10   
#	drivers/net/irda/sir_dev.c	1.4     -> 1.5    
#	     net/ipv6/esp6.c	1.5     -> 1.7    
#	drivers/char/drm/drm_dma.h	1.10    -> 1.11   
#	fs/xfs/support/mrlock.h	1.1     -> 1.2    
#	drivers/usb/class/cdc-acm.c	1.35    -> 1.36   
#	fs/xfs/xfs_qm_syscalls.c	1.4     -> 1.7     fs/xfs/quota/xfs_qm_syscalls.c (moved)
#	drivers/char/drm/radeon_drm.h	1.11    -> 1.12   
#	drivers/char/drm/drm_lock.h	1.6     -> 1.7    
#	fs/xfs/xfs_dquot_item.h	1.1     -> 1.3     fs/xfs/quota/xfs_dquot_item.h (moved)
#	arch/ppc/platforms/pmac_setup.c	1.25    -> 1.26   
#	arch/x86_64/kernel/head.S	1.9     -> 1.10   
#	arch/arm/lib/Makefile	1.14    -> 1.15   
#	   fs/xfs/xfs_clnt.h	1.3     -> 1.4    
#	include/net/irda/discovery.h	1.4     -> 1.5    
#	drivers/char/drm/sis_mm.c	1.4     -> 1.5    
#	   include/net/esp.h	1.1     -> 1.2    
#	net/packet/af_packet.c	1.20    -> 1.21   
#	drivers/char/drm/i830_drv.h	1.6     -> 1.7    
#	arch/i386/kernel/timers/timer_tsc.c	1.14    -> 1.15   
#	drivers/usb/media/stv680.c	1.23    -> 1.24   
#	include/asm-x86_64/unistd.h	1.13    -> 1.14   
#	   net/sunrpc/xprt.c	1.51    -> 1.52   
#	      net/ipv6/ah6.c	1.5     -> 1.7    
#	arch/i386/kernel/timers/timer_cyclone.c	1.4     -> 1.5    
#	net/irda/irnet/irnet.h	1.15    -> 1.16   
#	include/asm-x86_64/debugreg.h	1.1     -> 1.2    
#	arch/x86_64/kernel/signal.c	1.15    -> 1.16   
#	include/asm-sparc64/rwsem.h	1.9     -> 1.10   
#	include/asm-x86_64/thread_info.h	1.8     -> 1.9    
#	fs/xfs/xfs_rtalloc.h	1.1     -> 1.2    
#	     fs/xfs/Makefile	1.9     -> 1.11   
#	             CREDITS	1.79    -> 1.80   
#	  fs/jfs/jfs_dtree.c	1.20    -> 1.21   
#	drivers/media/video/tda7432.c	1.8     -> 1.9    
#	drivers/pcmcia/sa1100_h3600.c	1.9     -> 1.10   
#	drivers/char/ipmi/Kconfig	1.1     -> 1.2    
#	drivers/pcmcia/sa1100_graphicsmaster.c	1.5     -> 1.6    
#	net/ipv6/ipv6_syms.c	1.10    -> 1.11   
#	arch/ppc64/kernel/prom.c	1.21    -> 1.22   
#	drivers/usb/host/ohci-pci.c	1.10    -> 1.11   
#	net/ipv4/xfrm_state.c	1.15    -> 1.17    net/xfrm/xfrm_state.c (moved)
#	include/asm-x86_64/pgtable.h	1.16    -> 1.17   
#	   net/core/skbuff.c	1.21    -> 1.23   
#	  include/net/sock.h	1.33    -> 1.34   
#	  fs/xfs/xfs_inode.h	1.11    -> 1.12   
#	net/irda/irnet/irnet_ppp.h	1.4     -> 1.5    
#	arch/ppc64/kernel/iSeries_setup.c	1.9     -> 1.10   
#	     net/core/sock.c	1.19    -> 1.21   
#	net/ipv4/sysctl_net_ipv4.c	1.8     -> 1.9    
#	drivers/char/drm/drmP.h	1.17    -> 1.18   
#	net/irda/irnet/irnet_ppp.c	1.10    -> 1.11   
#	net/ipv6/ip6_input.c	1.8     -> 1.9    
#	arch/x86_64/kernel/Makefile	1.18    -> 1.19   
#	  fs/xfs/xfs_trans.h	1.4     -> 1.5    
#	arch/x86_64/kernel/aperture.c	1.2     -> 1.3    
#	drivers/serial/21285.c	1.15    -> 1.16   
#	net/ipv4/tcp_input.c	1.34    -> 1.35   
#	drivers/media/video/saa5249.c	1.12    -> 1.13   
#	drivers/pcmcia/Kconfig	1.2     -> 1.4    
#	   fs/nfs/nfs4proc.c	1.12    -> 1.13   
#	drivers/mtd/maps/epxa10db-flash.c	1.2     -> 1.3    
#	include/linux/highmem.h	1.23    -> 1.24   
#	net/ipv4/tcp_minisocks.c	1.24    -> 1.25   
#	 drivers/pcmcia/ds.c	1.19    -> 1.25   
#	drivers/pcmcia/sa1100_yopy.c	1.9     -> 1.10   
#	 fs/xfs/xfs_iocore.c	1.2     -> 1.4    
#	fs/xfs/linux/xfs_ioctl.c	1.11    -> 1.12   
#	drivers/net/gt96100eth.c	1.7     -> 1.8    
#	    net/ipv4/Kconfig	1.4     -> 1.5    
#	arch/ppc64/kernel/stab.c	1.8     -> 1.10   
#	drivers/i2c/i2c-dev.c	1.23.1.1 -> 1.26   
#	drivers/i2c/i2c-algo-pcf.c	1.10    -> 1.11   
#	           mm/slab.c	1.71    -> 1.73   
#	    fs/xfs/xfsidbg.c	1.20    -> 1.22   
#	net/ipv4/netfilter/ip_nat_standalone.c	1.18    -> 1.19   
#	drivers/pcmcia/Makefile	1.21    -> 1.23   
#	 arch/x86_64/Kconfig	1.16    -> 1.17   
#	include/net/irda/irlan_client.h	1.2     -> 1.3    
#	      kernel/sched.c	1.174   -> 1.175  
#	drivers/pcmcia/sa1100_assabet.c	1.12    -> 1.13   
#	     crypto/tcrypt.c	1.21    -> 1.22   
#	drivers/i2c/i2c-elv.c	1.10    -> 1.12   
#	include/linux/if_bonding.h	1.6     -> 1.7    
#	drivers/i2c/i2c-velleman.c	1.8     -> 1.9    
#	drivers/char/drm/drm_drv.h	1.13    -> 1.15   
#	      net/ipv6/udp.c	1.20    -> 1.23   
#	drivers/i2c/busses/i2c-amd8111.c	1.6     -> 1.7    
#	arch/x86_64/mm/numa.c	1.1     -> 1.2    
#	arch/sparc64/kernel/sparc64_ksyms.c	1.42    -> 1.43   
#	drivers/char/drm/drm_lists.h	1.6     -> 1.7    
#	net/ipv4/netfilter/ipt_REJECT.c	1.12    -> 1.13   
#	include/asm-x86_64/system.h	1.12    -> 1.13   
#	  include/net/xfrm.h	1.22    -> 1.24   
#	   fs/nfsd/nfs4xdr.c	1.10    -> 1.13   
#	arch/x86_64/kernel/reboot.c	1.2     -> 1.3    
#	  fs/xfs/xfs_dqblk.h	1.1     ->         (deleted)      
#	drivers/media/video/tuner.c	1.15    -> 1.16   
#	    fs/sysfs/inode.c	1.84    -> 1.85   
#	drivers/char/drm/radeon_state.c	1.18    -> 1.19   
#	drivers/char/ipmi/ipmi_devintf.c	1.4     -> 1.5    
#	arch/i386/kernel/timers/timer_pit.c	1.8     -> 1.9    
#	drivers/i2c/busses/i2c-amd756.c	1.5     -> 1.6    
#	include/asm-ppc/uninorth.h	1.7     -> 1.8    
#	 drivers/net/Kconfig	1.21    -> 1.22   
#	arch/ppc64/kernel/pci.c	1.27    -> 1.29   
#	arch/ppc64/boot/Makefile	1.12    -> 1.13   
#	arch/sparc64/kernel/binfmt_elf32.c	1.8     -> 1.9    
#	drivers/char/drm/Kconfig	1.2     -> 1.3    
#	drivers/char/drm/radeon_mem.c	1.5     -> 1.6    
#	drivers/usb/serial/kobil_sct.c	1.5     -> 1.6    
#	drivers/base/driver.c	1.19    -> 1.20   
#	net/ipv4/netfilter/iptable_mangle.c	1.10    -> 1.11   
#	arch/sparc64/Kconfig	1.16    -> 1.17   
#	drivers/char/drm/mga_dma.c	1.10    -> 1.11   
#	include/asm-x86_64/proto.h	1.8     -> 1.9    
#	arch/x86_64/Makefile	1.24    -> 1.25   
#	include/asm-x86_64/apic.h	1.4     -> 1.5    
#	include/net/irda/af_irda.h	1.2     -> 1.3    
#	drivers/pcmcia/i82365.c	1.24.1.1 -> 1.27   
#	arch/x86_64/kernel/bluesmoke.c	1.8     -> 1.9    
#	drivers/net/tulip/dmfe.c	1.26    -> 1.28   
#	drivers/char/drm/mga_drv.h	1.11    -> 1.12   
#	arch/ppc/kernel/Makefile	1.35    -> 1.36   
#	drivers/char/drm/radeon.h	1.9     -> 1.10   
#	     fs/ext3/super.c	1.56    -> 1.57   
#	fs/xfs/linux/xfs_aops.c	1.27    -> 1.28   
#	include/linux/device.h	1.85    -> 1.86   
#	net/irda/ircomm/ircomm_tty_attach.c	1.8     -> 1.9    
#	drivers/pci/Makefile	1.24    -> 1.25   
#	    net/irda/irlmp.c	1.21    -> 1.22   
#	drivers/char/drm/drm_os_linux.h	1.7     -> 1.8    
#	arch/ppc/platforms/pmac_feature.c	1.15    -> 1.16   
#	arch/ppc/kernel/misc.S	1.41    -> 1.42   
#	arch/alpha/kernel/err_titan.c	1.2     -> 1.3    
#	     crypto/tcrypt.h	1.12    -> 1.13   
#	drivers/media/video/adv7175.c	1.8     -> 1.9    
#	fs/xfs/linux/xfs_super.c	1.30    -> 1.33   
#	drivers/serial/Kconfig	1.6     -> 1.7    
#	arch/sparc/boot/Makefile	1.13    -> 1.14   
#	drivers/pcmcia/sa1100_shannon.c	1.8     -> 1.9    
#	drivers/char/drm/gamma_dma.c	1.8     -> 1.9    
#	drivers/pcmcia/sa1100_cerf.c	1.10    -> 1.11   
#	drivers/pcmcia/sa1100_neponset.c	1.9     -> 1.10   
#	fs/xfs/linux/xfs_stats.c	1.2     -> 1.3    
#	           mm/rmap.c	1.22    -> 1.23   
#	   include/net/dst.h	1.12    -> 1.13   
#	   fs/xfs/xfs_bmap.c	1.8     -> 1.9    
#	arch/sparc64/kernel/irq.c	1.26    -> 1.27   
#	drivers/net/bonding.c	1.18    -> 1.19   
#	drivers/scsi/3w-xxxx.c	1.27    -> 1.28   
#	      crypto/Kconfig	1.11    -> 1.12   
#	drivers/char/drm/radeon_irq.c	1.7     -> 1.8    
#	   net/core/Makefile	1.12    -> 1.13   
#	arch/x86_64/mm/ioremap.c	1.8     -> 1.9    
#	drivers/char/drm/i810_drv.h	1.7     -> 1.8    
#	include/linux/netdevice.h	1.31    -> 1.32   
#	arch/ppc/kernel/smp.c	1.29    -> 1.30   
#	drivers/usb/Makefile	1.39    -> 1.40   
#	include/asm-ppc64/pgtable.h	1.17.1.1 -> 1.21   
#	arch/arm/kernel/apm.c	1.1     -> 1.2    
#	drivers/pcmcia/sa1111_generic.h	1.5     -> 1.6    
#	 include/pcmcia/ss.h	1.7     -> 1.10   
#	        crypto/api.c	1.29    -> 1.30   
#	drivers/atm/iphase.c	1.14    -> 1.15   
#	drivers/usb/misc/usbtest.c	1.12    -> 1.14   
#	  fs/xfs/xfs_dmapi.h	1.3     -> 1.7    
#	drivers/char/drm/Makefile	1.15    -> 1.16   
#	arch/i386/kernel/timers/timer.c	1.5     -> 1.6    
#	fs/xfs/linux/xfs_behavior.c	1.2     -> 1.3    
#	     crypto/Makefile	1.16    -> 1.17   
#	    fs/xfs/xfs_buf.h	1.10    -> 1.11   
#	          fs/quota.c	1.12    -> 1.13   
#	  fs/xfs/xfs_inode.c	1.15    -> 1.17   
#	   fs/xfs/xfs_iget.c	1.9     -> 1.11   
#	arch/x86_64/boot/setup.S	1.4     -> 1.5    
#	drivers/usb/core/usb-debug.c	1.8     -> 1.9    
#	drivers/usb/core/buffer.c	1.4     -> 1.5    
#	arch/x86_64/ia32/ia32_signal.c	1.11    -> 1.12   
#	drivers/char/drm/sis.h	1.4     -> 1.5    
#	include/asm-x86_64/processor.h	1.13    -> 1.14   
#	drivers/pcmcia/sa1100_simpad.c	1.10    -> 1.11   
#	drivers/usb/class/audio.c	1.32    -> 1.33   
#	     fs/ext3/fsync.c	1.5     -> 1.6    
#	arch/x86_64/mm/k8topology.c	1.1     -> 1.2    
#	  fs/xfs/xfs_quota.h	1.1     -> 1.3    
#	drivers/scsi/pcmcia/nsp_cs.c	1.17.1.1 -> 1.19   
#	arch/sparc64/kernel/traps.c	1.24    -> 1.25   
#	    arch/ppc/Kconfig	1.18    -> 1.19   
#	arch/ppc/syslib/prom_init.c	1.6     -> 1.7    
#	arch/sparc64/Makefile	1.25    -> 1.26   
#	arch/ppc64/kernel/iSeries_pci.c	1.9     -> 1.10   
#	          fs/inode.c	1.89    -> 1.90   
#	drivers/pcmcia/pci_socket.c	1.13    -> 1.15   
#	  fs/xfs/xfs_dquot.h	1.1     -> 1.3     fs/xfs/quota/xfs_dquot.h (moved)
#	drivers/pcmcia/sa1100_system3.c	1.7     -> 1.8    
#	fs/xfs/xfs_quota_priv.h	1.2     -> 1.4     fs/xfs/quota/xfs_quota_priv.h (moved)
#	fs/xfs/support/time.h	1.6     -> 1.7    
#	drivers/base/memblk.c	1.4     -> 1.5    
#	  fs/xfs/xfs_utils.c	1.7     -> 1.8    
#	drivers/acpi/Kconfig	1.7     -> 1.8    
#	arch/ppc64/kernel/pSeries_lpar.c	1.19    -> 1.20   
#	drivers/usb/misc/speedtch.c	1.70    -> 1.74   
#	fs/xfs/linux/xfs_vnode.h	1.14    -> 1.15   
#	net/ipv6/xfrm6_input.c	1.2     -> 1.3    
#	arch/ppc/platforms/Makefile	1.15    -> 1.17   
#	drivers/acorn/char/i2c.c	1.6     -> 1.7    
#	drivers/pcmcia/sa1100_adsbitsy.c	1.7     -> 1.8    
#	arch/x86_64/kernel/smpboot.c	1.15    -> 1.16   
#	drivers/pcmcia/i82092.c	1.13    -> 1.15   
#	drivers/char/drm/r128_drv.h	1.12    -> 1.13   
#	arch/arm/mach-footbridge/dc21285.c	1.6     -> 1.7    
#	fs/xfs/xfs_vnodeops.c	1.22    -> 1.23   
#	      fs/block_dev.c	1.125   -> 1.126  
#	Documentation/kernel-parameters.txt	1.17    -> 1.19   
#	include/asm-x86_64/desc.h	1.8     -> 1.9    
#	arch/ppc64/kernel/pSeries_pci.c	1.22    -> 1.23   
#	arch/i386/mm/boot_ioremap.c	1.1     -> 1.2    
#	               (new)	        -> 1.1     arch/sparc64/lib/rwsem.c
#	               (new)	        -> 1.3     fs/xfs/linux/xfs_vfs.c
#	               (new)	        -> 1.1     fs/xfs/quota/xfs_qm_bhv.c
#	               (new)	        -> 1.1     fs/xfs/quota/xfs_qm_stats.c
#	               (new)	        -> 1.1     include/asm-ppc/macio_asic.h
#	               (new)	        -> 1.1     drivers/pcmcia/sa11xx_core.c
#	               (new)	        -> 1.1     arch/ppc/kernel/cpu_setup_6xx.S
#	               (new)	        -> 1.3     fs/xfs/xfs_qmops.c
#	               (new)	        -> 1.1     net/xfrm/Kconfig
#	               (new)	        -> 1.2     arch/ppc/platforms/pmac_cpufreq.c
#	               (new)	        -> 1.1     drivers/pcmcia/sa11xx_core.h
#	               (new)	        -> 1.1     arch/sparc64/kernel/us2e_cpufreq.c
#	               (new)	        -> 1.1     drivers/char/drm/i830_irq.c
#	               (new)	        -> 1.1     include/linux/kmalloc_sizes.h
#	               (new)	        -> 1.2     fs/xfs/xfs_dmops.c
#	               (new)	        -> 1.1     fs/xfs/quota/xfs_qm_stats.h
#	               (new)	        -> 1.1     crypto/deflate.c
#	               (new)	        -> 1.1     arch/arm/lib/div64.S
#	               (new)	        -> 1.1     net/xfrm/Makefile
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/03/24	torvalds@penguin.transmeta.com	1.977.2.92
# Linux 2.5.66
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.12
# Merge samba.org:/scratch/anton/linux-2.5
# into samba.org:/scratch/anton/tmp3
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.13
# Merge samba.org:/scratch/anton/linux-2.5
# into samba.org:/scratch/anton/tmp3
# --------------------------------------------
# 03/03/24	greg@kroah.com	1.977.3.4
# [PATCH] i2c: set up a "generic" i2c driver to prevent oopses when devices are registering.
# 
# This is needed as we are still not using the driver core model for
# matching up devices to drivers, but doing it by hand.  Once that is
# changed, this will not be needed.
# --------------------------------------------
# 03/03/24	greg@kroah.com	1.977.2.93
# Merge kroah.com:/home/greg/linux/BK/bleed-2.5
# into kroah.com:/home/greg/linux/BK/i2c-2.5
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.14
# ppc64: Rework pci probe to be like alpha.
# --------------------------------------------
# 03/03/24	greg@kroah.com	1.977.11.1
# Merge kroah.com:/home/greg/linux/BK/bleed-2.5
# into kroah.com:/home/greg/linux/BK/gregkh-2.5
# --------------------------------------------
# 03/03/24	davem@nuts.ninka.net	1.977.12.1
# Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
# into nuts.ninka.net:/home/davem/src/BK/net-2.5
# --------------------------------------------
# 03/03/25	paulus@samba.org	1.977.1.15
# ppc64: Add missing RELOCs
# --------------------------------------------
# 03/03/24	hch@lst.de	1.977.12.2
# [NET]: Kill dev_init_buffers, was scheduled to die in 2.5.x
# --------------------------------------------
# 03/03/24	toml@us.ibm.com	1.977.12.3
# [IPSEC]: Fix IPV6 UDP policy checking.
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.16
# ppc64: fix pci probe on large bus systems
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.13.1
# Merge samba.org:/scratch/anton/linux-2.5
# into samba.org:/scratch/anton/linux-2.5_ppc64
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.17
# Merge samba.org:/scratch/anton/export
# into samba.org:/scratch/anton/linux-2.5_ppc64
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.14.1
# ppc64: Disable 32bit SLB invalidation optimisation for the moment
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.15.1
# Merge samba.org:/scratch/anton/export into samba.org:/scratch/anton/tmp3
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.15.2
# ppc64: Fix problem with casting out the segment for our kernel stack
# --------------------------------------------
# 03/03/25	anton@samba.org	1.977.1.18
# Merge samba.org:/scratch/anton/export
# into samba.org:/scratch/anton/linux-2.5_ppc64
# --------------------------------------------
# 03/03/24	rth@are.twiddle.net	1.977.16.1
# Merge are.twiddle.net:/home/rth/BK/linus-2.5
# into are.twiddle.net:/home/rth/BK/axp-2.5
# --------------------------------------------
# 03/03/24	ink@jurassic.park.msu.ru	1.977.16.2
# [PATCH] alpha: file offset in pte
# 
# It's possible to squeeze more bits out of the lower half of the pte,
# but 32 seems to be plenty...
# 
# Ivan.
# --------------------------------------------
# 03/03/24	ink@jurassic.park.msu.ru	1.977.16.3
# [PATCH] alpha: handle unaligned REFQUADs produced by BUG() macro
# 
# --------------------------------------------
# 03/03/24	ink@jurassic.park.msu.ru	1.977.16.4
# [PATCH] alpha: pci update
# 
# - Check for parent PCI bus instead of bridge device to break the
#   loop in common_swizzle(). Functionally it's the same, but it's
#   cleaner for PC-style host bridges (nautiluses).
# - Generic PCI setup changes finally went in (thanks to rmk), so that
#   FIXME in common_init_pci() can go.
# 
# Ivan.
# --------------------------------------------
# 03/03/24	ink@jurassic.park.msu.ru	1.977.16.5
# [PATCH] alpha: nautilus_init_pci() cleanup
# 
# --------------------------------------------
# 03/03/24	ink@jurassic.park.msu.ru	1.977.16.6
# [PATCH] alpha: fix jiffies compile warning in smp.c
# 
# Fix more annoying compile problems due to wrong types
# for comparing jiffies. This patch applies to alpha arch.
# 
# From Thomas Weyergraf.
# 
# Ivan.
# --------------------------------------------
# 03/03/24	torvalds@home.transmeta.com	1.977.2.94
# Merge bk://are.twiddle.net/axp-2.5/
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/24	davem@nuts.ninka.net	1.977.12.4
# [TCP]: Forward port of 2.4.x bugfix, noticed as missing by davej@codemonkey.org.uk.
# 
# In tcp_sendmsg, make sure we jump to the out label
# when seglen is decremented to zero and no more iovecs remain.
# This matches the do_tcp_sendpages logic and makes sure that
# PSH is set correctly at the end of a write even if the write length
# equals the current mss.
# --------------------------------------------
# 03/03/24	sfr@canb.auug.org.au	1.977.12.5
# [SCTP]: Fix IRQ flags warnings.
# --------------------------------------------
# 03/03/24	yoshfuji@linux-ipv6.org	1.977.12.6
# [IPSEC]: Move xfrm_*.c into net/xfrm/
# --------------------------------------------
# 03/03/24	davem@kernel.bkbits.net	1.977.2.95
# Merge davem@nuts.ninka.net:/home/davem/src/BK/net-2.5
# into kernel.bkbits.net:/home/davem/net-2.5
# --------------------------------------------
# 03/03/24	davem@nuts.ninka.net	1.977.17.1
# [DRM]: Fix warnings and build errors introduced by previous changes to drm_drv.h
# --------------------------------------------
# 03/03/24	davem@kernel.bkbits.net	1.977.18.1
# Merge davem@nuts.ninka.net:/home/davem/src/BK/sparc-2.5
# into kernel.bkbits.net:/home/davem/sparc-2.5
# --------------------------------------------
# 03/03/25	rmk@flint.arm.linux.org.uk	1.977.19.1
# [ARM] Add pte_file() and friends to pgtable.h
# --------------------------------------------
# 03/03/25	davem@nuts.ninka.net	1.977.12.7
# [IPSEC]: Kill skb_ah_walk, not needed.
# --------------------------------------------
# 03/03/25	davem@nuts.ninka.net	1.977.12.8
# [NET]: Make SKB layout/initialization/copy more cache friendly.
# --------------------------------------------
# 03/03/25	zaitcev@redhat.com	1.977.17.2
# [SPARC]: Handle make w/o arg sanely, by Sam Ravnborg.
# --------------------------------------------
# 03/03/25	zaitcev@redhat.com	1.977.17.3
# [SPARC64]: Kill ELF_FLAGS_INIT.
# --------------------------------------------
# 03/03/25	davem@nuts.ninka.net	1.977.17.4
# [SPARC64]: Uninline rwsem assembler.
# --------------------------------------------
# 03/03/25	shaggy@shaggy.austin.ibm.com	1.977.20.1
# Merge jfs@jfs.bkbits.net:linux-2.5
# into shaggy.austin.ibm.com:/shaggy/bk/jfs-2.5
# --------------------------------------------
# 03/03/25	torvalds@home.transmeta.com	1.977.18.2
# Merge bk://kernel.bkbits.net/davem/sparc-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/25	torvalds@home.transmeta.com	1.977.2.96
# Merge bk://kernel.bkbits.net/davem/net-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/25	torvalds@home.transmeta.com	1.977.1.19
# Merge http://ppc.bkbits.net/for-linus-ppc64
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/25	ak@suse.de	1.977.1.20
# [PATCH] aio compat patches
# 
# The new aio emulation in x86-64 needs a few aio symbols exported.
# Export them.
# --------------------------------------------
# 03/03/25	ak@suse.de	1.977.1.21
# [PATCH] ACPI NUMA option fix for x86-64
# 
# The ACPI NUMA support doesn't work on x86-64 currently (no SRAT
# parsing yet). Disable for now.
# --------------------------------------------
# 03/03/25	baldrick@wanadoo.fr	1.977.11.2
# [PATCH] USB speedtouch: code reorganization
# 
# Remove dead code from sarlib, reorganize live sarlib code (trivial transformations).
# --------------------------------------------
# 03/03/25	baldrick@wanadoo.fr	1.977.11.3
# [PATCH] USB speedtouch: trivial cleanups
# --------------------------------------------
# 03/03/25	baldrick@wanadoo.fr	1.977.11.4
# [PATCH] USB speedtouch: per vcc data cleanups
# 
# Use struct list_head rather than a singly linked list in udsl_vcc_data.  Reject
# attempts to open multiple vccs with the same vpi/vci pair.  Some cleanups too.
# --------------------------------------------
# 03/03/25	baldrick@wanadoo.fr	1.977.11.5
# [PATCH] USB speedtouch: eliminate ATM open/close races
# 
# The list of open vccs is modified by open/close, and traversed by the
# receive tasklet.  This is the last race I know of in this driver.
# --------------------------------------------
# 03/03/25	ink@jurassic.park.msu.ru	1.977.11.6
# [PATCH] USB: missing include
# 
# at least Alpha needs mm.h for "page_address".
# --------------------------------------------
# 03/03/25	david-b@pacbell.net	1.977.11.7
# [PATCH] USB: usb-skeleton, usbtest use "real" device ids
# 
# I'll be switching "gadget zero" to use real product IDs
# (donated by NetChip), and these are the two drivers that
# will need to recognize them.
# --------------------------------------------
# 03/03/25	joe@perches.com	1.977.11.8
# [PATCH] USB: usb_skeleton.c trivial fix
# 
# Remove redundant __FILE__.
# --------------------------------------------
# 03/03/25	ak@suse.de	1.977.1.22
# [PATCH] x86-64 updates
# 
# Lots of x86-64 updates. Merge with 2.4 and NUMA works now. Also reenabled
# the preemptive kernel. And some other bug fixes.
# IOMMU disabled by default now because it has problems.
# 
#  - Add more CONFIG options for device driver debugging and iommu
#    force/debug.  (don't enable iommu force currently)
#  - Some S3/ACPI fixes/cleanups from Pavel.
#  - Set MSG_COMPAT_* in msg_flags for networking 32bit emulation.
#    This unfortunately still doesn't fix the fd passing problems.
#  - Sync PCI IOMMU code with 2.4 (minor fixes, flush less often)
#  - Really fix UP compilation (Pavel)
#  - Reenable preempt
#  - Fix CONFIG_DISCONTIGMEM bootup and enable.  Still needs more tuning.
#  - Fix some bugs in topology discovery and clean code up.
#  - Don't put unwind tables into object files
#  - Some kernel debugging hooks
#  - Move CPU detection into early real mode code to better interact with
#    vesafb consoles
#  - Initialize mode in real mode character output
#  - New 32bit FPU signal save/restore
#  - Various fixes in FPU handling in ptrace
#  - Fix security holes in ptrace (32bit and 64bit)
#  - Fix serial ioctl (including security hole)
#  - Add bluetooth ioctls to 32bit emu (from sparc64)
#  - Correctly enable si_val in queued signals in 32bit emulation
#  - Rework SEM_STAT emulation.  LTP still fails unfortunately.
#  - Fix error case in msg* emulation
#  - Fix debug register access from ptrace (Michal Ludvig, me)
#  - Fix handling of NULL arguments in 32bit execve
#  - Fix some error cases for 32bit readv/writev (LTP still complains)
#  - Remove rate control from unimplemented syscall warnings
#  - Fix error message for missing aperture
#  - Turn some APIC printks into Dprintk to make the bootup more quiet
#  - Some fixes for no APIC (probably still broken), add disableapic
#    option (untested)
#  - Sync K8 MCE handler with 2.4.  Should work a lot better now.
#  - Remove never used KDB hooks
#  - Fix buffer overflow in command line copying
#  - Merge from i386: use separate status word for lazy FPU state
#  - Don't force the IOMMU for dma masks < 4GB.
#  - Print backtrace in Sysrq-T (from Andrea)
#  - Merge from i386: fix FPU race in fork.
#  - Disable NX mode by default for now
#  - Rewrite dump_pagetable
#  - Fix off by one bug in ioremap (i386 merge)
#  - Merge from i386: handle VIA pci bridge bugs
#  - Disable NUMA ACPI support (no SRAT support yet)
#  - Fix aio 32bit emulation
#  - Increase 32bit address space to nearly 4GB
#  - Add exit_group syscall
#  - Fix TLS setting in clone (Ulrich Drepper)
# --------------------------------------------
# 03/03/25	oliver.spang@siemens.com	1.977.11.9
# [PATCH] USB: Compiler error in cdc-acm when DEBUG defined
# --------------------------------------------
# 03/03/25	randy.dunlap@verizon.net	1.977.11.10
# [PATCH] USB: usb/misc/emi26.c stack reduction
# 
# Reduces stack usage in emi26_load_firmware().
# --------------------------------------------
# 03/03/25	greg@kroah.com	1.977.11.11
# [PATCH] USB: fix Makefile to allow usb midi driver to be built if it's the only class driver selected.
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.21.1
# driver model: don't define DEBUG in base.h
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.21.2
# driver model: Make sure we initialize drivers' class_list.
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.21.3
# driver model:  Fix error handling in sysfs registration
# 
# From Matt Dobson:
# 
# The cpu, memblk, and node driver/device registration should be a little 
# more clean in the way it handles registration failures.  Or at least 
# *consistent* amongst the topology elements.  Right now, failures are 
# either silent, obscure, or leave things in an inconsistent state.
# --------------------------------------------
# 03/03/25	oliver@neukum.name	1.977.11.12
# [PATCH] USB: Another memory allocation in block IO error handling path
# 
#   - memory allocation in block io error code path with GFP_KERNEL
# --------------------------------------------
# 03/03/25	oliver@neukum.name	1.977.11.13
# [PATCH] USB: storage device reset cleanup
# 
# > In the absence of far-reaching changes to the API, my suggestion is to
# > have the emulated SCSI bus reset code in usb-storage do nothing but log an
# > error message and return an error code.  For the time being, considering
# > how infrequently these resets occur, we can simply rely on the user
# > unplugging the USB cable and putting it back in or cycling the power to
# > the drive.  (Yes, there are situations where these resets crop up
# > regularly -- but they are the result of some other incompatibility that a
# > device reset won't fix anyway.)
# 
# OK, as the consensus seems to be that in the short run changing things
# for a full reset implementation is not worth it, here's an implementation
# that does the best we can do without.
# It issues a reset only if we can be sure that there are no other users
# of the device in question.
# As the version currently in the storage driver is broken anyway,
# this is a definite improvement. And it addresses the need of exporting
# the probe/remove functions for storage's sake.
# --------------------------------------------
# 03/03/25	oliver@neukum.name	1.977.11.14
# [PATCH] USB: storage: add logging to reset
# 
#   - add logging to reset
# --------------------------------------------
# 03/03/25	nathans@sgi.com	1.977.22.1
# [XFS] Next step in bhv code cleanup - this is a start on moving quota and dmapi
# into behavior layers, purging several points where these sit slap bang in
# the middle of XFS code (esp. read_super).  Also removes numerous #ifdef's
# and a bunch of unused #define's from all over the place.  More to come.
# 
# SGI Modid: 2.5.x-xfs:slinx:141499a
# --------------------------------------------
# 03/03/25	nathans@sgi.com	1.977.22.2
# [XFS] In showargs, report the usrquota/grpquota option variant, which is common.
# 
# SGI Modid: 2.5.x-xfs:slinx:141511a
# --------------------------------------------
# 03/03/25	mdharm-usb@one-eyed-alien.net	1.977.11.15
# [PATCH] usb-storage: LUN and isd200
# 
# This patch (developed with assistance from Jan Harkes
# <jaharkes@cs.cmu.edu>) makes the LUN field of a bulk-only transport come
# from a known-good source, rather than the likely-good command-byte.  It
# also updates the ISD200 driver to work with this change.
# --------------------------------------------
# 03/03/25	mdharm-usb@one-eyed-alien.net	1.977.11.16
# [PATCH] usb-storage: initialize urb status
# 
# This patch initializes the URB status before it's used.  While not
# technically required, it's good programming practice (and a similar bug
# just bit us on 2.4 with UHCI).
# --------------------------------------------
# 03/03/25	mdharm-usb@one-eyed-alien.net	1.977.11.17
# [PATCH] usb-storage: cleanup
# 
# This patch changes some debugging output to be a bit more clear, and
# removes some un-needed code -- it's no longer possible for us to have
# active URBs in the disconnect path.
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.23.1
# driver model: increase BUS_ID_SIZE to 20
# 
# From Ben Collins, for the sake of ieee1394, so the 64-bit (16 char) GUID
# can be used for the bus_id. 
# --------------------------------------------
# 03/03/25	nathans@sgi.com	1.977.22.3
# [XFS] whitespace and code formatting changes
# --------------------------------------------
# 03/03/25	greg@kroah.com	1.977.11.18
# [PATCH] USB: fix compiler warning in usb-storage
# --------------------------------------------
# 03/03/25	bhards@bigpond.net.au	1.977.11.19
# [PATCH] USB: CDC Ethernet zero packet fix
# --------------------------------------------
# 03/03/25	bhards@bigpond.net.au	1.977.11.20
# [PATCH] USB: CDC Ethernet maintainer transfer
# --------------------------------------------
# 03/03/25	greg@kroah.com	1.977.11.21
# USB: remove unneeded #include <linux/version.h>
# --------------------------------------------
# 03/03/25	greg@kroah.com	1.977.1.23
# Merge kroah.com:/home/greg/linux/BK/bleed-2.5
# into kroah.com:/home/greg/linux/BK/gregkh-2.5
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.24.1
# Merge bk://linux.bkbits.net/linux-2.5
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.24.2
# Merge osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-core
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.2
# [ARM] Fix ARM do_div() implementation
# 
# The ARM do_div() implementation was rather lax in that it only
# performed a 32-bit divide.  This cset fixes this oversight by
# providing a 64-bit by 32-bit division in asm.
# 
# This is necessary for posix-timers to function correctly.
# --------------------------------------------
# 03/03/25	mochel@osdl.org	1.977.24.3
# driver model: fix warning in cpu init.
# --------------------------------------------
# 03/03/25	davem@nuts.ninka.net	1.977.25.1
# Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
# into nuts.ninka.net:/home/davem/src/BK/net-2.5
# --------------------------------------------
# 03/03/25	davem@nuts.ninka.net	1.977.26.1
# Merge nuts.ninka.net:/home/davem/src/BK/sparcwork-2.5
# into nuts.ninka.net:/home/davem/src/BK/sparc-2.5
# --------------------------------------------
# 03/03/25	jgrimm2@us.ibm.com	1.977.25.2
# [IPV6]: Export some icmpv6 symbols for SCTP.
# --------------------------------------------
# 03/03/25	yoshfuji@linux-ipv6.org	1.977.25.3
# [IPSEC]: Remove duplicate / obsolete entry in include/linux/dst.h
# --------------------------------------------
# 03/03/25	randy.dunlap@verizon.net	1.977.25.4
# [NET]: typo and comment fixes
# --------------------------------------------
# 03/03/25	chas@locutus.cmf.nrl.navy.mil	1.977.25.5
# [ATM]: Fix total_len calculation in IPHASE driver.
# --------------------------------------------
# 03/03/25	kuznet@ms2.inr.ac.ru	1.977.25.6
# [IPV4]: Make sure rtcache flush happens after sysctl updates.
# --------------------------------------------
# 03/03/26	davem@nuts.ninka.net	1.977.26.2
# [SPARC64]: cpufreq cleanup, move notifier into common area.
# --------------------------------------------
# 03/03/26	davem@nuts.ninka.net	1.977.26.3
# [SPARC64]: Initial cut at Ultra-IIe cpufreq driver.
# --------------------------------------------
# 03/03/26	davem@nuts.ninka.net	1.977.26.4
# [SPARC64]: Make boot targets get cleaned up properly.
# --------------------------------------------
# 03/03/26	davem@nuts.ninka.net	1.977.25.7
# [IPSEC]: Remove unused field 'owner' from selector.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.3
# [ARM] Remove EXPORT_NO_SYMBOLS
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.4
# [ARM] Update mach-types
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.5
# [ARM] Pass prev task_struct through __switch_to
# --------------------------------------------
# 03/03/26	davem@nuts.ninka.net	1.977.25.8
# [IPSEC]: linux/xfrm.h u32 --> __u32.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.6
# [ARM] console init functions return type int
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.7
# [ARM] Update Cyber2000fb driver for new fbcon API
# 
# This cset updates cyber2000fb (used on NetWinders) to the new
# fbcon API, and adds cfbfillrect, cfbcopyarea and cfbimgblt
# objects when building cyber2000fb.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.8
# [ARM] Support write combining on framebuffers
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.9
# [ARM] Fix potential oops in epxa10db-flash.c
# 
# Fix potential oops if kmalloc returns NULL.
# Really return an error when out of memory.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.10
# [ARM] Quieten dc21285 host bridge driver during bus probing.
# 
# - don't report PCI aborts during config cycles as errors.
# - don't scan the pci buses if we receive a master abort.
# - setup error handlers earlier.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.11
# [ARM] Kill compiler warning about uninitialised ppcr in cpu-sa1110.c.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.19.12
# [ARM] Ensure transmit lines are held in mark state.
# --------------------------------------------
# 03/03/26	rmk@flint.arm.linux.org.uk	1.977.27.1
# Merge flint.arm.linux.org.uk:/usr/src/linux-bk-2.5/linux-2.5-pci
# into flint.arm.linux.org.uk:/usr/src/linux-bk-2.5/linux-2.5-pcmcia
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.25.9
# Merge bk://kernel.bkbits.net/davem/net-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.25.10
# Merge bk://bk.arm.linux.org.uk/linux-2.5-rmk
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/26	hch@sgi.com	1.977.22.4
# [XFS] Separate the quota source into its own subdirectory ala dmapi.
# Push a bunch of quota- and dmapi-specific code down into these
# subdirs which previously was compiled into the core XFS code,
# and don't descend into these subdirs if options config'd off.
# 
# SGI Modid: 2.5.x-xfs:slinx:141850a
# --------------------------------------------
# 03/03/26	nathans@sgi.com	1.977.22.5
# [XFS] Cleanup/remove a bunch of macros, comments and code.
# 
# SGI Modid: 2.5.x-xfs:slinx:141925a
# --------------------------------------------
# 03/03/26	nathans@sgi.com	1.977.22.6
# [XFS] Header shuffling to try and keep several source trees aligned - move the
# realtime inode detection macro somewhere more appropriate.
# 
# SGI Modid: 2.5.x-xfs:slinx:141951a
# --------------------------------------------
# 03/03/26	nathans@sgi.com	1.977.22.7
# [XFS] Cut and paste stuff up on my part in the DMAPI headers.
# 
# SGI Modid: 2.5.x-xfs:slinx:142163a
# --------------------------------------------
# 03/03/26	nathans@sgi.com	1.977.22.8
# [XFS] Cut and paste stuff up on my part in the DMAPI headers.
# 
# SGI Modid: 2.5.x-xfs:slinx:142170a
# --------------------------------------------
# 03/03/26	nathans@sgi.com	1.977.22.9
# [XFS] Add back the pagebuf flag for scheduling on the data daemon.  Moving
# this into just a pagebuf_iodone parameter was broken as we don't have
# sufficient state in all the places we need it to make the decision.
# 
# SGI Modid: 2.5.x-xfs:slinx:141626a
# --------------------------------------------
# 03/03/26	lord@sgi.com	1.977.22.10
# [XFS] optimize timestamp updates, use new hires timestamps more directly,
# also fix a bug where the mtime field was not correctly updated.
# 
# SGI Modid: 2.5.x-xfs:slinx:142296a
# --------------------------------------------
# 03/03/26	sandeen@sgi.com	1.977.22.11
# [XFS] Use mod_timer in place of del/modify/add (can race)
# Also use del_timer_sync when we're done.
# 
# SGI Modid: 2.5.x-xfs:slinx:142197a
# --------------------------------------------
# 03/03/26	roehrich@sgi.com	1.977.22.12
# [XFS] fix initialization of dmapi code
# 
# SGI Modid: 2.5.x-xfs:slinx:142389a
# --------------------------------------------
# 03/03/26	hch@sgi.com	1.977.22.13
# [XFS] remove fs/xfs/xfs_dqblk.h
# 
# This one should have gone away a few checkins ago.  I blame it on
# BitKeeper.. :)
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.1.24
# Merge bk://linuxusb.bkbits.net/linus-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/26	hch@hera.kernel.org	1.977.1.25
# Merge
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.28.1
# Merge bk://bk.arm.linux.org.uk/linux-2.5-pcmcia
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.1.26
# Merge ssh://master.kernel.org//home/hch/BK/xfs/linux-2.5/
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/26	linux@de.rmk.(none)	1.977.27.2
# [PCMCIA] "driver services" socket add/remove abstraction
# 
# Previously, "Driver Services" could only be called when the socket
# drivers were initialized earlier. This caused an awful lot of
# problems, especially when modprobe tried to load ds.ko and a pcmcia
# card driver at once.
# 
# As all socket devices are registered with the driver model core as
# being of "class_type pcmcia_socket_class", we can make use of that and
# register them with "Driver Services" upon detection or upon
# module loading of ds.c.
# 
# Also, the "I-need-two-initcalls-in-a-module"-tweak can go away.
# 
# Unfortunately, this patch reportedly breaks some RedHat pcmcia init
# scripts - they relied on the failed loading of ds.c to detect that no
# socket driver was loaded previously. To properly detect this, you
# should take a look at the /sys/class/pcmcia_socket/devices directory.
# --------------------------------------------
# 03/03/26	linux@de.rmk.(none)	1.977.27.3
# [PCMCIA] remove "init_status" from struct pcmcia_driver
# 
# As we don't have a late_initcall in ds.c any more, we can't easily
# distinguish between in-kernel drivers and those built as modules. This
# information was used by cardmgr to detect whether "rmmod" makes
# sense. As unloading of modules seems to be deprecated behaviour anyway
# in 2.5, and the current driver unloading process is IMO broken
# anyway, I don't shed any tears on this lost functionality.
# --------------------------------------------
# 03/03/27	linux@de.rmk.(none)	1.977.27.4
# [PATCH] convert ds.c's socket_info_t to struct pcmcia_bus_socket
# 
# Rename socket_info_t (which is used many, many times differently
# within pcmcia) to "struct pcmcia_bus_socket".
# 
# Also, a couple of functions in ds.c can be converted to use the "struct
# pcmcia_bus_socket" as argument instead of the socket number.
# --------------------------------------------
# 03/03/27	linux@de.rmk.(none)	1.977.27.5
# [PCMCIA] remove unused include/pcmcia/driver_ops.h
# 
# Except for dev_node_t, the contents of include/pcmcia/driver_ops.h
# aren't used anywhere within the kernel. It's a left-over file from the
# days when cardbus 32-bit cards weren't handled as pci devices, and
# their drivers as pci drivers. So, move the dev_node_t to
# include/pcmcia/ds.h, remove the lone in-kernel reference to
# driver_ops.h, and remove the contents of driver_ops.h.
# --------------------------------------------
# 03/03/27	paulus@samba.org	1.983
# Merge samba.org:/home/paulus/kernel/linux-2.5
# into samba.org:/home/paulus/kernel/for-linus-ppc
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.984
# PPC32: Factor out common code for reading/setting various SPRs.
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.985
# PPC32: Add support for CPU frequency scaling on some PowerMacs
# --------------------------------------------
# 03/03/26	torvalds@home.transmeta.com	1.977.1.27
# Merge bk://bk.arm.linux.org.uk/linux-2.5-pcmcia
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.986
# PPC32: Add function for choosing which PLL to use on 750FX cpus.
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.987
# PPC32: Forward-port support for new powermacs from 2.4 tree.
# --------------------------------------------
# 03/03/27	paulus@samba.org	1.988
# PPC32: Fix compilation of powermac cpufreq stuff
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.989
# PPC32: Unmap the VIA (versatile interface adaptor) chip after we are done with it.
# --------------------------------------------
# 03/03/27	trini@kernel.crashing.org	1.990
# PPC32: Update the banner printed for the Spruce board
# --------------------------------------------
# 03/03/27	benh@kernel.crashing.org	1.991
# PPC32: Get Open Firmware to initialize all the displays, not just one.
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.1.28
# Merge bk://linux.bkbits.net/linux-2.5
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.1.29
# Merge osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-core
# --------------------------------------------
# 03/03/27	jmorris@intercode.com.au	1.977.25.11
# [NET]: Warn only once about SO_BSDCOMPAT.
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.1
# Fix xprt.c so that it resends RPC requests immediately after a timeout.
# Doing this ensures that we keep probing the connection to the server
# rather than just waiting for the entire congestion window to time out.
# The latter can be very expensive due to the exponential backoff rule.
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.2
# Micro-optimization: rename rpc_lookup_path() as rpc_lookup_parent()
# and drop the 'flags' argument (it was always set to LOOKUP_PARENT).
# 
# If the user supplies an empty path, return -ENOENT.
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.3
# Fix the RPC debugging code so that it doesn't Oops if a task has
# a null 'p_proc' procedure pointer.
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.4
#   [PATCH] Fix misleading EIO on NFS client
# 
#   The following patch by Joe Korty removes an over-zealous check in the NFS
#   read code that causes pages to be incorrectly marked with PG_error and
#   hence causes an EIO to be returned to userland.
# 
#   The test is incorrect as it ignores the fact that we may be caching a
#   write that will extend the file on the server (and hence will create a
#   hole in the region concerned).
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.5
# Fix a typo in auth_gss.c. Clean out an unused variable.
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.1
# [PATCH] Avoid unknown IDE commands
# 
# One from Jens - fix up the problems with older Samsung disks that don't
# abort unknown commands sometimes
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.2
# [PATCH] Add a comment that the irq_nosync stuff needs revisiting
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.3
# [PATCH] Ensure hdparm errors to the user when the request isn't allowed
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.4
# [PATCH] Remove obsolete IDE timing hack
# 
# Vojtech Pavlik
# 
# The ide fixes for the VIA8235 obsolete this hack
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.6
# Fix a memory corruption bug in NFSv4 client.
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.5
# [PATCH] fix radio_cadet driver locking
# 
# Forward port the replacement to the horribly broken locking in 2.5
# radio_cadet driver.
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.6
# [PATCH] Fix up 3w-xxxx driver
# 
# Fix up 3w-xxxx. I didn't test SMP and it shows
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.7
# [PATCH] Merge the serial config entries for PC9800
# 
# From Osamu Tomita
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.8
# [PATCH] Make cramfs compile again
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.9
# [PATCH] fs/exec.c does not need __NO_VERSION__
# 
# From Christoph Hellwig I think
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.10
# [PATCH] Quota should not reference user addresses directly
# 
# (Stanford Checker, Chris Wright)
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.11
# [PATCH] PC9800 uses different IDE i/o bases for legacy mode devices
# 
# From Osamu Tomita
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.12
# [PATCH] Wrong kind of NUL fix for asm headers
# --------------------------------------------
# 03/03/27	alan@lxorguk.ukuu.org.uk	1.977.30.13
# [PATCH] S/390 typo fixes
# 
# From Steven Cole
# --------------------------------------------
# 03/03/27	trond.myklebust@fys.uio.no	1.977.29.7
# Fix clnt.c to skip re-encoding an RPC call in the case when
# we're writing over TCP and have done a partial send.
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.14
# [PATCH] kNFSd: READ_BUF macro update
# 
# Fix some misleading comments, and correct the test for requesting more
# than one page.
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.15
# [PATCH] kNFSd: fix WRITE decoding
# 
# NFSv4 operations after WRITE are decoded into wr_vec[] pages, thus the
# argp->pagelen can be non-zero at the end of decoding the WRITE
# operation.
# 
# This patch correctly sets argp->pagelen, and correctly advances argp->p
# after the WRITE operation
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.16
# [PATCH] kNFSd: fix read encoding
# 
# encode_read: change 'len' variable from unsigned long to a long for
# '(len > 0)' comparison.  don't set up a tail iovec for zero length
# reads.
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.17
# [PATCH] kNFSd: Be more careful with readlock in exp_parent
# 
# We currently hold a read_lock of dparent_lock
# while calling exp_get_by_name on several ancestors
# of a given dentry.  However exp_get_by_name can
# malloc(GFP_KERNEL), so that isn't a good idea.
# 
# Now we only claim the lock while actually
# stepping up the parent chain.
# 
# This addresses bug 29 @ bugme.osdl.org
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.18
# [PATCH] md: md/linear oops fix
# 
# From: Daniel McNeil <daniel@osdl.org>
# 
# This fixes an oops caused by incorrect usage of sector_div()
# in which_dev() in md/linear.c.  It was dereferencing a non-existent
# hash table entry.
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.19
# [PATCH] md: Cleanup #define TEXT text ... printk(TEXT)
# 
# Also reformat some printks so lines don't exceed 80chars.
# 
# Thanks to  Angus Sawyer <angus.sawyer@dsl.pipex.com>
# --------------------------------------------
# 03/03/27	neilb@cse.unsw.edu.au	1.977.30.20
# [PATCH] md: Convert md personalities to new module interface
# 
# Thanks to  Angus Sawyer <angus.sawyer@dsl.pipex.com> and
#    Daniel McNeil <daniel@osdl.org>
# --------------------------------------------
# 03/03/27	torvalds@penguin.transmeta.com	1.977.29.8
# Merge http://nfsclient.bkbits.net/linux-2.5
# into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
# --------------------------------------------
# 03/03/27	davem@nuts.ninka.net	1.977.31.1
# [USB]: In ohci-pci.c, use size_t printf format.
# --------------------------------------------
# 03/03/27	davem@nuts.ninka.net	1.977.31.2
# [SPARC64]: Fix pcibios_resource_to_bus and the build for this platform.
# --------------------------------------------
# 03/03/27	torvalds@penguin.transmeta.com	1.992
# Merge bk://ppc.bkbits.net/for-linus-ppc
# into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
# --------------------------------------------
# 03/03/27	davem@nuts.ninka.net	1.977.31.3
# [SPARC64]: Implement dump stack and handle dumping current's stack properly.
# --------------------------------------------
# 03/03/27	davem@nuts.ninka.net	1.977.31.4
# [SPARC64]: Use GFP_ATOMIC in request_irq.
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.1.30
# sysfs: Fix file removal
# 
# Turns out we do need to do d_delete() on individual files (and symlinks)
# when removing them from the tree, to make sure they actually get removed from
# the hierarchy. 
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.32.1
# Merge bk://linux.bkbits.net/linux-2.5
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.1.31
# Merge osdl.org:/home/mochel/src/kernel/devel/linux-2.5-virgin
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-core
# --------------------------------------------
# 03/03/27	cminyard@mvista.com	1.977.29.9
# [PATCH] fix ipmi_devintf.c compilation
# 
# This fixes ipmi compilation (with some documentation updates and another
# minor fix, too).
# 
# It also fixes a problem with the state machine getting stuck in a
# certain error condition.
# --------------------------------------------
# 03/03/27	ldm.adm@hostme.bitkeeper.com	1.977.33.1
# Merge hostme.bitkeeper.com:/ua/repos/l/ldm/linux-2.5
# into hostme.bitkeeper.com:/ua/repos/l/ldm/linux-2.5-core
# --------------------------------------------
# 03/03/27	mochel@osdl.org	1.977.1.32
# Merge bk://ldm@bkbits.net/linux-2.5-core
# into osdl.org:/home/mochel/src/kernel/devel/linux-2.5-core
# --------------------------------------------
# 03/03/28	linux@de.rmk.(none)	1.977.27.6
# [PCMCIA] fix pcmcia_bind_driver
# 
# Don't allow "bind_request" to be called before "register_pccard_driver".
# --------------------------------------------
# 03/03/28	linux@de.rmk.(none)	1.977.27.7
# [PCMCIA] fix compilation with PCMCIA_DEBUG on
# --------------------------------------------
# 03/03/27	torvalds@home.transmeta.com	1.993
# Merge penguin:v2.5/linux
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/27	torvalds@home.transmeta.com	1.994
# Merge bk://bk.arm.linux.org.uk/linux-2.5-pcmcia
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/28	linux@de.rmk.(none)	1.995
# [PCMCIA] generic suspend/resume capability
# 
# The socket drivers already offer suspend and resume
# capability. Integrate this with the driver model, based on a
# suggestion by Russell King.
# 
# Also, remove two never-used functions from the socket drivers (to_ns).
# 
#  drivers/pcmcia/cs.c             |   70 ++++++++++++++++++++--------------------
#  drivers/pcmcia/cs_internal.h    |    1
#  drivers/pcmcia/hd64465_ss.c     |    2 +
#  drivers/pcmcia/i82092.c         |   17 ++++++---
#  drivers/pcmcia/i82365.c         |    2 +
#  drivers/pcmcia/pci_socket.c     |   15 +-------
#  drivers/pcmcia/sa1100_generic.c |    2 +
#  drivers/pcmcia/sa1111_generic.c |   14 +-------
#  drivers/pcmcia/tcic.c           |    7 +---
#  include/pcmcia/ss.h             |    5 ++
#  10 files changed, 64 insertions(+), 71 deletions(-)
# --------------------------------------------
# 03/03/28	linux@de.rmk.(none)	1.996
# [PCMCIA] don't inform "driver services" of cardbus-related events
# --------------------------------------------
# 03/03/29	hch@de.rmk.(none)	1.997
# [SERIAL] switch over 8250_cs to pcmcia_register_driver
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.1
# [PATCH] initcall debug code
# 
# The patch is designed to help locate where the kernel is dying during the
# startup sequence.
# 
# - Boot parameter "initcall_debug" causes the kernel to print out the
#   address of each initcall before calling it.
# 
#   The kallsyms tables do not cover __init sections, so printing the
#   symbolic version of these symbols doesn't work.  They need to be looked up
#   in System.map.
# 
# - Detect whether an initcall returns with interrupts disabled or with a
#   locking imbalance.  If it does, complain and then try to fix it up.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.2
# [PATCH] POSIX timers interface long/int cleanup
# 
# From: Eric Piel <Eric.Piel@Bull.Net>
# 
# Fixes some long/int confusion on 64-bit machines which was causing failures
# on ia64 - we end up trying to set bits in the 32-63 range on an int and the
# kernel locks up.
# 
# Also cleans up idr.h.
# 
# George has acked this change.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.3
# [PATCH] slab: fix off-by-one in size calculation
# 
# From: Manfred Spraul <manfred@colorfullife.com>
# 
# Brian spotted a stupid bug in the slab initialization:
# 
# If multiple objects fit into one cacheline, then the allocator ignores
# SLAB_HWCACHE_ALIGN and squeezes the objects into the same cacheline.  The
# implementation contains an off by one error and thus doesn't work correctly:
# For Athlon optimized kernels, the 32-byte slab uses 64 byte of memory.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.4
# [PATCH] add flush_cache_page() to install_page()
# 
# install_page() needs to run flush_cache_page() prior to overwriting an
# already-established pte.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.5
# [PATCH] posix timers: fix double-reporting of timer expiration
# 
# From: george anzinger <george@mvista.com>
# 
# Timer expirations are being reported twice.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.6
# [PATCH] remove SWAP_ERROR
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# Delete unused SWAP_ERROR and non-existent page_over_rsslimit().
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.7
# [PATCH] permit page unmapping if !CONFIG_SWAP
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# Raised #endif CONFIG_SWAP in shrink_list, it was excluding
# try_to_unmap of file pages.  Suspect !CONFIG_MMU relied on
# that to suppress try_to_unmap, added SWAP_FAIL stub for it.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.8
# [PATCH] make add_to_swap_cache() static
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# Make add_to_swap_cache static, it's only used by read_swap_cache_async;
# and since that has just done a GFP_HIGHUSER allocation, surely it's
# better for add_to_swap_cache to use GFP_KERNEL than GFP_ATOMIC.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.9
# [PATCH] tmpfs truncation fix
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# Recent testing has shown that swapoff can sneak a page back into the
# tmpfs page cache after truncate_inode_pages has cleaned it, before
# shmem_truncate resets next_index to stop that: BUG_ON(inode->i_blocks)
# in shmem_delete_inode.  So call truncate_inode_pages again to be safe.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.10
# [PATCH] handle oom in tmpfs
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# move_from_swap_cache and add_to_page_cache_lru are using GFP_ATOMIC,
# which can easily fail in an intermittent way.  Rude if shmem_getpage
# then fails with -ENOMEM: use blk_congestion_wait() to let kswapd in,
# and repeat.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.11
# [PATCH] remove vm_enough_memory double counting
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# Stop vm_enough_memory double counting total_swapcache_pages: it dates
# from the days when we didn't free swap when freeing swapcache page.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.12
# [PATCH] ext3: fix max file size
# 
# ext3 is only permitting files of up to 32G.  It should be allowing files to
# be up to 2TB.
# 
# This is because it is running ext3_max_size(1024) before the filesystem's
# blocksize has been determined.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.13
# [PATCH] wait_on_buffer refcounting checks
# 
# It is generally illegal to wait on an unpinned buffer - another CPU could
# free it up even before __wait_on_buffer() has taken a ref against the buffer.
# 
# Maybe external locking rules will prevent this in specific cases, but that is
# really subtle and fragile as locking rules are evolved.
# 
# The patch detects people calling wait_on_buffer() against an unpinned buffer
# and issues a diagnostic.
# 
# Also remove the get_bh() from __wait_on_buffer().  It is too late.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.14
# [PATCH] x86 clock override boot option
# 
# From: john stultz <johnstul@us.ibm.com>
# 
# This patch allows one to manually specify the i386 gettimeofday time-source
# by passing clock=[pit|tsc|cyclone|...] as a boot argument.  The argument will
# override the default probed selection, and in case the selected time-source
# is not available the code defaults to using the PIT (printing a warning saying
# so).
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.15
# [PATCH] fix to support discontigmem for 16way x440
# 
# From: Patricia Gaughen <gone@us.ibm.com>
# 
# The boot-time ioremap code needs to invalidate the tlb entries after setting
# up the new pte.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.16
# [PATCH] tty_io cleanup
# 
# From: Andries.Brouwer@cwi.nl
# 
# Adding the unregister_chrdev_region call that is the counterpart to
# register_chrdev_region, we get a nice cleanup of tty_io.c.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.17
# [PATCH] speed up ext3_sync_file()
# 
# There is never a need to write out b_assoc_buffers() in ext3_sync_file().
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.18
# [PATCH] add a might_sleep() check to kmap()
# 
# kmap() can sleep, but rarely does.  Add a check for kmap() being called from
# inappropriate contexts.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.19
# [PATCH] ext3: remove dead code and variables
# 
# Spotted by Oleg Drokin: remove a couple of local variables which aren't being
# used for anything.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.20
# [PATCH] use page_to_pfn() in __blk_queue_bounce()
# 
# From: William Lee Irwin III <wli@holomorphy.com>
# 
# __blk_queue_bounce() contains an open-coded page_to_pfn() for the
# discontig, non-MAP_NR_DENSE() case (wherever MAP_NR_DENSE() went).
# This converts it to use the standard page_to_pfn() abstraction.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.21
# [PATCH] init_inode_once() wants sizeof(struct hlist_head)
# 
# From: William Lee Irwin III <wli@holomorphy.com>
# 
# inode_init() wants to deal with things in units of the size of
# struct hlist_head, not struct list_head.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.22
# [PATCH] honour VM_DONTEXPAND in vma merging
# 
# From: Gerd Knorr <kraxel@bytesex.org>
# 
# 2.5.x kernels don't look at the VM_DONTEXPAND flag when merging multiple vmas
# into one.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.23
# [PATCH] Fix 64bit warnings in mm/page_alloc.c
# 
# From: Andi Kleen <ak@muc.de>
# 
# Fix a warning on x86_64 (and presumably ia64) which arises from casting an
# int to a pointer.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.24
# [PATCH] make cdevname() callable from interrupts
# 
# We just made bdevname() irq-safe.  This patch makes cdevname() callable from
# interrupts as well.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.25
# [PATCH] register_chrdev_region() leak and race fix
# 
# - If two CPUs run register_chrdev_region(major == 0) at the same time they
#   can get the same major.
# 
#   Fix that by extending the lock coverage.
# 
# - local variable `cd' was leaky on an error path.
# 
# - Add some API commentary.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.26
# [PATCH] slab: cache sizes cleanup
# 
# From: Brian Gerst <bgerst@didntduck.org>
# 
# - Reduce code duplication by putting the kmalloc cache sizes into a header
#   file.
# 
# - Tidy up kmem_cache_sizes_init().
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.27
# [PATCH] sync blockdevs on the final close only
# 
# From: davej@codemonkey.org.uk
# 
# We currently run sync_blockdev() on every close of a blockdev.  This patch
# arranges for the sync to happen only on the final close.
# --------------------------------------------
# 03/03/28	akpm@digeo.com	1.994.1.28
# [PATCH] Remove unused variable in nfs_readpage_result()
# 
# Remove unused local `fattr' in nfs_readpage_result().
# --------------------------------------------
# 03/03/28	bwindle@fint.org	1.994.1.29
# [tulip] remove unnecessary linux/version.h includes
# --------------------------------------------
# 03/03/29	davej@codemonkey.org.uk	1.994.1.30
# [tulip dmfe] add pci id
# --------------------------------------------
# 03/03/29	fubar@us.ibm.com	1.994.1.31
# [bonding] bug fixes, and a few minor feature additions
# 
# Mainly sync w/ 2.4.x version.
# --------------------------------------------
# 03/03/29	bunk@fs.tum.de	1.994.1.32
# [PATCH] fix .text.exit error in drivers/net/r8169.c
# 
# In drivers/net/r8169.c the function rtl8169_remove_one is __devexit but
# the pointer to it didn't use __devexit_p resulting in a .text.exit
# compile error when !CONFIG_HOTPLUG.
# 
# The fix is simple:
# --------------------------------------------
# 03/03/29	davej@codemonkey.org.uk	1.994.1.33
# [PATCH] finish init_etherdev conversion for gt96100eth
# 
# - No need to alloc dev->priv (due to init_etherdev usage)
# - No need to kfree dev->priv (kfree'd with (dev) already)
# --------------------------------------------
# 03/03/29	paulus@samba.org	1.994.1.34
# [PATCH] MACE ethernet driver update
# 
# This patch updates the MACE ethernet driver, used on older powermacs,
# to remove the uses of save_flags/restore_flags/cli/sti and use a
# spinlock instead.
# 
# Jeff, please send this on to Linus.
# 
# Paul.
# --------------------------------------------
# 03/03/29	mbligh@aracnet.com	1.994.1.35
# [PATCH] remove warning for 3c509.c
# 
# Get this compile warning:
# drivers/net/3c509.c:207: warning: `el3_device_remove' declared `static' but never defined
# because the function definition is under
# "#if defined(CONFIG_EISA) || defined(CONFIG_MCA)".
# 
# This patch puts the declaration under the same conditions.
# I'd be shocked if it wasn't correct ;-)
# 
# M.
# --------------------------------------------
# 03/03/28	davem@nuts.ninka.net	1.977.31.5
# [SPARC64]: Fix interrupt enabling on trap return.
# --------------------------------------------
# 03/03/29	davem@nuts.ninka.net	1.994.2.1
# Merge nuts.ninka.net:/home/davem/src/BK/sparcwork-2.5
# into nuts.ninka.net:/home/davem/src/BK/sparc-2.5
# --------------------------------------------
# 03/03/29	davem@nuts.ninka.net	1.994.2.2
# [SPARC64]: Update defconfig.
# --------------------------------------------
# 03/03/29	jmorris@intercode.com.au	1.977.25.12
# [CRYPTO]: Add Deflate algorithm to crypto API.
# --------------------------------------------
# 03/03/29	davem@nuts.ninka.net	1.994.2.3
# [SPARC64]: Implement pcibios_bus_to_resource.
# --------------------------------------------
# 03/03/29	toml@us.ibm.com	1.977.25.13
# [IPSEC]: Missing ipv6 policy checks.
# --------------------------------------------
# 03/03/29	toml@us.ibm.com	1.977.25.14
# [IPSEC]: IPV6 AH/ESP fixes.
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.1
# [PATCH] : Discovery locking fixes
# 
# 	o [CRITICA] Fix remaining locking problem with discovery log
# 	o [CRITICA] Don't call expiry callback under spinlock
# 	o [FEATURE] Simplify/cleanup/optimise discovery/expiry code
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.2
# [PATCH] : IrLAP dynamic window code fix
# 
# 	o [FEATURE] Fix the dynamic window code to properly send the pf bit.
# 		Increase perf by 40% for large packets at SIR.
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.3
# [PATCH] : irda-usb Rx path cleanup + no clear_halt
# 
# 	o [CORRECT] Don't do usb_clear_halt() on USB control pipe
# 	o [FEATURE] Cleanup and simplify the USB Rx path
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.4
# [PATCH] : irtty-sir ZeroCopy Rx
# 
# 	o [FEATURE] Enable ZeroCopy Rx in irtty-sir/sir-dev
# 		(provided by the new SIR wrapper in 2.5.61).
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.5
# [PATCH] : IrDA timer fix
# 
# 	o [FEATURE] Make IrDA timers use mod_timer instead of
# 	add+del_timer
# --------------------------------------------
# 03/03/29	jt@bougret.hpl.hp.com	1.994.3.6
# [PATCH] : IrNET module fix
# 
# 	o [CORRECT] Fix module refcounting (MOD_INC/DEC => .owner)
# 	o [FEATURE] Add hints to discovery (control channel)
# --------------------------------------------
# 03/03/29	jgarzik@redhat.com	1.994.4.1
# [hw_random] add AMD pci id
# 
# Contributed by Andi Kleen
# --------------------------------------------
# 03/03/29	torvalds@home.transmeta.com	1.994.1.36
# Merge bk://kernel.bkbits.net/jgarzik/irda-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/29	torvalds@home.transmeta.com	1.994.1.37
# Merge bk://kernel.bkbits.net/jgarzik/misc-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/29	torvalds@home.transmeta.com	1.994.1.38
# Merge bk://ldm.bkbits.net/linux-2.5-core
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/30	linux@de.rmk.(none)	1.998
# [PCMCIA] Fix "Removing wireless card triggers might_sleep warnings."
# 
# Bug 516.
# 
# Use schedule_delayed_work instead of a timer should fix this. Thanks
# to Andrew Morton and Russell King.
# 
# (Added flush_scheduled_work() to ensure our delayed work completes
# before we free the pcmcia_bus_socket structure. --rmk)
# --------------------------------------------
# 03/03/29	torvalds@home.transmeta.com	1.994.1.39
# Update direct-rendering to current DRI CVS tree.
# 
# This adds support for i830 interrupt handling, and new improved
# lock context keying. See per-file comments for more detail, as this
# commit sadly mixes up a few different things (that's what you get
# for not tracking the changes at a fine enough granularity).
# --------------------------------------------
# 03/03/30	rmk@flint.arm.linux.org.uk	1.999
# [PCMCIA] Reorganise SA11xx PCMCIA support.
# 
# The SA1100 PCMCIA structure didn't lend itself well to the device
# model.  With this reorganisation, we end up with a reasonable
# structure which fits better with the driver model.  It is now
# obvious that SA11x0-based socket drivers are separate from
# SA1111-based socket drivers, and are treated as two separate drivers
# by the driver model.
# --------------------------------------------
# 03/03/30	davem@nuts.ninka.net	1.977.25.15
# [NET]: Nuke CONFIG_FILTER.
# --------------------------------------------
# 03/03/30	Andries.Brouwer@cwi.nl	1.994.1.40
# [PATCH] readlink in /proc w/ overlong path
# 
# Trying to read the overlong target of a /proc/*/fd/N file descriptor
# leads to a SIGSEGV inside the kernel, because the code doesn't check
# for an error pointer return from d_path.
# --------------------------------------------
# 03/03/30	torvalds@home.transmeta.com	1.1000
# Merge bk://bk.arm.linux.org.uk/linux-2.5-pcmcia
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/31	davem@nuts.ninka.net	1.1001
# Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
# into nuts.ninka.net:/home/davem/src/BK/net-2.5
# --------------------------------------------
# 03/03/31	randolph@tausq.org	1.1002
# [COMPAT]: Fix sock_fprog handling.
# --------------------------------------------
# 03/03/31	randolph@tausq.org	1.1003
# [COMPAT]: Fix MSG_CMSG_COMPAT flag passing, kill cmsg_compat_recvmsg_fixup.
# --------------------------------------------
# 03/03/31	hch@lst.de	1.1004
# [IPV6]: Fix warning with modular ipv6.
# --------------------------------------------
# 03/03/31	davem@nuts.ninka.net	1.1000.1.1
# Merge nuts.ninka.net:/home/davem/src/BK/sparcwork-2.5
# into nuts.ninka.net:/home/davem/src/BK/sparc-2.5
# --------------------------------------------
# 03/03/31	davem@nuts.ninka.net	1.1000.1.2
# [SPARC64]: Get ALI trident sound working again.
# --------------------------------------------
# 03/03/31	davem@nuts.ninka.net	1.1005
# [NET]: Use might_sleep in alloc_skb.
# --------------------------------------------
# 03/03/31	laforge@netfilter.org	1.1006
# [NETFILTER]: iptables iptable_mangle LOCAL_IN bugfix.
# --------------------------------------------
# 03/03/31	laforge@netfilter.org	1.1007
# [NETFILTER]: ipt_REJECT bugfix for TCP RST packets + asymm. routing.
# --------------------------------------------
# 03/03/31	paulm@routefree.com	1.1008
# [NETFILTER]: ip_conntrack bugfix for LOCAL_NAT and PPTP.
# --------------------------------------------
# 03/03/31	torvalds@home.transmeta.com	1.1009
# Merge bk://kernel.bkbits.net/davem/net-2.5
# into home.transmeta.com:/home/torvalds/v2.5/linux
# --------------------------------------------
# 03/03/31	davem@redhat.com	1.1010
# [PATCH] Check for disabled local interrupts in "might_sleep()"
# 
# Self explanatory, as per the discussion last week.
# --------------------------------------------
# 03/03/31	axboe@suse.de	1.1011
# [PATCH] scsi queueing weirdness
# 
# The queueing logic in scsi_lib looks really odd right now.
# 
# - Defers calling elv_next_request() until we actually think we can queue
#   something. Always want to do that, request will have been marked
#   REQ_STARTED after this, so block layer cannot touch it or merge to it.
# 
# - Kill the queue empty check. If elv_next_request() returned a req,
#   there's one to queue.
# --------------------------------------------
#
diff -Nru a/CREDITS b/CREDITS
--- a/CREDITS	Mon Mar 31 13:41:07 2003
+++ b/CREDITS	Mon Mar 31 13:41:07 2003
@@ -1210,6 +1210,10 @@
 S: Atlanta, Georgia 30332
 S: USA
 
+N: Brad Hards
+E: bradh@frogmouth.net
+D: Various USB bits, other minor patches
+
 N: Angelo Haritsis
 E: ah@computer.org
 D: kernel patches (serial, watchdog)
diff -Nru a/Documentation/IPMI.txt b/Documentation/IPMI.txt
--- a/Documentation/IPMI.txt	Mon Mar 31 13:41:06 2003
+++ b/Documentation/IPMI.txt	Mon Mar 31 13:41:06 2003
@@ -5,6 +5,18 @@
 			  <minyard@mvista.com>
 			    <minyard@acm.org>
 
+The Intelligent Platform Management Interface, or IPMI, is a
+standard for controlling intelligent devices that monitor a system.
+It provides for dynamic discovery of sensors in the system and the
+ability to monitor the sensors and be informed when the sensor's
+values change or go outside certain boundaries.  It also has a
+standardized database for field-replaceable units (FRUs) and a watchdog
+timer.
+
+To use this, you need an interface to an IPMI controller in your
+system (called a Baseboard Management Controller, or BMC) and
+management software that can use the IPMI system.
+
 This document describes how to use the IPMI driver for Linux.  If you
 are not familiar with IPMI itself, see the web site at
 http://www.intel.com/design/servers/ipmi/index.htm.  IPMI is a big
diff -Nru a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
--- a/Documentation/kernel-parameters.txt	Mon Mar 31 13:41:09 2003
+++ b/Documentation/kernel-parameters.txt	Mon Mar 31 13:41:09 2003
@@ -207,6 +207,12 @@
 
 	chandev=	[HW,NET] Generic channel device initialisation
  
+ 	clock=		[BUGS=IA-32, HW] gettimeofday timesource override. 
+			Forces specified timesource (if available) to be used
+			when calculating gettimeofday(). If specified timesource
+			is not available, it defaults to PIT. 
+			Format: { pit | tsc | cyclone | ... }
+			
 	cm206=		[HW,CD]
 			Format: { auto | [<io>,][<irq>] }
 
@@ -389,6 +395,10 @@
 			Format: <full_path>
 			Run specified binary instead of /sbin/init as init
 			process.
+
+	initcall_debug	[KNL] Trace initcalls as they are executed.  Useful
+			for working out where the kernel is dying during
+			startup.
 
 	initrd=		[BOOT] Specify the location of the initial ramdisk
 
diff -Nru a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
--- a/Documentation/networking/bonding.txt	Mon Mar 31 13:41:06 2003
+++ b/Documentation/networking/bonding.txt	Mon Mar 31 13:41:06 2003
@@ -7,6 +7,7 @@
   - Constantine Gavrilov <const-g at xpert.com>
   - Chad N. Tindel <ctindel at ieee dot org>
   - Janice Girouard <girouard at us dot ibm dot com>
+  - Jay Vosburgh <fubar at us dot ibm dot com>
 
 Note :
 ------
@@ -199,28 +200,42 @@
 parameters be specified, otherwise serious network degradation will occur
 during link failures.
 
+max_bonds
+
+	Specifies the number of bonding devices to create for this
+	instance of the bonding driver.  E.g., if max_bonds is 3, and
+	the bonding driver is not already loaded, then bond0, bond1
+	and bond2 will be created.  The default value is 1.
+
 mode
-        Specifies one of four bonding policies. The default is round-robin.
-        Possible values are:
- 
-        0       Round-robin policy: Transmit in a sequential order from the
-                first available slave through the last. This mode provides
-                load balancing and fault tolerance.
-
-        1       Active-backup policy: Only one slave in the bond is active. A 
-                different slave becomes active if, and only if, the active slave
-                fails. The bond's MAC address is externally visible on only 
-                one port (network adapter) to avoid confusing the switch.
-                This mode provides fault tolerance.
- 
- 
-        2       XOR policy: Transmit based on [(source MAC address XOR'd with 
-                destination MAC address) modula slave count]. This selects the 
-                same slave for each destination MAC address. This mode provides
-                load balancing and fault tolerance.
 
-        3       Broadcast policy: transmits everything on all slave interfaces.
-                This mode provides fault tolerance.
+	Specifies one of four bonding policies. The default is
+round-robin (balance-rr).  Possible values are (you can use either the
+text or numeric option):
+ 
+	balance-rr or 0
+		Round-robin policy: Transmit in a sequential order
+		from the first available slave through the last. This
+		mode provides load balancing and fault tolerance.
+
+	active-backup or 1
+		Active-backup policy: Only one slave in the bond is
+		active. A different slave becomes active if, and only
+		if, the active slave fails. The bond's MAC address is
+		externally visible on only one port (network adapter)
+		to avoid confusing the switch.  This mode provides
+		fault tolerance.
+ 
+        balance-xor or 2
+		XOR policy: Transmit based on [(source MAC address
+		XOR'd with destination MAC address) modulo slave
+		count]. This selects the same slave for each
+		destination MAC address. This mode provides load
+		balancing and fault tolerance.
+
+	broadcast or 3
+		Broadcast policy: transmits everything on all slave
+		interfaces. This mode provides fault tolerance.
 
 miimon
  
@@ -229,6 +244,27 @@
         100 is a good starting point. See High Availability section for 
         additional information. The default value is 0.
 
+use_carrier
+
+        Specifies whether or not miimon should use MII or ETHTOOL
+        ioctls vs. netif_carrier_ok() to determine the link status.
+        The MII or ETHTOOL ioctls are less efficient and utilize a
+        deprecated calling sequence within the kernel.  The
+        netif_carrier_ok() relies on the device driver to maintain its
+        state with netif_carrier_on/off; at this writing, most, but
+        not all, device drivers support this facility.
+
+        If bonding insists that the link is up when it should not be,
+        it may be that your network device driver does not support
+        netif_carrier_on/off.  This is because the default state for
+        netif_carrier is "carrier on." In this case, disabling
+        use_carrier will cause bonding to revert to the MII / ETHTOOL
+        ioctl method to determine the link state.
+
+        A value of 1 enables the use of netif_carrier_ok(), a value of
+        0 will use the deprecated MII / ETHTOOL ioctls.  The default
+        value is 1.
+
 downdelay
  
         Specifies the delay time in milli-seconds to disable a link after a 
@@ -277,14 +313,17 @@
 
 multicast
 
-        Integer value for the mode of operation for multicast support.
+        Option specifying the mode of operation for multicast support.
         Possible values are:
 
-        0       Disabled (no multicast support)
+	disabled or 0
+		Disabled (no multicast support)
 
-        1       Enabled on active slave only, useful in active-backup mode
+        active or 1
+		Enabled on active slave only, useful in active-backup mode
 
-        2       Enabled on all slaves, this is the default
+	all or 2
+		Enabled on all slaves, this is the default
 
 
 Configuring Multiple Bonds
@@ -321,7 +360,52 @@
 alias bond0 bonding
 options bond0 arp_interval=60 arp_ip_target=192.168.0.100
 
+Potential Problems When Using ARP Monitor
+=========================================
+
+1. Driver support
 
+The ARP monitor relies on the network device driver to maintain two
+statistics: the last receive time (dev->last_rx), and the last
+transmit time (dev->trans_start).  If the network device driver does
+not update one or both of these, then the typical result will be that,
+upon startup, all links in the bond will immediately be declared down,
+and remain that way.  A network monitoring tool (tcpdump, e.g.) will
+show ARP requests and replies being sent and received on the bonding
+device.
+
+The possible resolutions for this are to (a) fix the device driver, or
+(b) discontinue the ARP monitor (using miimon as an alternative, for
+example).
+
+2. Adventures in Routing
+
+When bonding is set up with the ARP monitor, it is important that the
+slave devices not have routes that supersede routes of the master (or,
+generally, not have routes at all).  For example, suppose the bonding
+device bond0 has two slaves, eth0 and eth1, and the routing table is
+as follows:
+
+Kernel IP routing table
+Destination     Gateway         Genmask         Flags   MSS Window  irtt Iface
+10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth0
+10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth1
+10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 bond0
+127.0.0.0       0.0.0.0         255.0.0.0       U        40 0          0 lo
+
+In this case, the ARP monitor (and ARP itself) may become confused,
+because ARP requests will be sent on one interface (bond0), but the
+corresponding reply will arrive on a different interface (eth0).  This
+reply looks to ARP as an unsolicited ARP reply (because ARP matches
+replies on an interface basis), and is discarded.  This will likely
+still update the receive/transmit times in the driver, but will lose
+packets.
+
+The resolution here is simply to ensure that slaves do not have routes
+of their own, and if for some reason they must, those routes do not
+supersede routes of their master.  This should generally be the case,
+but unusual configurations or errant manual or automatic static route
+additions may cause trouble.
 
 Switch Configuration
 ====================
@@ -462,7 +546,7 @@
 	If not explicitly configured with ifconfig, the MAC address of the
 	bonding device is taken from its first slave device. This MAC address
 	is then passed to all following slaves and remains persistent (even if
-	the first slave is removed) until the bonding device is brought
+	the first slave is removed) until the bonding device is brought
 	down or reconfigured.
 	
 	If you wish to change the MAC address, you can set it with ifconfig:
@@ -606,12 +690,16 @@
 
 To use this mode, pass "mode=1" to the module at load time :
 
+    # modprobe bonding miimon=100 mode=active-backup
+
+	or:
+
     # modprobe bonding miimon=100 mode=1
 
 Or, put in your /etc/modules.conf :
 
     alias bond0 bonding
-    options bond0 miimon=100 mode=1
+    options bond0 miimon=100 mode=active-backup
 
 Example 1: Using multiple host and multiple switches to build a "no single
 point of failure" solution.
@@ -698,7 +786,7 @@
 Examples :
 
     # modprobe bonding miimon=100 mode=1 downdelay=2000 updelay=5000
-    # modprobe bonding miimon=100 mode=0 downdelay=0 updelay=5000
+    # modprobe bonding miimon=100 mode=balance-rr downdelay=0 updelay=5000
 
 
 Promiscuous Sniffing notes
diff -Nru a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS	Mon Mar 31 13:41:07 2003
+++ b/MAINTAINERS	Mon Mar 31 13:41:07 2003
@@ -1869,11 +1869,12 @@
 W:	http://www.kroah.com/linux-usb/
 
 USB CDC ETHERNET DRIVER
-P:	Brad Hards
-M:	bradh@frogmouth.net
+P:	Greg Kroah-Hartman
+M:	greg@kroah.com
 L:	linux-usb-users@lists.sourceforge.net
 L:	linux-usb-devel@lists.sourceforge.net
 S:	Maintained
+W:	http://www.kroah.com/linux-usb/
 
 USB EHCI DRIVER
 P:	David Brownell
diff -Nru a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c
--- a/arch/alpha/kernel/err_titan.c	Mon Mar 31 13:41:08 2003
+++ b/arch/alpha/kernel/err_titan.c	Mon Mar 31 13:41:08 2003
@@ -22,8 +22,10 @@
 static int
 titan_parse_c_misc(u64 c_misc, int print)
 {
+#ifdef CONFIG_VERBOSE_MCHECK
 	char *src;
 	int nxs = 0;
+#endif
 	int status = MCHK_DISPOSITION_REPORT;
 
 #define TITAN__CCHIP_MISC__NXM		(1UL << 28)
@@ -263,11 +265,11 @@
 static int
 titan_parse_p_agperror(int which, u64 agperror, int print)
 {
+	int status = MCHK_DISPOSITION_REPORT;
+#ifdef CONFIG_VERBOSE_MCHECK
 	int cmd, len;
 	unsigned long addr;
-	int status = MCHK_DISPOSITION_REPORT;
 
-#ifdef CONFIG_VERBOSE_MCHECK
 	char *agperror_cmd[] = { "Read (low-priority)",	"Read (high-priority)",
 				 "Write (low-priority)",
 				 "Write (high-priority)",
@@ -575,14 +577,14 @@
 static int
 privateer_process_680_frame(struct el_common *mchk_header, int print)
 {
+	int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
+#ifdef CONFIG_VERBOSE_MCHECK
 	struct el_PRIVATEER_envdata_mcheck *emchk =
 		(struct el_PRIVATEER_envdata_mcheck *)
 		((unsigned long)mchk_header + mchk_header->sys_offset);
-	int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
 
 	/* TODO - catagorize errors, for now, no error */
 
-#ifdef CONFIG_VERBOSE_MCHECK
 	if (!print)
 		return status;
 
diff -Nru a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
--- a/arch/alpha/kernel/module.c	Mon Mar 31 13:41:06 2003
+++ b/arch/alpha/kernel/module.c	Mon Mar 31 13:41:06 2003
@@ -199,7 +199,9 @@
 		case R_ALPHA_NONE:
 			break;
 		case R_ALPHA_REFQUAD:
-			*(u64 *)location = value;
+			/* BUG() can produce misaligned relocations. */
+			((u32 *)location)[0] = value;
+			((u32 *)location)[1] = value >> 32;
 			break;
 		case R_ALPHA_GPREL32:
 			value -= gp;
diff -Nru a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
--- a/arch/alpha/kernel/pci.c	Mon Mar 31 13:41:06 2003
+++ b/arch/alpha/kernel/pci.c	Mon Mar 31 13:41:06 2003
@@ -285,7 +285,7 @@
 			pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
 			/* Move up the chain of bridges. */
 			dev = dev->bus->self;
-		} while (dev->bus->self);
+		} while (dev->bus->parent);
 		*pinp = pin;
 
 		/* The slot is the slot of the last bridge. */
@@ -410,10 +410,8 @@
 
 	if (pci_probe_only)
 		pcibios_claim_console_setup();
-	else	/* FIXME: `else' will be removed when
-		   pci_assign_unassigned_resources() is able to work
-		   correctly with [partially] allocated PCI tree. */
-		pci_assign_unassigned_resources();
+
+	pci_assign_unassigned_resources();
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
 }
 
diff -Nru a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
--- a/arch/alpha/kernel/smp.c	Mon Mar 31 13:41:06 2003
+++ b/arch/alpha/kernel/smp.c	Mon Mar 31 13:41:06 2003
@@ -112,7 +112,7 @@
 static void __init
 wait_boot_cpu_to_stop(int cpuid)
 {
-	long stop = jiffies + 10*HZ;
+	unsigned long stop = jiffies + 10*HZ;
 
 	while (time_before(jiffies, stop)) {
 	        if (!smp_secondary_alive)
@@ -349,7 +349,7 @@
 {
 	struct percpu_struct *cpu;
 	struct pcb_struct *hwpcb, *ipcb;
-	long timeout;
+	unsigned long timeout;
 	  
 	cpu = (struct percpu_struct *)
 		((char*)hwrpb
@@ -428,7 +428,7 @@
 smp_boot_one_cpu(int cpuid)
 {
 	struct task_struct *idle;
-	long timeout;
+	unsigned long timeout;
 
 	/* Cook up an idler for this guy.  Note that the address we
 	   give to kernel_thread is irrelevant -- it's going to start
@@ -816,7 +816,7 @@
 			  int wait, unsigned long to_whom)
 {
 	struct smp_call_struct data;
-	long timeout;
+	unsigned long timeout;
 	int num_cpus_to_call;
 	
 	data.func = func;
diff -Nru a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
--- a/arch/alpha/kernel/sys_nautilus.c	Mon Mar 31 13:41:07 2003
+++ b/arch/alpha/kernel/sys_nautilus.c	Mon Mar 31 13:41:07 2003
@@ -183,14 +183,17 @@
 
 extern void free_reserved_mem(void *, void *);
 
+static struct resource irongate_mem = {
+	.name	= "Irongate PCI MEM",
+	.flags	= IORESOURCE_MEM,
+};
+
 void __init
 nautilus_init_pci(void)
 {
 	struct pci_controller *hose = hose_head;
 	struct pci_bus *bus;
 	struct pci_dev *irongate;
-	unsigned long saved_io_start, saved_io_end;
-	unsigned long saved_mem_start, saved_mem_end;
 	unsigned long bus_align, bus_size, pci_mem;
 	unsigned long memtop = max_low_pfn << PAGE_SHIFT;
 
@@ -199,50 +202,41 @@
 	hose->bus = bus;
 	hose->last_busno = bus->subordinate;
 
-	/* We're going to size the root bus, so we must
-	   - have a non-NULL PCI device associated with the bus
-	   - preserve hose resources. */
 	irongate = pci_find_slot(0, 0);
 	bus->self = irongate;
-	saved_io_start = bus->resource[0]->start;
-	saved_io_end = bus->resource[0]->end;
-	saved_mem_start = bus->resource[1]->start;
-	saved_mem_end = bus->resource[1]->end;
+	bus->resource[1] = &irongate_mem;
 
 	pci_bus_size_bridges(bus);
 
-	/* Don't care about IO. */
-	bus->resource[0]->start = saved_io_start;
-	bus->resource[0]->end = saved_io_end;
+	/* IO port range. */
+	bus->resource[0]->start = 0;
+	bus->resource[0]->end = 0xffff;
 
+	/* Set up PCI memory range - limit is hardwired to 0xffffffff,
+	   base must be aligned to 16Mb. */
 	bus_align = bus->resource[1]->start;
 	bus_size = bus->resource[1]->end + 1 - bus_align;
-	/* Align to 16Mb. */
 	if (bus_align < 0x1000000UL)
 		bus_align = 0x1000000UL;
 
-	/* Restore hose MEM resource. */
-	bus->resource[1]->start = saved_mem_start;
-	bus->resource[1]->end = saved_mem_end;
-
 	pci_mem = (0x100000000UL - bus_size) & -bus_align;
 
+	bus->resource[1]->start = pci_mem;
+	bus->resource[1]->end = 0xffffffffUL;
+	if (request_resource(&iomem_resource, bus->resource[1]) < 0)
+		printk(KERN_ERR "Failed to request MEM on hose 0\n");
+
 	if (pci_mem < memtop && pci_mem > alpha_mv.min_mem_address) {
 		free_reserved_mem(__va(alpha_mv.min_mem_address),
 				  __va(pci_mem));
-		printk("nautilus_init_arch: %ldk freed\n",
+		printk("nautilus_init_pci: %ldk freed\n",
 			(pci_mem - alpha_mv.min_mem_address) >> 10);
 	}
 
-	alpha_mv.min_mem_address = pci_mem;
 	if ((IRONGATE0->dev_vendor >> 16) > 0x7006)	/* Albacore? */
 		IRONGATE0->pci_mem = pci_mem;
 
 	pci_bus_assign_resources(bus);
-
-	/* To break the loop in common_swizzle() */
-	bus->self = NULL;
-
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
 }
 
diff -Nru a/arch/arm/kernel/apm.c b/arch/arm/kernel/apm.c
--- a/arch/arm/kernel/apm.c	Mon Mar 31 13:41:08 2003
+++ b/arch/arm/kernel/apm.c	Mon Mar 31 13:41:08 2003
@@ -552,8 +552,6 @@
 MODULE_DESCRIPTION("Advanced Power Management");
 MODULE_LICENSE("GPL");
 
-EXPORT_NO_SYMBOLS;
-
 #ifndef MODULE
 static int __init apm_setup(char *str)
 {
diff -Nru a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
--- a/arch/arm/kernel/armksyms.c	Mon Mar 31 13:41:06 2003
+++ b/arch/arm/kernel/armksyms.c	Mon Mar 31 13:41:06 2003
@@ -69,6 +69,7 @@
 extern void __udivsi3(void);
 extern void __umodsi3(void);
 extern void abort(void);
+extern void do_div64(void);
 
 extern void ret_from_exception(void);
 extern void fpundefinstr(void);
@@ -234,6 +235,7 @@
 EXPORT_SYMBOL_NOVERS(__udivmoddi4);
 EXPORT_SYMBOL_NOVERS(__udivsi3);
 EXPORT_SYMBOL_NOVERS(__umodsi3);
+EXPORT_SYMBOL_NOVERS(do_div64);
 
 	/* bitops */
 EXPORT_SYMBOL(_set_bit_le);
diff -Nru a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
--- a/arch/arm/kernel/entry-armv.S	Mon Mar 31 13:41:06 2003
+++ b/arch/arm/kernel/entry-armv.S	Mon Mar 31 13:41:06 2003
@@ -997,11 +997,11 @@
  * previous and next are guaranteed not to be the same.
  */
 ENTRY(__switch_to)
-		add	ip, r0, #TI_CPU_SAVE
-		ldr	r2, [r1, #TI_CPU_DOMAIN]!
+		add	ip, r1, #TI_CPU_SAVE
+		ldr	r3, [r2, #TI_CPU_DOMAIN]!
 		stmia	ip, {r4 - sl, fp, sp, lr}	@ Store most regs on stack
-		mcr	p15, 0, r2, c3, c0		@ Set domain register
-		ldmib	r1, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
+		mcr	p15, 0, r3, c3, c0, 0		@ Set domain register
+		ldmib	r2, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
 
 		__INIT
 /*
diff -Nru a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
--- a/arch/arm/kernel/entry-common.S	Mon Mar 31 13:41:06 2003
+++ b/arch/arm/kernel/entry-common.S	Mon Mar 31 13:41:06 2003
@@ -75,7 +75,6 @@
  * This is how we return from a fork.
  */
 ENTRY(ret_from_fork)
-	ldr	r0, [r0, #TI_TASK]
 	bl	schedule_tail
 	get_thread_info tsk
 	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
diff -Nru a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
--- a/arch/arm/lib/Makefile	Mon Mar 31 13:41:07 2003
+++ b/arch/arm/lib/Makefile	Mon Mar 31 13:41:07 2003
@@ -13,7 +13,7 @@
 		   strnlen_user.o strchr.o strrchr.o testchangebit.o  \
 		   testclearbit.o testsetbit.o uaccess.o getuser.o    \
 		   putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o   \
-		   ucmpdi2.o udivdi3.o lib1funcs.o
+		   ucmpdi2.o udivdi3.o lib1funcs.o div64.o
 obj-m		:=
 obj-n		:=
 
diff -Nru a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/arm/lib/div64.S	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,42 @@
+#include <linux/linkage.h>
+
+ql	.req	r0			@ quotient low
+qh	.req	r1			@ quotient high
+dl	.req	r3			@ divisor low
+dh	.req	r2			@ divisor high
+nl	.req	r4			@ dividend low
+nh	.req	r5			@ dividend high
+
+ENTRY(do_div64)
+	stmfd	sp!, {r4, r5, lr}
+	mov	nl, r0
+	movs	nh, r1			@ if high bits are zero
+	movne	lr, #33
+	moveq	lr, #1			@ only divide low bits
+	moveq	nh, r0
+
+1:	cmp	nh, dh
+	bls	2f
+	add	lr, lr, #1
+	movs	dh, dh, lsl #1		@ left justify divisor
+	bpl	1b
+
+2:	movs	nh, r1
+	moveq	dl, dh
+	moveq	dh, #0
+	movne	dl, #0
+	mov	ql, #0
+	mov	qh, #0
+3:	subs	ip, nl, dl		@ trial subtraction
+	sbcs	ip, nh, dh
+	movcs	nh, ip			@ only update if successful
+	subcs	nl, nl, dl		@ (repeat the subtraction)
+	adcs	ql, ql, ql		@ C=1 if successful, shift into
+	adc	qh, qh, qh		@ quotient
+	movs	dh, dh, lsr #1		@ shift base high part right
+	mov	dl, dl, rrx		@ shift base low part right
+	subs	lr, lr, #1
+	bne	3b
+
+	mov	r2, nl
+	ldmfd	sp!, {r4, r5, pc}
diff -Nru a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
--- a/arch/arm/mach-footbridge/dc21285.c	Mon Mar 31 13:41:09 2003
+++ b/arch/arm/mach-footbridge/dc21285.c	Mon Mar 31 13:41:09 2003
@@ -25,10 +25,13 @@
 
 #define MAX_SLOTS		21
 
-#define PCICMD_ERROR_BITS ((PCI_STATUS_DETECTED_PARITY | \
-			PCI_STATUS_REC_MASTER_ABORT | \
-			PCI_STATUS_REC_TARGET_ABORT | \
-			PCI_STATUS_PARITY) << 16)
+#define PCICMD_ABORT		((PCI_STATUS_REC_MASTER_ABORT| \
+				  PCI_STATUS_REC_TARGET_ABORT)<<16)
+
+#define PCICMD_ERROR_BITS	((PCI_STATUS_DETECTED_PARITY | \
+				  PCI_STATUS_REC_MASTER_ABORT | \
+				  PCI_STATUS_REC_TARGET_ABORT | \
+				  PCI_STATUS_PARITY) << 16)
 
 extern int setup_arm_irq(int, struct irqaction *);
 extern void pcibios_report_status(u_int status_mask, int warn);
@@ -84,6 +87,12 @@
 
 	*value = v;
 
+	v = *CSR_PCICMD;
+	if (v & PCICMD_ABORT) {
+		*CSR_PCICMD = v & (0xffff|PCICMD_ABORT);
+		return -1;
+	}
+
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -92,6 +101,7 @@
 		     int size, u32 value)
 {
 	unsigned long addr = dc21285_base_address(bus, devfn);
+	u32 v;
 
 	if (addr)
 		switch (size) {
@@ -109,6 +119,12 @@
 			break;
 		}
 
+	v = *CSR_PCICMD;
+	if (v & PCICMD_ABORT) {
+		*CSR_PCICMD = v & (0xffff|PCICMD_ABORT);
+		return -1;
+	}
+
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -148,16 +164,16 @@
 	cmd = cmd & 0xffff;
 
 	if (status & PCI_STATUS_REC_MASTER_ABORT) {
-		printk(KERN_DEBUG "PCI: master abort: ");
-		pcibios_report_status(PCI_STATUS_REC_MASTER_ABORT, 1);
-		printk("\n");
-
+		printk(KERN_DEBUG "PCI: master abort, pc=0x%08lx\n",
+			instruction_pointer(regs));
 		cmd |= PCI_STATUS_REC_MASTER_ABORT << 16;
 	}
 
 	if (status & PCI_STATUS_REC_TARGET_ABORT) {
 		printk(KERN_DEBUG "PCI: target abort: ");
-		pcibios_report_status(PCI_STATUS_SIG_TARGET_ABORT, 1);
+		pcibios_report_status(PCI_STATUS_REC_MASTER_ABORT |
+				      PCI_STATUS_SIG_TARGET_ABORT |
+				      PCI_STATUS_REC_TARGET_ABORT, 1);
 		printk("\n");
 
 		cmd |= PCI_STATUS_REC_TARGET_ABORT << 16;
@@ -289,6 +305,38 @@
 		"%s mode\n", *CSR_CLASSREV & 0xff, cfn_mode ?
 		"central function" : "addin");
 
+	if (footbridge_cfn_mode()) {
+		/*
+		 * Clear any existing errors - we aren't
+		 * interested in historical data...
+		 */
+		*CSR_SA110_CNTL	= (*CSR_SA110_CNTL & 0xffffde07) |
+				  SA110_CNTL_RXSERR;
+		*CSR_PCICMD = (*CSR_PCICMD & 0xffff) | PCICMD_ERROR_BITS;
+	}
+
+	init_timer(&serr_timer);
+	init_timer(&perr_timer);
+
+	serr_timer.data = IRQ_PCI_SERR;
+	serr_timer.function = dc21285_enable_error;
+	perr_timer.data = IRQ_PCI_PERR;
+	perr_timer.function = dc21285_enable_error;
+
+	/*
+	 * We don't care if these fail.
+	 */
+	request_irq(IRQ_PCI_SERR, dc21285_serr_irq, SA_INTERRUPT,
+		    "PCI system error", &serr_timer);
+	request_irq(IRQ_PCI_PERR, dc21285_parity_irq, SA_INTERRUPT,
+		    "PCI parity error", &perr_timer);
+	request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, SA_INTERRUPT,
+		    "PCI abort", NULL);
+	request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, SA_INTERRUPT,
+		    "Discard timer", NULL);
+	request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, SA_INTERRUPT,
+		    "PCI data parity", NULL);
+
 	if (cfn_mode) {
 		static struct resource csrio;
 
@@ -324,35 +372,5 @@
 
 void __init dc21285_postinit(void)
 {
-	if (footbridge_cfn_mode()) {
-		/*
-		 * Clear any existing errors - we aren't
-		 * interested in historical data...
-		 */
-		*CSR_SA110_CNTL	= (*CSR_SA110_CNTL & 0xffffde07) |
-				  SA110_CNTL_RXSERR;
-		*CSR_PCICMD = (*CSR_PCICMD & 0xffff) | PCICMD_ERROR_BITS;
-	}
-
-	/*
-	 * Initialise PCI error IRQ after we've finished probing
-	 */
-	request_irq(IRQ_PCI_ABORT,     dc21285_abort_irq,   SA_INTERRUPT, "PCI abort",       NULL);
-	request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, SA_INTERRUPT, "Discard timer",   NULL);
-	request_irq(IRQ_PCI_DPERR,     dc21285_dparity_irq, SA_INTERRUPT, "PCI data parity", NULL);
-
-	init_timer(&serr_timer);
-	init_timer(&perr_timer);
-
-	serr_timer.data = IRQ_PCI_SERR;
-	serr_timer.function = dc21285_enable_error;
-	perr_timer.data = IRQ_PCI_PERR;
-	perr_timer.function = dc21285_enable_error;
-
-	request_irq(IRQ_PCI_SERR, dc21285_serr_irq, SA_INTERRUPT,
-		    "PCI system error", &serr_timer);
-	request_irq(IRQ_PCI_PERR, dc21285_parity_irq, SA_INTERRUPT,
-		    "PCI parity error", &perr_timer);
-
 	register_isa_ports(DC21285_PCI_MEM, DC21285_PCI_IO, 0);
 }
diff -Nru a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
--- a/arch/arm/mach-sa1100/assabet.c	Mon Mar 31 13:41:06 2003
+++ b/arch/arm/mach-sa1100/assabet.c	Mon Mar 31 13:41:06 2003
@@ -117,6 +117,8 @@
 	PGSR = 0;
 	PCFR = 0;
 	PSDR = 0;
+	PPDR |= PPC_TXD3 | PPC_TXD1;
+	PPSR |= PPC_TXD3 | PPC_TXD1;
 
 	sa1100fb_lcd_power = assabet_lcd_power;
 	sa1100fb_backlight_power = assabet_backlight_power;
diff -Nru a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c
--- a/arch/arm/mach-sa1100/cpu-sa1110.c	Mon Mar 31 13:41:06 2003
+++ b/arch/arm/mach-sa1100/cpu-sa1110.c	Mon Mar 31 13:41:06 2003
@@ -234,6 +234,8 @@
 		    (sa11x0_ppcr_to_freq(ppcr-1) >= policy->min))
 			ppcr--;
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	freqs.old = sa11x0_getspeed();
diff -Nru a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
--- a/arch/arm/tools/mach-types	Mon Mar 31 13:41:07 2003
+++ b/arch/arm/tools/mach-types	Mon Mar 31 13:41:07 2003
@@ -6,7 +6,7 @@
 # To add an entry into this database, please see Documentation/arm/README,
 # or contact rmk@arm.linux.org.uk
 #
-# Last update: Wed Mar 5 22:11:59 2003
+# Last update: Tue Mar 25 16:34:29 2003
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -305,3 +305,11 @@
 pxa_whitechapel		ARCH_PXA_WHITECHAPEL	PXA_WHITECHAPEL		294
 nwsc			ARCH_NWSC		NWSC			295
 nwlarm			ARCH_NWLARM		NWLARM			296
+ixp425_mguard		ARCH_IXP425_MGUARD	IXP425_MGUARD		297
+pxa_netdcu4		ARCH_PXA_NETDCU4	PXA_NETDCU4		298
+ixdp2401		ARCH_IXDP2401		IXDP2401		299
+ixdp2801		ARCH_IXDP2801		IXDP2801		300
+zodiac			ARCH_ZODIAC		ZODIAC			301
+armmodul		ARCH_ARMMODUL		ARMMODUL		302
+ketop			SA1100_KETOP		KETOP			303
+av7200			ARCH_AV7200		AV7200			304
diff -Nru a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
--- a/arch/i386/kernel/smpboot.c	Mon Mar 31 13:41:07 2003
+++ b/arch/i386/kernel/smpboot.c	Mon Mar 31 13:41:07 2003
@@ -422,7 +422,7 @@
 	/*
 	 *      Synchronize the TSC with the BP
 	 */
-	if (cpu_has_tsc)
+	if (cpu_has_tsc && cpu_khz)
 		synchronize_tsc_ap();
 }
 
@@ -1114,7 +1114,7 @@
 	/*
 	 * Synchronize the TSC with the AP
 	 */
-	if (cpu_has_tsc && cpucount)
+	if (cpu_has_tsc && cpucount && cpu_khz)
 		synchronize_tsc_bp();
 }
 
diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
--- a/arch/i386/kernel/timers/timer.c	Mon Mar 31 13:41:08 2003
+++ b/arch/i386/kernel/timers/timer.c	Mon Mar 31 13:41:08 2003
@@ -1,4 +1,6 @@
+#include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/string.h>
 #include <asm/timer.h>
 
 /* list of externed timers */
@@ -17,6 +19,17 @@
 	NULL,
 };
 
+static char clock_override[10] __initdata;
+
+static int __init clock_setup(char* str)
+{
+	if (str) {
+		strncpy(clock_override, str,10);
+		clock_override[9] = '\0';
+	}
+	return 1;
+}
+__setup("clock=", clock_setup);
 
 /* iterates through the list of timers, returning the first 
  * one that initializes successfully.
@@ -28,7 +41,7 @@
 	/* find most preferred working timer */
 	while (timers[i]) {
 		if (timers[i]->init)
-			if (timers[i]->init() == 0)
+			if (timers[i]->init(clock_override) == 0)
 				return timers[i];
 		++i;
 	}
diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
--- a/arch/i386/kernel/timers/timer_cyclone.c	Mon Mar 31 13:41:07 2003
+++ b/arch/i386/kernel/timers/timer_cyclone.c	Mon Mar 31 13:41:07 2003
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/timex.h>
 #include <linux/errno.h>
+#include <linux/string.h>
 
 #include <asm/timer.h>
 #include <asm/io.h>
@@ -73,7 +74,7 @@
 	return delay_at_last_interrupt + offset;
 }
 
-static int init_cyclone(void)
+static int __init init_cyclone(char* override)
 {
 	u32* reg;	
 	u32 base;		/* saved cyclone base address */
@@ -81,8 +82,11 @@
 	u32 offset;		/* offset from pageaddr to cyclone_timer register */
 	int i;
 	
+	/* check clock override */
+	if (override[0] && strncmp(override,"cyclone",7))
+			return -ENODEV;
+
 	/*make sure we're on a summit box*/
-	/*XXX need to use proper summit hooks! such as xapic -john*/
 	if(!use_cyclone) return -ENODEV; 
 	
 	printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
diff -Nru a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
--- a/arch/i386/kernel/timers/timer_none.c	Mon Mar 31 13:41:07 2003
+++ b/arch/i386/kernel/timers/timer_none.c	Mon Mar 31 13:41:07 2003
@@ -1,6 +1,7 @@
+#include <linux/init.h>
 #include <asm/timer.h>
 
-static int init_none(void)
+static int __init init_none(char* override)
 {
 	return 0;
 }
diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c	Mon Mar 31 13:41:08 2003
+++ b/arch/i386/kernel/timers/timer_pit.c	Mon Mar 31 13:41:08 2003
@@ -17,8 +17,12 @@
 extern spinlock_t i8253_lock;
 #include "do_timer.h"
 
-static int init_pit(void)
+static int __init init_pit(char* override)
 {
+	/* check clock override */
+	if (override[0] && strncmp(override,"pit",3))
+		printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n");
+
 	return 0;
 }
 
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c	Mon Mar 31 13:41:07 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c	Mon Mar 31 13:41:07 2003
@@ -8,6 +8,7 @@
 #include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/cpufreq.h>
+#include <linux/string.h>
 
 #include <asm/timer.h>
 #include <asm/io.h>
@@ -244,8 +245,13 @@
 #endif
 
 
-static int init_tsc(void)
+static int __init init_tsc(char* override)
 {
+
+	/* check clock override */
+	if (override[0] && strncmp(override,"tsc",3))
+			return -ENODEV;
+
 	/*
 	 * If we have APM enabled or the CPU clock speed is variable
 	 * (CPU stops clock on HLT or slows clock to save power)
diff -Nru a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
--- a/arch/i386/mm/boot_ioremap.c	Mon Mar 31 13:41:09 2003
+++ b/arch/i386/mm/boot_ioremap.c	Mon Mar 31 13:41:09 2003
@@ -19,6 +19,7 @@
 #undef CONFIG_X86_PAE
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include <linux/init.h>
 #include <linux/stddef.h>
 
@@ -48,10 +49,12 @@
 {
 	boot_pte_t* pte;
 	int i;
+	char *vaddr = virtual_source;
 
 	pte = boot_vaddr_to_pte(virtual_source);
 	for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
 		set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
+		__flush_tlb_one(&vaddr[i*PAGE_SIZE]);
 	}
 }
 
diff -Nru a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
--- a/arch/i386/mm/highmem.c	Mon Mar 31 13:41:07 2003
+++ b/arch/i386/mm/highmem.c	Mon Mar 31 13:41:07 2003
@@ -2,8 +2,7 @@
 
 void *kmap(struct page *page)
 {
-	if (in_interrupt())
-		BUG();
+	might_sleep();
 	if (page < highmem_start_page)
 		return page_address(page);
 	return kmap_high(page);
diff -Nru a/arch/ppc/Kconfig b/arch/ppc/Kconfig
--- a/arch/ppc/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/Kconfig	Mon Mar 31 13:41:08 2003
@@ -666,6 +666,52 @@
 	  here.  Saying Y here will not hurt performance (on any machine) but
 	  will increase the size of the kernel.
 
+config CPU_FREQ
+	bool "CPU Frequency scaling"
+	help
+	  Clock scaling allows you to change the clock speed of CPUs on the
+	  fly. This is a nice method to save battery power on notebooks,
+	  because the lower the clock speed, the less power the CPU consumes.
+
+	  For more information, take a look at linux/Documentation/cpufreq or
+	  at <http://www.brodo.de/cpufreq/>
+
+	  If in doubt, say N.
+
+config CPU_FREQ_PROC_INTF
+	bool "/proc/cpufreq interface (DEPRECATED)"
+	depends on CPU_FREQ && PROC_FS
+	help
+	  This enables the /proc/cpufreq interface for controlling
+	  CPUFreq. Please note that it is recommended to use the sysfs
+	  interface instead (which is built automatically). 
+	  
+	  For details, take a look at linux/Documentation/cpufreq. 
+	  
+	  If in doubt, say N.
+
+config CPU_FREQ_24_API
+	bool "/proc/sys/cpu/ interface (2.4. / OLD)"
+	depends on CPU_FREQ
+	help
+	  This enables the /proc/sys/cpu/ sysctl interface for controlling
+	  CPUFreq, as known from the 2.4.-kernel patches for CPUFreq. 2.5
+	  uses a sysfs interface instead. Please note that some drivers do 
+	  not work well with the 2.4. /proc/sys/cpu sysctl interface,
+	  so if in doubt, say N here.
+
+	  For details, take a look at linux/Documentation/cpufreq. 
+
+	  If in doubt, say N.
+
+config CPU_FREQ_PMAC
+	bool "Support for Apple PowerBooks"
+	depends on CPU_FREQ && ADB_PMU
+	help
+	  This adds support for frequency switching on Apple PowerBooks,
+	  this currently includes some models of iBook & Titanium
+	  PowerBook.
+
 endmenu
 
 menu "General setup"
diff -Nru a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile
--- a/arch/ppc/kernel/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/kernel/Makefile	Mon Mar 31 13:41:08 2003
@@ -21,7 +21,7 @@
 					process.o signal.o ptrace.o align.o \
 					semaphore.o syscalls.o setup.o \
 					cputable.o ppc_htab.o
-obj-$(CONFIG_6xx)		+= l2cr.o
+obj-$(CONFIG_6xx)		+= l2cr.o cpu_setup_6xx.o
 obj-$(CONFIG_MODULES)		+= module.o ppc_ksyms.o
 obj-$(CONFIG_PCI)		+= pci.o 
 obj-$(CONFIG_PCI)		+= pci-dma.o
diff -Nru a/arch/ppc/kernel/cpu_setup_6xx.S b/arch/ppc/kernel/cpu_setup_6xx.S
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/ppc/kernel/cpu_setup_6xx.S	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,415 @@
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/offsets.h>
+
+_GLOBAL(__setup_cpu_601)
+	blr
+_GLOBAL(__setup_cpu_603)
+	b	setup_common_caches
+_GLOBAL(__setup_cpu_604)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_604_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750cx)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750cx
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750fx)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750fx
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7400)
+	mflr	r4
+	bl	setup_7400_workarounds
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7410)
+	mflr	r4
+	bl	setup_7410_workarounds
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	li	r3,0
+	mtspr	SPRN_L2CR2,r3
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7450)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_745x_specifics
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7455)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_745x_specifics
+	mtlr	r4
+	blr
+
+/* Enable caches for 603's, 604, 750 & 7400 */
+setup_common_caches:
+	mfspr	r11,HID0
+	andi.	r0,r11,HID0_DCE
+#ifdef CONFIG_DCACHE_DISABLE
+	ori	r11,r11,HID0_ICE
+#else
+	ori	r11,r11,HID0_ICE|HID0_DCE
+#endif
+	ori	r8,r11,HID0_ICFI
+	bne	1f			/* don't invalidate the D-cache */
+	ori	r8,r8,HID0_DCI		/* unless it wasn't enabled */
+1:	sync
+	mtspr	HID0,r8			/* enable and invalidate caches */
+	sync
+	mtspr	HID0,r11		/* enable caches */
+	sync
+	isync
+	blr
+
+/* 604, 604e, 604ev, ...
+ * Enable superscalar execution & branch history table
+ */
+setup_604_hid0:
+	mfspr	r11,HID0
+	ori	r11,r11,HID0_SIED|HID0_BHTE
+	ori	r8,r11,HID0_BTCD
+	sync
+	mtspr	HID0,r8		/* flush branch target address cache */
+	sync			/* on 604e/604r */
+	mtspr	HID0,r11
+	sync
+	isync
+	blr
+
+/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
+ * errata we work around here.
+ * Moto MPC710CE.pdf describes them, those are errata
+ * #3, #4 and #5
+ * Note that we assume the firmware didn't choose to
+ * apply other workarounds (there are other ones documented
+ * in the .pdf). It appears that Apple firmware only works
+ * around #3 and with the same fix we use. We may want to
+ * check if the CPU is using 60x bus mode in which case
+ * the workaround for errata #4 is useless. Also, we may
+ * want to explicitly clear HID0_NOPDST as this is not
+ * needed once we have applied workaround #5 (though it's
+ * not set by Apple's firmware at least).
+ */
+setup_7400_workarounds:
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31
+	cmpwi	0,r3,0x0207
+	ble	1f
+	blr
+setup_7410_workarounds:
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31
+	cmpwi	0,r3,0x0100
+	bnelr
+1:
+	mfspr	r11,SPRN_MSSSR0
+	/* Errata #3: Set L1OPQ_SIZE to 0x10 */
+	rlwinm	r11,r11,0,9,6
+	oris	r11,r11,0x0100
+	/* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */
+	oris	r11,r11,0x0002
+	/* Errata #5: Set DRLT_SIZE to 0x01 */
+	rlwinm	r11,r11,0,5,2
+	oris	r11,r11,0x0800
+	sync
+	mtspr	SPRN_MSSSR0,r11
+	sync
+	isync
+	blr
+	
+/* 740/750/7400/7410
+ * Enable Store Gathering (SGE), Address Broadcast (ABE),
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Clear Instruction cache throttling (ICTC)
+ */
+setup_750_7400_hid0:
+	mfspr	r11,HID0
+	ori	r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
+BEGIN_FTR_SECTION
+	oris	r11,r11,HID0_DPM@h	/* enable dynamic power mgmt */
+END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+	li	r3,HID0_SPD
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+ 	mtspr	ICTC,r3			/* Instruction Cache Throttling off */
+	isync
+	mtspr	HID0,r11
+	sync
+	isync
+	blr
+
+/* 750cx specific
+ * Looks like we have to disable NAP feature for some PLL settings...
+ * (waiting for confirmation)
+ */
+setup_750cx:
+	mfspr	r10, SPRN_HID1
+	rlwinm	r10,r10,4,28,31
+	cmpi	cr0,r10,7
+	cmpi	cr1,r10,9
+	cmpi	cr2,r10,11
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr2+eq
+	bnelr
+	lwz	r6,CPU_SPEC_FEATURES(r5)
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7
+	stw	r6,CPU_SPEC_FEATURES(r5)
+	blr
+
+/* 750fx specific
+ */
+setup_750fx:
+	blr
+
+/* MPC 745x
+ * Enable Store Gathering (SGE), Branch Folding (FOLD)
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Ensure our data cache instructions really operate.
+ * Timebase has to be running or we wouldn't have made it here,
+ * just ensure we don't disable it.
+ * Clear Instruction cache throttling (ICTC)
+ * Enable L2 HW prefetch
+ */
+setup_745x_specifics:
+	/* We check for the presence of an L3 cache setup by
+	 * the firmware. If any, we disable NAP capability as
+	 * it's known to be bogus on rev 2.1 and earlier
+	 */
+	mfspr	r11,SPRN_L3CR
+	andis.	r11,r11,L3CR_L3E@h
+	beq	1f
+	lwz	r6,CPU_SPEC_FEATURES(r5)
+	andi.	r0,r6,CPU_FTR_L3_DISABLE_NAP
+	beq	1f
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7
+	stw	r6,CPU_SPEC_FEATURES(r5)
+1:	
+	mfspr	r11,HID0
+
+	/* All of the bits we have to set.....
+	 */
+	ori	r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE | HID0_BTIC | HID0_LRSTK
+BEGIN_FTR_SECTION
+	oris	r11,r11,HID0_DPM@h	/* enable dynamic power mgmt */
+END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+
+	/* All of the bits we have to clear....
+	 */
+	li	r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+
+ 	mtspr	ICTC,r3			/* Instruction Cache Throttling off */
+	isync
+	mtspr	HID0,r11
+	sync
+	isync
+
+	/* Enable L2 HW prefetch
+	 */
+	mfspr	r3,SPRN_MSSCR0
+	ori	r3,r3,3
+	sync
+	mtspr	SPRN_MSSCR0,r3
+	sync
+	isync
+	blr
+
+/* Definitions for the table used to save CPU states */
+#define CS_HID0		0
+#define CS_HID1		4
+#define	CS_MSSCR0	8
+#define CS_MSSSR0	12
+#define CS_ICTRL	16
+#define CS_LDSTCR	20
+#define CS_LDSTDB	24
+#define CS_SIZE		28
+
+	.data
+	.balign	4
+cpu_state_storage:	
+	.space	CS_SIZE
+	.text
+	
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, MSSCR0, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+	/* Get storage ptr */
+	lis	r5,cpu_state_storage@h
+	ori	r5,r5,cpu_state_storage@l
+
+	/* Save HID0 (common to all CONFIG_6xx cpus) */
+	mfspr	r3,SPRN_HID0
+	stw	r3,CS_HID0(r5)
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,PVR
+	srwi	r3,r3,16
+	cmpli	cr0,r3,0x8000	/* 7450 */
+	cmpli	cr1,r3,0x000c	/* 7400 */
+	cmpli	cr2,r3,0x800c	/* 7410 */
+	cmpli	cr3,r3,0x8001	/* 7455 */
+	cmpli	cr4,r3,0x8002	/* 7457 */
+	cmpli	cr5,r3,0x7000	/* 750FX */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	bne	1f
+	/* Backup 74xx specific regs */
+	mfspr	r4,SPRN_MSSCR0
+	stw	r4,CS_MSSCR0(r5)
+	mfspr	r4,SPRN_MSSSR0
+	stw	r4,CS_MSSSR0(r5)
+	beq	cr1,1f
+	/* Backup 745x specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+	mfspr	r4,SPRN_ICTRL
+	stw	r4,CS_ICTRL(r5)
+	mfspr	r4,SPRN_LDSTCR
+	stw	r4,CS_LDSTCR(r5)
+	mfspr	r4,SPRN_LDSTDB
+	stw	r4,CS_LDSTDB(r5)
+1:	
+	bne	cr5,1f
+	/* Backup 750FX specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+1:
+	blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+	/* Get storage ptr */
+	lis	r5,(cpu_state_storage-KERNELBASE)@h
+	ori	r5,r5,cpu_state_storage@l
+
+	/* Restore HID0 */
+	lwz	r3,CS_HID0(r5)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,PVR
+	srwi	r3,r3,16
+	cmpli	cr0,r3,0x8000	/* 7450 */
+	cmpli	cr1,r3,0x000c	/* 7400 */
+	cmpli	cr2,r3,0x800c	/* 7410 */
+	cmpli	cr3,r3,0x8001	/* 7455 */
+	cmpli	cr4,r3,0x8002	/* 7457 */
+	cmpli	cr5,r3,0x7000	/* 750FX */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	bne	2f
+	/* Restore 74xx specific regs */
+	lwz	r4,CS_MSSCR0(r5)
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lwz	r4,CS_MSSSR0(r5)
+	sync
+	mtspr	SPRN_MSSSR0,r4
+	sync
+	isync
+	bne	cr2,1f
+	/* Clear 7410 L2CR2 */
+	li	r4,0
+	mtspr	SPRN_L2CR2,r4
+1:	beq	cr1,2f
+	/* Restore 745x specific registers */
+	lwz	r4,CS_HID1(r5)
+	sync
+	mtspr	SPRN_HID1,r4
+	isync
+	sync
+	lwz	r4,CS_ICTRL(r5)
+	sync
+	mtspr	SPRN_ICTRL,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTCR(r5)
+	sync
+	mtspr	SPRN_LDSTCR,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTDB(r5)
+	sync
+	mtspr	SPRN_LDSTDB,r4
+	isync
+	sync
+2:	bne	cr5,1f
+	/* Restore 750FX specific registers
+	 * that is restore PLL config & switch
+	 * to PLL 0
+	 */
+	lwz	r4,CS_HID1(r5)
+	rlwinm  r5,r4,0,16,14
+	mtspr	SPRN_HID1,r5
+		/* Wait for PLL to stabilize */
+	mftbl	r5
+3:	mftbl	r6
+	sub	r6,r6,r5
+	cmpli	cr0,r6,10000
+	ble	3b
+	/* Setup final PLL */
+	mtspr	SPRN_HID1,r4
+1:
+	blr
+
diff -Nru a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S
--- a/arch/ppc/kernel/head.S	Mon Mar 31 13:41:07 2003
+++ b/arch/ppc/kernel/head.S	Mon Mar 31 13:41:07 2003
@@ -890,7 +890,7 @@
  */
 	mfmsr	r5
 	oris	r5,r5,MSR_VEC@h
-	mtmsr	r5			/* enable use of AltiVec now */
+	MTMSRD(r5)			/* enable use of AltiVec now */
 	isync
 /*
  * For SMP, we don't do lazy AltiVec switching because it just gets too
@@ -962,7 +962,7 @@
 	mfmsr	r5
 	oris	r5,r5,MSR_VEC@h
 	SYNC
-	mtmsr	r5			/* enable use of AltiVec now */
+	MTMSRD(r5)			/* enable use of AltiVec now */
 	isync
 	cmpi	0,r3,0
 	beqlr-				/* if no previous owner, done */
@@ -999,7 +999,7 @@
 	ori	r5,r5,MSR_FP
 	SYNC_601
 	ISYNC_601
-	mtmsr	r5			/* enable use of fpu now */
+	MTMSRD(r5)			/* enable use of fpu now */
 	SYNC_601
 	isync
 	cmpi	0,r3,0
@@ -1191,6 +1191,8 @@
 	MTMSRD(r0)
 	isync
 #endif
+	/* Copy some CPU settings from CPU 0 */
+	bl	__restore_cpu_setup
 
 	lis	r3,-KERNELBASE@h
 	mr	r4,r24
@@ -1236,248 +1238,21 @@
 #endif /* CONFIG_SMP */
 
 /*
- * Enable caches and 604-specific features if necessary.
+ * These generic dummy functions are kept for CPUs not
+ * included in CONFIG_6xx
  */
-_GLOBAL(__setup_cpu_601)
-	blr
-_GLOBAL(__setup_cpu_603)
-	b	setup_common_caches
-_GLOBAL(__setup_cpu_604)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_604_hid0
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_750)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_750_7400_hid0
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_750cx)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_750_7400_hid0
-	bl	setup_750cx
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_750fx)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_750_7400_hid0
-	bl	setup_750fx
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_7400)
-	mflr	r4
-	bl	setup_7400_workarounds
-	bl	setup_common_caches
-	bl	setup_750_7400_hid0
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_7410)
-	mflr	r4
-	bl	setup_7410_workarounds
-	bl	setup_common_caches
-	bl	setup_750_7400_hid0
-	li	r3,0
-	mtspr	SPRN_L2CR2,r3
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_7450)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_745x_specifics
-	mtlr	r4
-	blr
-_GLOBAL(__setup_cpu_7455)
-	mflr	r4
-	bl	setup_common_caches
-	bl	setup_745x_specifics
-	mtlr	r4
-	blr
 _GLOBAL(__setup_cpu_power3)
 	blr
 _GLOBAL(__setup_cpu_generic)
 	blr
 
-/* Enable caches for 603's, 604, 750 & 7400 */
-setup_common_caches:
-	mfspr	r11,HID0
-	andi.	r0,r11,HID0_DCE
-#ifdef CONFIG_DCACHE_DISABLE
-	ori	r11,r11,HID0_ICE
-#else
-	ori	r11,r11,HID0_ICE|HID0_DCE
-#endif
-	ori	r8,r11,HID0_ICFI
-	bne	1f			/* don't invalidate the D-cache */
-	ori	r8,r8,HID0_DCI		/* unless it wasn't enabled */
-1:	sync
-	mtspr	HID0,r8			/* enable and invalidate caches */
-	sync
-	mtspr	HID0,r11		/* enable caches */
-	sync
-	isync
-	blr
-
-/* 604, 604e, 604ev, ...
- * Enable superscalar execution & branch history table
- */
-setup_604_hid0:
-	mfspr	r11,HID0
-	ori	r11,r11,HID0_SIED|HID0_BHTE
-	ori	r8,r11,HID0_BTCD
-	sync
-	mtspr	HID0,r8		/* flush branch target address cache */
-	sync			/* on 604e/604r */
-	mtspr	HID0,r11
-	sync
-	isync
-	blr
-
-/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
- * errata we work around here.
- * Moto MPC710CE.pdf describes them, those are errata
- * #3, #4 and #5
- * Note that we assume the firmware didn't choose to
- * apply other workarounds (there are other ones documented
- * in the .pdf). It appear that Apple firmware only works
- * around #3 and with the same fix we use. We may want to
- * check if the CPU is using 60x bus mode in which case
- * the workaround for errata #4 is useless. Also, we may
- * want to explicitely clear HID0_NOPDST as this is not
- * needed once we have applied workaround #5 (though it's
- * not set by Apple's firmware at least).
- */
-setup_7400_workarounds:
-	mfpvr	r3
-	rlwinm	r3,r3,0,20,31
-	cmpwi	0,r3,0x0207
-	ble	1f
-	blr
-setup_7410_workarounds:
-	mfpvr	r3
-	rlwinm	r3,r3,0,20,31
-	cmpwi	0,r3,0x0100
-	bnelr
-1:
-	mfspr	r11,SPRN_MSSSR0
-	/* Errata #3: Set L1OPQ_SIZE to 0x10 */
-	rlwinm	r11,r11,0,9,6
-	oris	r11,r11,0x0100
-	/* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */
-	oris	r11,r11,0x0002
-	/* Errata #5: Set DRLT_SIZE to 0x01 */
-	rlwinm	r11,r11,0,5,2
-	oris	r11,r11,0x0800
-	sync
-	mtspr	SPRN_MSSSR0,r11
-	sync
-	isync
-	blr
-	
-/* 740/750/7400/7410
- * Enable Store Gathering (SGE), Address Brodcast (ABE),
- * Branch History Table (BHTE), Branch Target ICache (BTIC)
- * Dynamic Power Management (DPM), Speculative (SPD)
- * Clear Instruction cache throttling (ICTC)
- */
-setup_750_7400_hid0:
-	mfspr	r11,HID0
-	ori	r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
-BEGIN_FTR_SECTION
-	oris	r11,r11,HID0_DPM@h	/* enable dynamic power mgmt */
-END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
-	li	r3,HID0_SPD
-	andc	r11,r11,r3		/* clear SPD: enable speculative */
- 	li	r3,0
- 	mtspr	ICTC,r3			/* Instruction Cache Throttling off */
-	isync
-	mtspr	HID0,r11
-	sync
-	isync
-	blr
-
-/* 750cx specific
- * Looks like we have to disable NAP feature for some PLL settings...
- * (waiting for confirmation)
- */
-setup_750cx:
-	mfspr	r10, SPRN_HID1
-	rlwinm	r10,r10,4,28,31
-	cmpi	cr0,r10,7
-	cmpi	cr1,r10,9
-	cmpi	cr2,r10,11
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	cror	4*cr0+eq,4*cr0+eq,4*cr2+eq
-	bnelr
-	lwz	r6,CPU_SPEC_FEATURES(r5)
-	li	r7,CPU_FTR_CAN_NAP
-	andc	r6,r6,r7
-	stw	r6,CPU_SPEC_FEATURES(r5)
+#ifndef CONFIG_6xx
+_GLOBAL(__save_cpu_setup)
 	blr
-
-/* 750fx specific
- */
-setup_750fx:
+_GLOBAL(__restore_cpu_setup)
 	blr
+#endif /* CONFIG_6xx */
 
-/* MPC 745x
- * Enable Store Gathering (SGE), Branch Folding (FOLD)
- * Branch History Table (BHTE), Branch Target ICache (BTIC)
- * Dynamic Power Management (DPM), Speculative (SPD)
- * Ensure our data cache instructions really operate.
- * Timebase has to be running or we wouldn't have made it here,
- * just ensure we don't disable it.
- * Clear Instruction cache throttling (ICTC)
- * Enable L2 HW prefetch
- */
-setup_745x_specifics:
-	/* We check for the presence of an L3 cache setup by
-	 * the firmware. If any, we disable NAP capability as
-	 * it's known to be bogus on rev 2.1 and earlier
-	 */
-	mfspr	r11,SPRN_L3CR
-	andis.	r11,r11,L3CR_L3E@h
-	beq	1f
-	lwz	r6,CPU_SPEC_FEATURES(r5)
-	andi.	r0,r6,CPU_FTR_L3_DISABLE_NAP
-	beq	1f
-	li	r7,CPU_FTR_CAN_NAP
-	andc	r6,r6,r7
-	stw	r6,CPU_SPEC_FEATURES(r5)
-1:	
-	mfspr	r11,HID0
-
-	/* All of the bits we have to set.....
-	 */
-	ori	r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE | HID0_BTIC | HID0_LRSTK
-BEGIN_FTR_SECTION
-	oris	r11,r11,HID0_DPM@h	/* enable dynamic power mgmt */
-END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
-
-	/* All of the bits we have to clear....
-	 */
-	li	r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI
-	andc	r11,r11,r3		/* clear SPD: enable speculative */
- 	li	r3,0
-
- 	mtspr	ICTC,r3			/* Instruction Cache Throttling off */
-	isync
-	mtspr	HID0,r11
-	sync
-	isync
-
-	/* Enable L2 HW prefetch
-	 */
-	mfspr	r3,SPRN_MSSCR0
-	ori	r3,r3,3
-	sync
-	mtspr	SPRN_MSSCR0,r3
-	sync
-	isync
-	blr
 
 /*
  * Load stuff into the MMU.  Intended to be called with
diff -Nru a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
--- a/arch/ppc/kernel/misc.S	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/kernel/misc.S	Mon Mar 31 13:41:08 2003
@@ -201,6 +201,60 @@
 	mr	r4,r24
 	bctr
 
+#ifdef CONFIG_CPU_FREQ_PMAC
+
+/* Called by cpu_750fx_cpu_speed() in pmac_cpufreq.c to switch the PLL
+ * selection on a 750FX CPU (r3 = PLL to select). This function
+ * should really be moved to some other place.
+ */
+_GLOBAL(low_choose_750fx_pll)
+	/* Clear MSR:EE (original MSR is kept in r7 and restored on exit) */
+	mfmsr	r7
+	rlwinm	r0,r7,0,17,15
+	mtmsr	r0
+
+	/* If switching to PLL1, disable HID0:BTIC */
+	cmpli	cr0,r3,0
+	beq	1f
+	mfspr	r5,HID0
+	rlwinm	r5,r5,0,27,25
+	sync
+	mtspr	HID0,r5
+	isync
+	sync
+	
+1:
+	/* Calc new HID1 value */
+	mfspr	r4,SPRN_HID1	/* Read the current HID1 */
+	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from parameter */
+	rlwinm	r4,r4,0,16,14	/* Clear out HID1:PS from value read */
+	or	r4,r4,r5	/* (could rlwimi have been used here ?) */
+	mtspr	SPRN_HID1,r4
+
+	/* Store new HID1 image in nap_save_hid1[cpu] */
+	rlwinm	r6,r1,0,0,18	/* r1 with low 13 bits cleared (presumably thread_info) */
+	lwz	r6,TI_CPU(r6)
+	slwi	r6,r6,2		/* cpu number * 4 = word offset */
+	addis	r6,r6,nap_save_hid1@ha
+	stw	r4,nap_save_hid1@l(r6)
+
+	/* If switching to PLL0, enable HID0:BTIC */
+	cmpli	cr0,r3,0
+	bne	1f
+	mfspr	r5,HID0
+	ori	r5,r5,HID0_BTIC
+	sync
+	mtspr	HID0,r5
+	isync
+	sync
+	
+1:	
+	/* Return, restoring the MSR saved on entry */		
+	mtmsr	r7
+	blr
+
+#endif /* CONFIG_CPU_FREQ_PMAC */
+
 /* void local_save_flags_ptr(unsigned long *flags) */
 _GLOBAL(local_save_flags_ptr)
 	mfmsr	r4
@@ -351,7 +405,16 @@
 	sync			/* Flush to memory before changing mapping */
 	tlbia
 	isync			/* Flush shadow TLB */
-#else /* ! defined(CONFIG_40x) */
+#elif defined(CONFIG_440)
+	lis	r3,0
+	sync
+1:
+	tlbwe	r3,r3,PPC440_TLB_PAGEID
+	addi	r3,r3,1
+	cmpwi	0,r3,61
+	ble	1b
+	isync
+#else /* !(CONFIG_40x || CONFIG_440) */
 #if defined(CONFIG_SMP)
 	rlwinm	r8,r1,0,0,18
 	lwz	r8,TI_CPU(r8)
@@ -392,7 +455,7 @@
  * Flush MMU TLB for a particular address
  */
 _GLOBAL(_tlbie)
-#ifdef CONFIG_40x
+#if defined(CONFIG_40x)
 	tlbsx.	r3, 0, r3
 	bne	10f
 	sync
@@ -402,7 +465,31 @@
 	tlbwe	r3, r3, TLB_TAG
 	isync
 10:
-#else /* ! CONFIG_40x */
+#elif defined(CONFIG_440)
+	mfspr	r4,SPRN_MMUCR			/* Get MMUCR */
+	lis	r5,PPC440_MMUCR_STS@h
+	ori	r5,r5,PPC440_MMUCR_TID@l	/* Create mask */
+	andc	r4,r4,r5			/* Clear out TID/STS bits */
+	mfspr	r5,SPRN_PID			/* Get PID */
+	or	r4,r4,r5			/* Set TID bits */
+	mfmsr	r6				/* Get MSR */
+	andi.	r6,r6,MSR_IS@l			/* TS=1? */
+	beq	11f				/* If not, leave STS=0 */
+	oris	r4,r4,PPC440_MMUCR_STS@h	/* Set STS=1 */
+11:	mtspr	SPRN_MMUCR, r4			/* Put MMUCR */
+
+	tlbsx.	r3, 0, r3
+	bne	10f
+	sync
+	/* There are only 64 TLB entries, so r3 < 64,
+	 * which means bit 22 is clear.  Since 22 is
+	 * the V bit in the TLB_PAGEID, loading this
+	 * value will invalidate the TLB entry.
+	 */
+	tlbwe	r3, r3, PPC440_TLB_PAGEID
+	isync
+10:
+#else /* !(CONFIG_40x || CONFIG_440) */
 #if defined(CONFIG_SMP)
 	rlwinm	r8,r1,0,0,18
 	lwz	r8,TI_CPU(r8)
@@ -569,22 +656,18 @@
 	blr
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
-/* This is a bad one....It is used by 'consistent_sync' functions when
- * there isn't any handle on the virtual address needed by the usual
- * cache flush instructions.  On the MPC8xx, we can use the cache line
- * flush command, on others all we can do is read enough data to completely
- * reload the cache, flushing old data out.
- */
-
-/* Cache organization.  The 4xx has a 8K (128 line) cache, and the 8xx
- * has 1, 2, 4, 8K variants.  For now, cover worst case.  When we can
- * deteremine actual size, we will use that later.
+/*
+ * 40x cores have 8K or 16K dcache and 32 byte line size.
+ * 440 has a 32K dcache and 32 byte line size.
+ * 8xx has 1, 2, 4, 8K variants.
+ * For now, cover the worst case of the 440.
+ * Must be called with external interrupts disabled.
  */
-#define CACHE_NWAYS	2
-#define CACHE_NLINES	128
+#define CACHE_NWAYS	64
+#define CACHE_NLINES	16
 
 _GLOBAL(flush_dcache_all)
-	li	r4, (CACHE_NWAYS * CACHE_NLINES)
+	li	r4, (2 * CACHE_NWAYS * CACHE_NLINES)
 	mtctr	r4
 	lis     r5, KERNELBASE@h
 1:	lwz	r3, 0(r5)		/* Load one word from every line */
diff -Nru a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c
--- a/arch/ppc/kernel/smp.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/kernel/smp.c	Mon Mar 31 13:41:08 2003
@@ -68,6 +68,9 @@
 static int __smp_call_function(void (*func) (void *info), void *info,
 			       int wait, int target);
 
+/* Low level assembly function used to backup CPU 0 state */
+extern void __save_cpu_setup(void);
+
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
  * 
  * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
@@ -348,6 +351,9 @@
 	/* Probe platform for CPUs: always linear. */
 	num_cpus = smp_ops->probe();
 	cpu_possible_map = (1 << num_cpus)-1;
+
+	/* Backup CPU 0 state */
+	__save_cpu_setup();
 
 	if (smp_ops->space_timers)
 		smp_ops->space_timers(num_cpus);
diff -Nru a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile
--- a/arch/ppc/platforms/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/platforms/Makefile	Mon Mar 31 13:41:08 2003
@@ -19,12 +19,14 @@
 obj-$(CONFIG_ALL_PPC)		+= pmac_pic.o pmac_setup.o pmac_time.o \
 					pmac_feature.o pmac_pci.o chrp_setup.o\
 					chrp_time.o chrp_pci.o prep_pci.o \
-					prep_time.o prep_setup.o
+					prep_time.o prep_setup.o pmac_sleep.o
 ifeq ($(CONFIG_ALL_PPC),y)
 obj-$(CONFIG_NVRAM)		+= pmac_nvram.o
 endif
 obj-$(CONFIG_PMAC_BACKLIGHT)	+= pmac_backlight.o
-obj-$(CONFIG_PMAC_PBOOK)	+= sleep.o
+ifeq ($(CONFIG_ALL_PPC),y)
+obj-$(CONFIG_CPU_FREQ_PMAC)	+= pmac_cpufreq.o
+endif
 obj-$(CONFIG_PPC_RTAS)		+= error_log.o proc_rtas.o
 obj-$(CONFIG_PREP_RESIDUAL)	+= residual.o
 obj-$(CONFIG_ADIR)		+= adir_setup.o adir_pic.o adir_pci.o
diff -Nru a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/ppc/platforms/pmac_cpufreq.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,345 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/hardirq.h>
+#include <asm/pmac_feature.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+
+#undef DEBUG_FREQ
+
+extern void low_choose_750fx_pll(int pll);
+extern void low_sleep_handler(void);
+extern void openpic_sleep_save_intrs(void);
+extern void openpic_sleep_restore_intrs(void);
+extern void enable_kernel_altivec(void);
+extern void enable_kernel_fp(void);
+
+static unsigned int low_freq;
+static unsigned int hi_freq;
+static unsigned int cur_freq;
+static int cpufreq_uses_pmu;
+
+#define PMAC_CPU_LOW_SPEED	1
+#define PMAC_CPU_HIGH_SPEED	0
+
+/* Re-arm the decrementer and resync the jiffy stamps to the timebase. */
+static inline void
+wakeup_decrementer(void)
+{
+	set_dec(tb_ticks_per_jiffy);
+	/* get_tbl (not native) is fine: no supported powerbook has a 601 */
+	tb_last_stamp = get_tbl();
+	last_jiffy_stamp(0) = tb_last_stamp;
+}
+
+#ifdef DEBUG_FREQ
+static inline void
+debug_calc_bogomips(void)
+{
+	/* Debug aid: re-run calibrate_delay() so that bogomips is
+	 * recomputed and displayed. loops_per_jiffy is saved and put back
+	 * so the core cpufreq framework's own calculation is unaffected.
+	 */
+	extern void calibrate_delay(void);
+
+	unsigned long save_lpj = loops_per_jiffy;
+	calibrate_delay();
+	loops_per_jiffy = save_lpj;
+}
+#endif /* DEBUG_FREQ */
+
+/* Switch CPU speed under 750FX CPU control: low_speed is handed
+ * unchanged to low_choose_750fx_pll(). Always returns 0. */
+static int __pmac
+cpu_750fx_cpu_speed(int low_speed)
+{
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));	
+#endif
+	low_choose_750fx_pll(low_speed);
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));	
+	debug_calc_bogomips();
+#endif
+
+	return 0;
+}
+
+/* Switch CPU speed under PMU control: quiesce interrupts, PMU and L2/L3,
+ * send PMU_CPU_SPEED, run low_sleep_handler(), then undo. Returns 0. */
+static int __pmac
+pmu_set_cpu_speed(unsigned int low_speed)
+{
+	struct adb_request req;
+	unsigned long save_l2cr;
+	unsigned long save_l3cr;
+	
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));	
+#endif
+	/* Disable all interrupt sources on openpic */
+	openpic_sleep_save_intrs();
+
+	/* Make sure the PMU is idle */
+	pmu_suspend();
+
+	/* Make sure the decrementer won't interrupt us */
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+	/* Make sure any pending DEC interrupt occurring while we did
+	 * the above didn't re-enable the DEC */
+	mb();
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+
+	/* We can now disable MSR_EE */
+	local_irq_disable();
+
+	/* Giveup the FPU & vec (presumably done by the enable_kernel_*() calls) */
+	enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+	if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+		enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+	/* Save & disable L2 and L3 caches (top bit of each register is cleared) */
+	save_l3cr = _get_L3CR();	/* (returns -1 if not available) */
+	save_l2cr = _get_L2CR();	/* (returns -1 if not available) */
+	if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
+		_set_L3CR(save_l3cr & 0x7fffffff);
+	if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
+		_set_L2CR(save_l2cr & 0x7fffffff);
+
+	/* Send the new speed command. My assumption is that this command
+	 * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep
+	 */
+	pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed);
+	while (!req.complete)	/* busy-poll: interrupts are off here */
+		pmu_poll();
+	
+	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1);
+
+	low_sleep_handler();
+	
+	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0);
+
+	/* Restore L2 cache (only if it was enabled on entry) */
+	if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
+ 		_set_L2CR(save_l2cr);
+	/* Restore L3 cache (only if it was enabled on entry) */
+	if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
+ 		_set_L3CR(save_l3cr);
+
+	/* Restore userland MMU context */
+	set_context(current->active_mm->context, current->active_mm->pgd);
+
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));	
+#endif
+
+	/* Restore decrementer */
+	wakeup_decrementer();
+
+	/* Restore interrupts */
+	openpic_sleep_restore_intrs();
+
+	pmu_resume();
+
+	/* Let interrupts flow again ... */
+	local_irq_enable();
+
+#ifdef DEBUG_FREQ
+	debug_calc_bogomips();
+#endif
+
+	return 0;
+}
+
+/* Notify cpufreq, switch speed via the PMU or the 750FX PLL, record it. */
+static int __pmac
+do_set_cpu_speed(int speed_mode)
+{
+	struct cpufreq_freqs transition;
+	unsigned int target_freq = low_freq;
+	int status;
+
+	if (speed_mode == PMAC_CPU_HIGH_SPEED)
+		target_freq = hi_freq;
+	transition.old = cur_freq;
+	transition.new = target_freq;
+	transition.cpu = CPUFREQ_ALL_CPUS;
+	cpufreq_notify_transition(&transition, CPUFREQ_PRECHANGE);
+	status = cpufreq_uses_pmu ? pmu_set_cpu_speed(speed_mode)
+				  : cpu_750fx_cpu_speed(speed_mode);
+	cpufreq_notify_transition(&transition, CPUFREQ_POSTCHANGE);
+	cur_freq = target_freq;
+	return status;
+}
+
+/* Clamp a proposed policy (CPU 0 only) to the [low_freq, hi_freq] range. */
+static int __pmac
+pmac_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	if (policy == NULL)
+		return -EINVAL;
+
+	policy->cpu = 0; /* UP only */
+	cpufreq_verify_within_limits(policy, low_freq, hi_freq);
+
+	/* A range strictly between both speeds is widened up to hi_freq */
+	if (policy->min > low_freq && policy->max < hi_freq)
+		policy->max = hi_freq;
+
+	return 0;
+}
+
+/* Choose low or high speed from the policy limits, then from its type. */
+static int __pmac
+pmac_cpufreq_setpolicy(struct cpufreq_policy *policy)
+{
+	int speed_mode;
+
+	if (policy == NULL)
+		return -EINVAL;
+	if (policy->min > low_freq)
+		speed_mode = PMAC_CPU_HIGH_SPEED;
+	else if (policy->max < hi_freq)
+		speed_mode = PMAC_CPU_LOW_SPEED;
+	else if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
+		speed_mode = PMAC_CPU_LOW_SPEED;
+	else
+		speed_mode = PMAC_CPU_HIGH_SPEED;
+	return do_set_cpu_speed(speed_mode);
+}
+
+/* Current frequency of CPU 0; any other index reports 0 (one CPU only). */
+unsigned int __pmac
+pmac_get_one_cpufreq(int i)
+{
+	return i ? 0 : cur_freq;
+}
+
+
+/* Probe for a supported machine and register the "powermac" cpufreq
+ * driver. Returns 0 or a negative errno. Currently supported machines:
+ *  - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz)
+ *  - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz)
+ *  - iBook2 500 (PMU based, 400Mhz & 500Mhz)
+ *  - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage)
+ */
+static int __init
+pmac_cpufreq_setup(void)
+{
+	struct device_node	*cpunode;
+	struct cpufreq_driver   *driver;
+	u32			*value;
+	size_t			alloc_size;
+	int			has_freq_ctl = 0;
+	int			rc;
+
+	/* Assume only one CPU */
+	cpunode = find_type_devices("cpu");
+	if (!cpunode)
+		goto out;
+
+	/* Get current cpu clock freq */
+	value = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	if (!value)
+		goto out;
+	cur_freq = (*value) / 1000;
+
+	/* Check for tibook 800Mhz or 1Ghz */
+	if (machine_is_compatible("PowerBook3,4") || machine_is_compatible("PowerBook3,5")) {
+		value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL);
+		if (!value)
+			goto out;
+		low_freq = (*value) / 1000;
+
+		value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL);
+		if (!value)
+			goto out;
+		hi_freq = (*value) / 1000;			
+		has_freq_ctl = 1;
+		cpufreq_uses_pmu = 1;
+	}
+	/* Else check for iBook2 500 */
+	else if (machine_is_compatible("PowerBook4,1")) {
+		/* We only know about 500Mhz model */
+		if (cur_freq < 450000 || cur_freq > 550000)
+			goto out;
+		hi_freq = cur_freq;
+		low_freq = 400000;
+		has_freq_ctl = 1;
+		cpufreq_uses_pmu = 1;
+	}
+	/* Else check for TiPb 500 */
+	else if (machine_is_compatible("PowerBook3,2")) {
+		/* We only know about 500Mhz model */
+		if (cur_freq < 450000 || cur_freq > 550000)
+			goto out;
+		hi_freq = cur_freq;
+		low_freq = 300000;
+		has_freq_ctl = 1;
+		cpufreq_uses_pmu = 1;
+	}
+	/* Else check for 750FX */
+	else if (PVR_VER(mfspr(PVR)) == 0x7000) {
+		if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
+			goto out;	
+		hi_freq = cur_freq;
+		value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
+		if (!value)
+			goto out;
+		low_freq = (*value) / 1000;
+		cpufreq_uses_pmu = 0;
+		has_freq_ctl = 1;
+	}
+out:
+	if (!has_freq_ctl)
+		return -ENODEV;
+	
+	/* initialization of main "cpufreq" code: driver + NR_CPUS policies */
+	alloc_size = sizeof(*driver) + NR_CPUS * sizeof(struct cpufreq_policy);
+	driver = kmalloc(alloc_size, GFP_KERNEL);
+	if (!driver)
+		return -ENOMEM;
+	memset(driver, 0, alloc_size);	/* kmalloc() does not zero memory */
+
+	driver->policy = (struct cpufreq_policy *) (driver + 1);
+
+	driver->verify		= &pmac_cpufreq_verify;
+	driver->setpolicy	= &pmac_cpufreq_setpolicy;
+	driver->init		= NULL;
+	driver->exit		= NULL;
+	strncpy(driver->name, "powermac", CPUFREQ_NAME_LEN);
+
+	driver->policy[0].cpu				= 0;
+	driver->policy[0].cpuinfo.transition_latency	= CPUFREQ_ETERNAL;
+	driver->policy[0].cpuinfo.min_freq		= low_freq;
+	driver->policy[0].min				= low_freq;
+	driver->policy[0].max				= cur_freq;
+	driver->policy[0].cpuinfo.max_freq		= cur_freq; /* NOTE(review): hi_freq? */
+	driver->policy[0].policy			= (cur_freq == low_freq) ? 
+	    	CPUFREQ_POLICY_POWERSAVE : CPUFREQ_POLICY_PERFORMANCE;
+
+	rc = cpufreq_register_driver(driver);
+	if (rc)
+		kfree(driver);
+	return rc;
+}
+
+__initcall(pmac_cpufreq_setup);
+
diff -Nru a/arch/ppc/platforms/pmac_feature.c b/arch/ppc/platforms/pmac_feature.c
--- a/arch/ppc/platforms/pmac_feature.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/platforms/pmac_feature.c	Mon Mar 31 13:41:08 2003
@@ -14,6 +14,8 @@
  *   - Replace mdelay with some schedule loop if possible
  *   - Shorten some obfuscated delays on some routines (like modem
  *     power)
+ *   - Refcount some clocks (see darwin)
+ *   - Split split split...
  *
  */
 #include <linux/config.h>
@@ -25,6 +27,8 @@
 #include <linux/spinlock.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
 #include <asm/sections.h>
 #include <asm/errno.h>
 #include <asm/ohare.h>
@@ -34,8 +38,10 @@
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/macio_asic.h>
 #include <asm/pmac_feature.h>
 #include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
 
 #undef DEBUG_FEATURE
 
@@ -46,7 +52,8 @@
 #endif
 
 /* Exported from arch/ppc/kernel/idle.c */
-extern unsigned long powersave_nap;
+extern int powersave_nap;
+extern int powersave_lowspeed;
 
 /*
  * We use a single global lock to protect accesses. Each driver has
@@ -57,53 +64,13 @@
 #define LOCK(flags)	spin_lock_irqsave(&feature_lock, flags);
 #define UNLOCK(flags)	spin_unlock_irqrestore(&feature_lock, flags);
 
+
 /*
- * Helper functions regarding the various flavors of mac-io
+ * Instance of some macio stuffs
  */
- 
-#define MAX_MACIO_CHIPS		2
+struct macio_chip macio_chips[MAX_MACIO_CHIPS]  __pmacdata;
 
-enum {
-	macio_unknown = 0,
-	macio_grand_central,
-	macio_ohare,
-	macio_ohareII,
-	macio_heathrow,
-	macio_gatwick,
-	macio_paddington,
-	macio_keylargo,
-	macio_pangea
-};
-
-static const char* macio_names[] __pmacdata = 
-{
-	"Unknown",
-	"Grand Central",
-	"OHare",
-	"OHareII",
-	"Heathrow",
-	"Gatwick",
-	"Paddington",
-	"Keylargo",
-	"Pangea"
-};
-
-static struct macio_chip
-{
-	struct device_node*	of_node;
-	int			type;
-	int			rev;
-	volatile u32*		base;
-	unsigned long		flags;
-} macio_chips[MAX_MACIO_CHIPS]  __pmacdata;
-
-#define MACIO_FLAG_SCCA_ON	0x00000001
-#define MACIO_FLAG_SCCB_ON	0x00000002
-#define MACIO_FLAG_SCC_LOCKED	0x00000004
-#define MACIO_FLAG_AIRPORT_ON	0x00000010
-#define MACIO_FLAG_FW_SUPPORTED	0x00000020
-
-static struct macio_chip* __pmac
+struct macio_chip* __pmac
 macio_find(struct device_node* child, int type)
 {
 	while(child) {
@@ -118,15 +85,21 @@
 	return NULL;
 }
 
-#define MACIO_FCR32(macio, r)	((macio)->base + ((r) >> 2))
-#define MACIO_FCR8(macio, r)	(((volatile u8*)((macio)->base)) + (r))
+static const char* macio_names[] __pmacdata = 
+{
+	"Unknown",
+	"Grand Central",
+	"OHare",
+	"OHareII",
+	"Heathrow",
+	"Gatwick",
+	"Paddington",
+	"Keylargo",
+	"Pangea",
+	"Intrepid"
+};
+
 
-#define MACIO_IN32(r)		(in_le32(MACIO_FCR32(macio,r)))
-#define MACIO_OUT32(r,v)	(out_le32(MACIO_FCR32(macio,r), (v)))
-#define MACIO_BIS(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) | (v)))
-#define MACIO_BIC(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) & ~(v)))
-#define MACIO_IN8(r)		(in_8(MACIO_FCR8(macio,r)))
-#define MACIO_OUT8(r,v)		(out_8(MACIO_FCR8(macio,r), (v)))
 
 /*
  * Uninorth reg. access. Note that Uni-N regs are big endian
@@ -196,7 +169,7 @@
 	unsigned long		chan_mask;
 	unsigned long		fcr;
 	unsigned long		flags;
-	int			htw;
+	int			htw, trans;
 	unsigned long		rmask;
 	
 	macio = macio_find(node, 0);
@@ -211,6 +184,9 @@
 
 	htw = (macio->type == macio_heathrow || macio->type == macio_paddington
 		|| macio->type == macio_gatwick);
+	/* On Yosemite and Yikes, the HRW_SCC_TRANS_EN_N bit mustn't be touched */
+	trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+	    	 pmac_mb.model_id != PMAC_TYPE_YIKES);
 	if (value) {
 #ifdef CONFIG_ADB_PMU
 		if ((param & 0xfff) == PMAC_SCC_IRDA)
@@ -222,7 +198,13 @@
 		if (!(fcr & OH_SCC_ENABLE)) {
 			fcr |= OH_SCC_ENABLE;
 			if (htw) {
-				fcr &= ~HRW_SCC_TRANS_EN_N;
+				/* Side effect: this will also power up the
+				 * modem, but it's too messy to figure out on which
+				 * ports this controls the transceiver and on which
+				 * it controls the modem
+				 */
+				if (trans)
+					fcr &= ~HRW_SCC_TRANS_EN_N;
 				MACIO_OUT32(OHARE_FCR, fcr);
 				fcr |= (rmask = HRW_RESET_SCC);
 				MACIO_OUT32(OHARE_FCR, fcr);
@@ -258,7 +240,7 @@
 		MACIO_OUT32(OHARE_FCR, fcr);
 		if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) {
 			fcr &= ~OH_SCC_ENABLE;
-			if (htw)
+			if (htw && trans)
 				fcr |= HRW_SCC_TRANS_EN_N;
 			MACIO_OUT32(OHARE_FCR, fcr);
 		}
@@ -330,9 +312,9 @@
 
 	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
 		return -EPERM;
-	if (value) {
+	if (value == 1) {
 		MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE);
-	} else {
+	} else if (value == 0) {
 		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
 	}
 	
@@ -522,7 +504,7 @@
 	return 0;
 }
 
-static u32 save_fcr[5] __pmacdata;
+static u32 save_fcr[6] __pmacdata;
 static u32 save_mbcr __pmacdata;
 static u32 save_gpio_levels[2] __pmacdata;
 static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT] __pmacdata;
@@ -586,6 +568,7 @@
 		/* This seems to be necessary as well or the fan
 		 * keeps coming up and battery drains fast */
 		MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N);
 		/* Make sure eth is down even if module or sleep
 		 * won't work properly */
 		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET);
@@ -750,7 +733,13 @@
 	struct macio_chip*	macio;
 	u8			gpio;
 	unsigned long		flags;
-	
+
+	/* Hack for internal USB modem */
+	if (node == NULL) {	
+		if (macio_chips[0].type != macio_keylargo)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
 	macio = macio_find(node, 0);
 	if (!macio)
 		return -ENODEV;
@@ -791,8 +780,96 @@
 }
 
 static int __pmac
+pangea_modem_enable(struct device_node* node, int param, int value)
+{	/* Drive the modem power/reset GPIOs; value != 0 enables. param is unused. */
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+	
+	/* Hack for internal USB modem: a NULL node means "use macio chip 0" */
+	if (node == NULL) {	
+		if (macio_chips[0].type != macio_pangea &&
+		    macio_chips[0].type != macio_intrepid)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;	/* gpio = reset line driven, data bit clear */
+	
+	if (!value) {	/* disabling: drive reset with data clear, wait 250ms */
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+    	LOCK(flags);
+	if (value) {	/* enabling: power GPIO driven with data clear, then 250ms */
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE);
+    		UNLOCK(flags);
+	    	(void)MACIO_IN32(KEYLARGO_FCR2);
+		mdelay(250);
+	} else {	/* disabling: power GPIO driven with data set */
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+    		UNLOCK(flags);
+	}
+	if (value) {	/* pulse reset: data set, clear, set; lock dropped for each 250ms wait */
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+	    	UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+	    	UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+	    	UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+
+static int __pmac
+core99_ata100_enable(struct device_node* node, int value)
+{
+	unsigned long irq_state;
+	struct pci_dev *ata_dev;
+	u8 bus_nr, dev_fn;
+
+	if (uninorth_rev < 0x24)	/* refused on UniNorth revs below 0x24 */
+		return -ENODEV;
+
+	LOCK(irq_state);
+	if (!value)
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	else
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(irq_state);
+	udelay(20);
+
+	/* Nothing more to do on disable, or when no PCI device is found */
+	if (!value || pci_device_from_OF_node(node, &bus_nr, &dev_fn) != 0)
+		return 0;
+	ata_dev = pci_find_slot(bus_nr, dev_fn);
+	if (ata_dev != NULL) {
+		pci_enable_device(ata_dev);
+		pci_set_master(ata_dev);
+	}
+	return 0;
+}
+
+static int __pmac
 core99_ide_enable(struct device_node* node, int param, int value)
 {
+	/* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2
+	 * based ata-100
+	 */
 	switch(param) {
 	    case 0:
 		return simple_feature_tweak(node, macio_unknown,
@@ -803,6 +880,8 @@
 	    case 2:
 		return simple_feature_tweak(node, macio_unknown,
 			KEYLARGO_FCR1, KL1_UIDE_ENABLE, value);
+	    case 3:
+	    	return core99_ata100_enable(node, value);
 	    default:
 	    	return -ENODEV;
 	}
@@ -850,7 +929,8 @@
 	struct macio_chip* macio;
 	
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
 
 	LOCK(flags);
@@ -987,27 +1067,40 @@
 static int __pmac
 core99_reset_cpu(struct device_node* node, int param, int value)
 {
-	const int reset_lines[] = {	KL_GPIO_RESET_CPU0,
-					KL_GPIO_RESET_CPU1,
-					KL_GPIO_RESET_CPU2,
-					KL_GPIO_RESET_CPU3 };
-	int reset_io;
+	unsigned int reset_io = 0;
 	unsigned long flags;
 	struct macio_chip* macio;
+	struct device_node* np;
+	const int dflt_reset_lines[] = {	KL_GPIO_RESET_CPU0,
+						KL_GPIO_RESET_CPU1,
+						KL_GPIO_RESET_CPU2,
+						KL_GPIO_RESET_CPU3 };
 	
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo)
 		return -ENODEV;
-	if (param > 3 || param < 0)
+		
+	np = find_path_device("/cpus");
+	if (np == NULL)
 		return -ENODEV;
-
-	reset_io = reset_lines[param];
+	for (np = np->child; np != NULL; np = np->sibling) {
+		u32* num = (u32 *)get_property(np, "reg", NULL);
+		u32* rst = (u32 *)get_property(np, "soft-reset", NULL);
+		if (num == NULL || rst == NULL)
+			continue;
+		if (param == *num) {
+			reset_io = *rst;
+			break;
+		}
+	}
+	if (np == NULL || reset_io == 0)
+		reset_io = dflt_reset_lines[param];
 	
 	LOCK(flags);
 	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
 	(void)MACIO_IN8(reset_io);
 	udelay(1);
-	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTOUT_DATA | KEYLARGO_GPIO_OUTPUT_ENABLE);
 	(void)MACIO_IN8(reset_io);
 	UNLOCK(flags);
 
@@ -1025,15 +1118,19 @@
 	u32 reg;
 	
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
-	
+
+	/* XXX Fix handling of 3rd USB controller in Intrepid, move the
+	 * port connect stuff (KL4_*) to the sleep code eventually
+	 */
 	prop = (char *)get_property(node, "AAPL,clock-id", NULL);
 	if (!prop)
 		return -ENODEV;
-	if (strncmp(prop, "usb0u048", strlen("usb0u048")) == 0)
+	if (strncmp(prop, "usb0u048", 8) == 0)
 		number = 0;
-	else if (strncmp(prop, "usb1u148", strlen("usb1u148")) == 0)
+	else if (strncmp(prop, "usb1u148", 8) == 0)
 		number = 2;
 	else
 		return -ENODEV;
@@ -1104,7 +1201,8 @@
 	struct macio_chip* macio;
 
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
 	if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED))
 		return -ENODEV;
@@ -1133,7 +1231,8 @@
 	if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0)
 	    	return -ENODEV;
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
 	if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED))
 		return -ENODEV;
@@ -1172,23 +1271,24 @@
 }
 
 static void __pmac
-keylargo_shutdown(struct macio_chip* macio, int restart)
+keylargo_shutdown(struct macio_chip* macio, int sleep_mode)
 {
 	u32 temp;
 
-	mdelay(1);
-	MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND);
-	(void)MACIO_IN32(KEYLARGO_FCR0);
-	mdelay(100);
+	if (sleep_mode) {
+		mdelay(1);
+		MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND);
+		(void)MACIO_IN32(KEYLARGO_FCR0);
+		mdelay(1);
+	}
 
 	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
 				KL0_SCC_CELL_ENABLE |
 		      		KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE |
 		      		KL0_IRDA_CLK19_ENABLE);
-
-	(void)MACIO_IN32(KEYLARGO_FCR0); udelay(10);
+	
 	MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK);
-	(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
+	MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE);
 
 	MACIO_BIC(KEYLARGO_FCR1,
 		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
@@ -1199,27 +1299,33 @@
 		KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N |
 		KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N |
 		KL1_UIDE_ENABLE);
-	(void)MACIO_IN32(KEYLARGO_FCR1); udelay(10);
 
 	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
- 	udelay(10);
  	MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE);
- 	udelay(10);
+
 	temp = MACIO_IN32(KEYLARGO_FCR3);
-	if (macio->rev >= 2)
-		temp |= (KL3_SHUTDOWN_PLL2X | KL3_SHUTDOWN_PLL_TOTAL);
-		
+	if (macio->rev >= 2) {
+		temp |= KL3_SHUTDOWN_PLL2X;
+		if (sleep_mode)
+			temp |= KL3_SHUTDOWN_PLL_TOTAL;
+	}
+	
 	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
-		KL3_SHUTDOWN_PLLKW35 | KL3_SHUTDOWN_PLLKW12;
+		KL3_SHUTDOWN_PLLKW35;
+	if (sleep_mode)
+		temp |= KL3_SHUTDOWN_PLLKW12;
 	temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE
-		| KL3_CLK31_ENABLE | KL3_TIMER_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE
-		| KL3_I2S0_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
+		| KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
 	MACIO_OUT32(KEYLARGO_FCR3, temp);
-	(void)MACIO_IN32(KEYLARGO_FCR3); udelay(10);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
 }
 
 static void __pmac
-pangea_shutdown(struct macio_chip* macio, int restart)
+pangea_shutdown(struct macio_chip* macio, int sleep_mode)
 {
 	u32 temp;
 
@@ -1227,10 +1333,6 @@
 				KL0_SCC_CELL_ENABLE |
 				KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE);
 
-	(void)MACIO_IN32(KEYLARGO_FCR0); udelay(10);
-	MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK);
-	(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
-
 	MACIO_BIC(KEYLARGO_FCR1,
 		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
 		KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
@@ -1238,18 +1340,54 @@
 		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
 		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
 		KL1_UIDE_ENABLE);
-	(void)MACIO_IN32(KEYLARGO_FCR1); udelay(10);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
 
 	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
- 	udelay(10);
+ 	
 	temp = MACIO_IN32(KEYLARGO_FCR3);
 	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
 		KL3_SHUTDOWN_PLLKW35;
-	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE
-		| KL3_CLK31_ENABLE | KL3_TIMER_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE
-		| KL3_I2S0_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE
+		| KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE);
 	MACIO_OUT32(KEYLARGO_FCR3, temp);
-	(void)MACIO_IN32(KEYLARGO_FCR3); udelay(10);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
+
+static void __pmac	/* Clear Intrepid cell/clock enables and set PLL shutdown bits (FCR0-FCR3) */
+intrepid_shutdown(struct macio_chip* macio, int sleep_mode)
+{
+	u32 temp;
+
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+				KL0_SCC_CELL_ENABLE |
+				KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE);
+
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_USB2_CELL_ENABLE |
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)	/* only on boards flagged PMAC_MB_MOBILE */
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+ 	
+	temp = MACIO_IN32(KEYLARGO_FCR3);	/* read-modify-write of FCR3 starts here */
+	temp |= KL3_IT_SHUTDOWN_PLL1 | KL3_IT_SHUTDOWN_PLL2 |
+		KL3_IT_SHUTDOWN_PLL3;
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE |
+		  KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)	/* timer and VIA clock enables are cleared only when sleep_mode != 0 */
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Read FCR0 back to flush posted writes, then wait 1ms */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
 
 static int __pmac
@@ -1259,7 +1397,8 @@
 	int i;
 
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
 	
 	/* We power off the wireless slot in case it was not done
@@ -1275,7 +1414,11 @@
 	}
 
 	/* We make sure int. modem is off (in case driver lost it) */
-	core99_modem_enable(macio->of_node, 0, 0);
+	if (macio->type == macio_keylargo)
+		core99_modem_enable(macio->of_node, 0, 0);
+	else
+		pangea_modem_enable(macio->of_node, 0, 0);
+
 	/* We make sure the sound is off as well */
 	core99_sound_chip_enable(macio->of_node, 0, 0);
 	 
@@ -1292,12 +1435,15 @@
 		save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i);
 
 	/* Save the FCRs */
-	save_mbcr = MACIO_IN32(KEYLARGO_MBCR);
+	if (macio->type == macio_keylargo)
+		save_mbcr = MACIO_IN32(KEYLARGO_MBCR);
 	save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0);
 	save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1);
 	save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2);
 	save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3);
 	save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid)
+		save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5);
 
 	/* Save state & config of DBDMA channels */
 	dbdma_save(macio, save_dbdma);
@@ -1306,9 +1452,11 @@
 	 * Turn off as much as we can
 	 */
 	if (macio->type == macio_pangea)
-		pangea_shutdown(macio, 0);
+		pangea_shutdown(macio, 1);
+	else if (macio->type == macio_intrepid)
+		intrepid_shutdown(macio, 1);
 	else if (macio->type == macio_keylargo)
-		keylargo_shutdown(macio, 0);
+		keylargo_shutdown(macio, 1);
 	
 	/* 
 	 * Put the host bridge to sleep
@@ -1338,7 +1486,8 @@
 	int i;
 
 	macio = &macio_chips[0];
-	if (macio->type != macio_keylargo && macio->type != macio_pangea)
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
 		return -ENODEV;
 
 	/*
@@ -1352,9 +1501,11 @@
 	/*
 	 * Restore KeyLargo
 	 */
-	 
-	MACIO_OUT32(KEYLARGO_MBCR, save_mbcr);
-	(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
+
+	if (macio->type == macio_keylargo) {
+		MACIO_OUT32(KEYLARGO_MBCR, save_mbcr);
+		(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
+	}
 	MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]);
 	(void)MACIO_IN32(KEYLARGO_FCR0); udelay(10);
 	MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]);
@@ -1365,6 +1516,10 @@
 	(void)MACIO_IN32(KEYLARGO_FCR3); udelay(10);
 	MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]);
 	(void)MACIO_IN32(KEYLARGO_FCR4); udelay(10);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid) {
+		MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]);
+		(void)MACIO_IN32(KEYLARGO_FCR5); udelay(10);
+	}
 
 	dbdma_restore(macio, save_dbdma);
 
@@ -1390,6 +1545,21 @@
 static int __pmac
 core99_sleep_state(struct device_node* node, int param, int value)
 {
+	/* Param == 1 means to enter the "fake sleep" mode that is
+	 * used for CPU speed switch
+	 */
+	if (param == 1) {
+		if (value == 1) {
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2);
+		} else {
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+			udelay(10);
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+			udelay(10);
+		}
+		return 0;
+	}
 	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
 		return -EPERM;
 	if (value == 1)
@@ -1400,55 +1570,6 @@
 }
 
 static int __pmac
-pangea_modem_enable(struct device_node* node, int param, int value)
-{
-	struct macio_chip*	macio;
-	u8			gpio;
-	unsigned long		flags;
-	
-	macio = macio_find(node, 0);
-	if (!macio)
-		return -ENODEV;
-	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
-	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
-	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;
-	
-	if (!value) {
-		LOCK(flags);
-		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
-		UNLOCK(flags);
-		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
-		mdelay(250);
-	}
-    	LOCK(flags);
-	if (value) {
-		MACIO_OUT8(KL_GPIO_MODEM_POWER,
-			KEYLARGO_GPIO_OUTPUT_ENABLE);
-    		UNLOCK(flags);
-	    	(void)MACIO_IN32(KEYLARGO_FCR2);
-		mdelay(250);
-	} else {
-		MACIO_OUT8(KL_GPIO_MODEM_POWER,
-			KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
-    		UNLOCK(flags);
-	}
-	if (value) {
-		LOCK(flags);
-		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
-		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
-	    	UNLOCK(flags); mdelay(250); LOCK(flags);
-		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
-		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
-	    	UNLOCK(flags); mdelay(250); LOCK(flags);
-		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
-		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
-	    	UNLOCK(flags); mdelay(250);
-	}
-	return 0;
-}
-
-
-static int __pmac
 generic_get_mb_info(struct device_node* node, int param, int value)
 {
 	switch(param) {
@@ -1561,6 +1682,26 @@
 	{ 0, NULL }
 };
 
+/* RackMac features (used by the RackMac1,1 and PowerMac3,6 board entries):
+ * no modem, sound chip or AirPort handlers. */
+static struct feature_table_entry rackmac_features[]  __pmacdata = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+#ifdef CONFIG_SMP
+	{ PMAC_FTR_RESET_CPU,		core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
+
 /* Pangea features
  */
 static struct feature_table_entry pangea_features[]  __pmacdata = {
@@ -1580,6 +1721,26 @@
 	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
 	{ 0, NULL }
 };
+
+/* Intrepid features: core99_* handlers plus pangea_modem_enable;
+ * no PMAC_FTR_RESET_CPU entry. */
+static struct feature_table_entry intrepid_features[]  __pmacdata = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	pangea_modem_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	core99_sound_chip_enable },
+	{ PMAC_FTR_AIRPORT_ENABLE,	core99_airport_enable },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
 	
 static struct pmac_mb_def pmac_mb_defs[] __pmacdata = {
 	/* Warning: ordering is important as some models may claim
@@ -1611,11 +1772,11 @@
 	},
 	{	"AAPL,3400/2400",		"PowerBook 3400",
 		PMAC_TYPE_HOOPER,		ohare_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
 	},
 	{	"AAPL,3500",			"PowerBook 3500",
 		PMAC_TYPE_KANGA,		ohare_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
 	},
 	{	"AAPL,Gossamer",		"PowerMac G3 (Gossamer)",
 		PMAC_TYPE_GOSSAMER,		heathrow_desktop_features,
@@ -1627,11 +1788,11 @@
 	},
 	{	"AAPL,PowerBook1998",		"PowerBook Wallstreet",
 		PMAC_TYPE_WALLSTREET,		heathrow_laptop_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
 	},
-	{	"AAPL,PowerBook1,1",		"PowerBook 101 (Lombard)",
+	{	"PowerBook1,1",			"PowerBook 101 (Lombard)",
 		PMAC_TYPE_101_PBOOK,		paddington_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
 	},
 	{	"iMac,1",			"iMac (first generation)",
 		PMAC_TYPE_ORIG_IMAC,		paddington_features,
@@ -1641,13 +1802,21 @@
 		PMAC_TYPE_PANGEA_IMAC,		pangea_features,
 		PMAC_MB_CAN_SLEEP
 	},
-	{	"PowerBook4,2",			"iBook 2 with 14\" LCD",
+	{	"PowerBook4,3",			"iBook 2 rev. 2",
 		PMAC_TYPE_IBOOK2,		pangea_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,2",			"iBook 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
 	},
 	{	"PowerBook4,1",			"iBook 2",
 		PMAC_TYPE_IBOOK2,		pangea_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerMac4,4",			"eMac",
+		PMAC_TYPE_EMAC,			core99_features,
+		PMAC_MB_CAN_SLEEP
 	},
 	{	"PowerMac4,2",			"Flat panel iMac",
 		PMAC_TYPE_FLAT_PANEL_IMAC,	pangea_features,
@@ -1663,34 +1832,35 @@
 	},
 	{	"PowerBook2,1",			"iBook (first generation)",
 		PMAC_TYPE_ORIG_IBOOK,		core99_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
 	},
 	{	"PowerMac3,1",			"PowerMac G4 AGP Graphics",
 		PMAC_TYPE_SAWTOOTH,		core99_features,
-		0
+		PMAC_MB_OLD_CORE99
 	},
 	{	"PowerMac3,2",			"PowerMac G4 AGP Graphics",
 		PMAC_TYPE_SAWTOOTH,		core99_features,
-		0
+		PMAC_MB_OLD_CORE99
 	},
 	{	"PowerMac3,3",			"PowerMac G4 AGP Graphics",
 		PMAC_TYPE_SAWTOOTH,		core99_features,
-		0
+		PMAC_MB_OLD_CORE99
 	},
 	{	"PowerMac2,1",			"iMac FireWire",
 		PMAC_TYPE_FW_IMAC,		core99_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99
 	},
 	{	"PowerMac2,2",			"iMac FireWire",
 		PMAC_TYPE_FW_IMAC,		core99_features,
-		PMAC_MB_CAN_SLEEP
+		PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99
 	},
 	{	"PowerBook2,2",			"iBook FireWire",
 		PMAC_TYPE_FW_IBOOK,		core99_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
 	},
 	{	"PowerMac5,1",			"PowerMac G4 Cube",
 		PMAC_TYPE_CUBE,			core99_features,
+		PMAC_MB_OLD_CORE99
 	},
 	{	"PowerMac3,4",			"PowerMac G4 Silver",
 		PMAC_TYPE_QUICKSILVER,		core99_features,
@@ -1702,19 +1872,31 @@
 	},
 	{	"PowerBook3,1",			"PowerBook Pismo",
 		PMAC_TYPE_PISMO,		core99_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
 	},
 	{	"PowerBook3,2",			"PowerBook Titanium",
 		PMAC_TYPE_TITANIUM,		core99_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
 	},
 	{	"PowerBook3,3",			"PowerBook Titanium II",
 		PMAC_TYPE_TITANIUM2,		core99_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
 	},
 	{	"PowerBook3,4",			"PowerBook Titanium III",
 		PMAC_TYPE_TITANIUM3,		core99_features,
-		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,5",			"PowerBook Titanium IV",
+		PMAC_TYPE_TITANIUM4,		core99_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"RackMac1,1",			"XServe",
+		PMAC_TYPE_RACKMAC,		rackmac_features,
+		0,
+	},
+	{	"PowerMac3,6",			"PowerMac G4 Windtunnel",
+		PMAC_TYPE_WINDTUNNEL,		rackmac_features,
+		0,
 	},
 };
 
@@ -1758,8 +1940,22 @@
 {
 	int i;
 	struct macio_chip* macio = &macio_chips[0];
-
-	/* Lookup known motherboard type in device-tree */
+	const char* model = NULL;
+	struct device_node *dt;
+	
+	/* Lookup known motherboard type in device-tree. First try an
+	 * exact match on the "model" property, then try a "compatible"
+	 * match if none is found.
+	 */
+	dt = find_devices("device-tree");
+	if (dt != NULL)
+		model = (const char *) get_property(dt, "model", NULL);
+	for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
+	    if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
+		pmac_mb = pmac_mb_defs[i];
+		goto found;
+	    }
+	}
 	for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
 	    if (machine_is_compatible(pmac_mb_defs[i].model_string)) {
 		pmac_mb = pmac_mb_defs[i];
@@ -1797,6 +1993,11 @@
 		pmac_mb.model_name = "Unknown Pangea-based";
 	    	pmac_mb.features = pangea_features;
 		break;
+	    case macio_intrepid:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA;
+		pmac_mb.model_name = "Unknown Pangea-based";
+	    	pmac_mb.features = intrepid_features;
+	    	break;
 	    default:
 	    	return -ENODEV;
 	}
@@ -1815,6 +2016,7 @@
 		iounmap(mach_id_ptr);
 	}
 
+#ifdef CONFIG_6xx
 	/* Set default value of powersave_nap on machines that support it.
 	 * It appears that uninorth rev 3 has a problem with it, we don't
 	 * enable it on those. In theory, the flush-on-lock property is
@@ -1823,7 +2025,6 @@
 	 */
 	while (uninorth_base && uninorth_rev > 3) {
 		struct device_node* np = find_path_device("/cpus");
-		u32 pvr = mfspr(PVR);
 		if (!np || !np->child) {
 			printk(KERN_WARNING "Can't find CPU(s) in device tree !\n");
 			break;
@@ -1835,14 +2036,23 @@
 		/* Nap mode not supported if flush-on-lock property is present */
 		if (get_property(np, "flush-on-lock", NULL))
 			break;
-		/* Some 7450 may have problem with NAP mode too ... */
-		if (((pvr >> 16) == 0x8000) && ((pvr & 0xffff) < 0x0201))
-			break;
 		powersave_nap = 1;
 		printk(KERN_INFO "Processor NAP mode on idle enabled.\n");
 		break;
 	}
 
+	/* On CPUs that support it (750FX), lowspeed by default during
+	 * NAP mode
+	 */
+	powersave_lowspeed = 1;
+#endif /* CONFIG_6xx */
+
+	/* Check for "mobile" machine */
+	if (model && (strncmp(model, "PowerBook", 9) == 0
+		   || strncmp(model, "iBook", 5) == 0))
+		pmac_mb.board_flags |= PMAC_MB_MOBILE;
+	
+	
 	printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name);
 	return 0;
 }
@@ -1857,7 +2067,7 @@
 	/* Locate core99 Uni-N */
 	uninorth_node = find_devices("uni-n");
 	if (uninorth_node && uninorth_node->n_addrs > 0) {
-		uninorth_base = ioremap(uninorth_node->addrs[0].address, 0x1000);
+		uninorth_base = ioremap(uninorth_node->addrs[0].address, 0x4000);
 		uninorth_rev = in_be32(UN_REG(UNI_N_VERSION));
 	} else
 		uninorth_node = NULL;
@@ -1867,15 +2077,23 @@
 	
 	printk(KERN_INFO "Found Uninorth memory controller & host bridge, revision: %d\n",
 			uninorth_rev);
+	printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base);
 
 	/* Set the arbitrer QAck delay according to what Apple does
 	 */
-	if (uninorth_rev < 0x10) {
+	if (uninorth_rev < 0x11) {
 		actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK;
 		actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 :
 			UNI_N_ARB_CTRL_QACK_DELAY) << UNI_N_ARB_CTRL_QACK_DELAY_SHIFT;
 		UN_OUT(UNI_N_ARB_CTRL, actrl);
 	}
+
+	/* Some more magic as done by them in recent MacOS X on UniNorth
+	 * revs 1.5 to 2.0 and Pangea. Seems to toggle the UniN Maxbus/PCI
+	 * memory timeout
+	 */
+	if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || uninorth_rev == 0xc0)
+		UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff);
 }	
 
 static void __init
@@ -1917,11 +2135,14 @@
 		u32* did = (u32 *)get_property(node, "device-id", NULL);
 		if (*did == 0x00000025)
 			type = macio_pangea;
+		if (*did == 0x0000003e)
+			type = macio_intrepid;
 	}
 	macio_chips[i].of_node	= node;
 	macio_chips[i].type	= type;
 	macio_chips[i].base	= base;
 	macio_chips[i].flags	= MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON;
+	macio_chips[i].name 	= macio_names[type];
 	revp = (u32 *)get_property(node, "revision-id", NULL);
 	if (revp)
 		macio_chips[i].rev = *revp;
@@ -2011,7 +2232,8 @@
 	}
 
 	if (macio_chips[0].type == macio_keylargo ||
-	    macio_chips[0].type == macio_pangea) {
+	    macio_chips[0].type == macio_pangea ||
+	    macio_chips[0].type == macio_intrepid) {
 		/* Enable GMAC for now for PCI probing. It will be disabled
 		 * later on after PCI probe
 		 */
@@ -2042,6 +2264,17 @@
 			np = np->next;
 		}
 		
+		/* Enable ATA-100 before PCI probe. */
+		np = find_devices("ata-6");
+		while(np) {
+			if (np->parent
+			    && device_is_compatible(np->parent, "uni-north")
+			    && device_is_compatible(np, "kauai-ata")) {
+				core99_ata100_enable(np, 1);
+			}
+			np = np->next;
+		}
+		
 		/* Switch airport off */
 		np = find_devices("radio");
 		while(np) {
@@ -2079,9 +2312,6 @@
 		initial_serial_shutdown(np);
 		np = np->next;
 	}
-	
-	/* Let hardware settle down */
-	mdelay(10);
 }
 
 void __init
diff -Nru a/arch/ppc/platforms/pmac_setup.c b/arch/ppc/platforms/pmac_setup.c
--- a/arch/ppc/platforms/pmac_setup.c	Mon Mar 31 13:41:07 2003
+++ b/arch/ppc/platforms/pmac_setup.c	Mon Mar 31 13:41:07 2003
@@ -225,6 +225,20 @@
 	return 0;
 }
 
+int __openfirmware
+pmac_show_percpuinfo(struct seq_file *m, int i)	/* installed as ppc_md.show_percpuinfo */
+{
+#ifdef CONFIG_CPU_FREQ_PMAC
+	extern unsigned int pmac_get_one_cpufreq(int i);
+	unsigned int freq = pmac_get_one_cpufreq(i);	/* presumably kHz (divided by 1000 for MHz) - confirm */
+	if (freq != 0) {	/* 0 falls through to the Open Firmware helper below */
+		seq_printf(m, "clock\t\t: %uMHz\n", freq/1000);	/* %u: the argument is unsigned */
+		return 0;
+	}
+#endif /* CONFIG_CPU_FREQ_PMAC */
+	return of_show_percpuinfo(m, i);
+}
+
 static volatile u32 *sysctrl_regs;
 
 void __init
@@ -604,7 +618,7 @@
 
 	ppc_md.setup_arch     = pmac_setup_arch;
 	ppc_md.show_cpuinfo   = pmac_show_cpuinfo;
-	ppc_md.show_percpuinfo = of_show_percpuinfo;
+	ppc_md.show_percpuinfo = pmac_show_percpuinfo;
 	ppc_md.irq_cannonicalize = NULL;
 	ppc_md.init_IRQ       = pmac_pic_init;
 	ppc_md.get_irq        = pmac_get_irq; /* Changed later on ... */
diff -Nru a/arch/ppc/platforms/pmac_sleep.S b/arch/ppc/platforms/pmac_sleep.S
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/ppc/platforms/pmac_sleep.S	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,375 @@
+/*
+ * This file contains sleep low-level functions for PowerBook G3.
+ *    Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *    and Paul Mackerras (paulus@samba.org).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/offsets.h>
+
+#define MAGIC	0x4c617273	/* 'Lars' */
+
+/*
+ * Structure for storing CPU registers on the stack.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_TB		0x60
+#define SL_R2		0x68
+#define SL_CR		0x6c
+#define SL_R12		0x70	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)
+
+	.section .text
+	.align	5
+
+#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC)
+
+/* This gets called by via-pmu.c late during the sleep process.
+ * The PMU has already been sent the sleep command and will shut us down
+ * soon. We need to save all that is needed and setup the wakeup
+ * vector that will be called by the ROM on wakeup
+ */
+_GLOBAL(low_sleep_handler)
+	mflr	r0
+	stw	r0,4(r1)
+	stwu	r1,-SL_SIZE(r1)
+	mfcr	r0
+	stw	r0,SL_CR(r1)
+	stw	r2,SL_R2(r1)
+	stmw	r12,SL_R12(r1)
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r1)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r1)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r1)
+	mftb	r5
+	stw	r5,SL_TB+4(r1)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b
+	
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r1)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r1)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r1)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r1)
+
+	/* Save BATs */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r1)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r1)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r1)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r1)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r1)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r1)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r1)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r1)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r1)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r1)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r1)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r1)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r1)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r1)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r1)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r1)
+
+	/* Backup various CPU config stuffs */
+	bl	__save_cpu_setup
+
+	/* The ROM can wake us up via 2 different vectors:
+	 *  - On wallstreet & lombard, we must write a magic
+	 *    value 'Lars' at address 4 and a pointer to a
+	 *    memory location containing the PC to resume from
+	 *    at address 0.
+	 *  - On Core99, we must store the wakeup vector at
+	 *    address 0x80 and optionally its parameters
+	 *    at address 0x84. I've had some trouble with those
+	 *    parameters however and I no longer use them.
+	 */
+	lis	r5,grackle_wake_up@ha
+	addi	r5,r5,grackle_wake_up@l
+	tophys(r5,r5)
+	stw	r5,SL_PC(r1)
+	lis	r4,KERNELBASE@h
+	tophys(r5,r1)
+	addi	r5,r5,SL_PC
+	lis	r6,MAGIC@ha
+	addi	r6,r6,MAGIC@l
+	stw	r5,0(r4)
+	stw	r6,4(r4)
+	/* Setup stuffs at 0x80-0x84 for Core99 */
+	lis	r3,core99_wake_up@ha
+	addi	r3,r3,core99_wake_up@l
+	tophys(r3,r3)
+	stw	r3,0x80(r4)
+	stw	r5,0x84(r4)
+	/* Store a pointer to our backup storage into
+	 * a kernel global
+	 */
+	lis r3,sleep_storage@ha
+	addi r3,r3,sleep_storage@l
+	stw r5,0(r3)
+
+BEGIN_FTR_SECTION
+	DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+/*
+ * Flush the L1 data cache by reading the first 128kB of RAM
+ * and then flushing the same area with the dcbf instruction.
+ * The L2 cache has already been disabled.
+ */
+	li	r4,0x1000	/* 128kB / 32B */
+	mtctr	r4
+	lis	r4,KERNELBASE@h
+1:
+	lwz	r0,0(r4)
+	addi	r4,r4,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	sync
+	
+	li	r4,0x1000	/* 128kB / 32B */
+	mtctr	r4
+	lis	r4,KERNELBASE@h
+1:
+	dcbf	r0,r4
+	addi	r4,r4,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	sync
+
+/*
+ * Set the HID0 and MSR for sleep.
+ */
+	mfspr	r2,HID0
+	rlwinm	r2,r2,0,10,7	/* clear doze, nap */
+	oris	r2,r2,HID0_SLEEP@h
+	sync
+	mtspr	HID0,r2
+	sync
+
+/* This loop puts us back to sleep in case we have a spurious
+ * wakeup so that the host bridge properly stays asleep. The
+ * CPU will be turned off, either after a known time (about 1
+ * second) on wallstreet & lombard, or as soon as the CPU enters
+ * SLEEP mode on core99
+ */
+	mfmsr	r2
+	oris	r2,r2,MSR_POW@h
+1:	sync
+	mtmsr	r2
+	isync
+	b	1b
+
+/* 
+ * Here is the resume code.
+ */
+
+
+/*
+ * Core99 machines resume here
+ * r4 has the physical address of SL_PC(sp) (unused)
+ */
+_GLOBAL(core99_wake_up)
+	/* Make sure HID0 no longer contains any sleep bit */
+	mfspr	r3,HID0
+	rlwinm	r3,r3,0,11,7		/* clear SLEEP, NAP, DOZE bits */
+	mtspr	HID0,r3
+	sync
+	isync
+
+	/* Won't that cause problems on CPU that doesn't support it ? */
+	lis	r3, 0
+	mtspr	SPRN_MMCR0, r3
+	
+	/* sanitize MSR */
+	mfmsr	r3
+	ori	r3,r3,MSR_EE|MSR_IP
+	xori	r3,r3,MSR_EE|MSR_IP
+	sync
+	isync
+	mtmsr	r3
+	sync
+	isync
+
+	/* Recover sleep storage */
+	lis	r3,sleep_storage@ha
+	addi	r3,r3,sleep_storage@l
+	tophys(r3,r3)
+	lwz	r1,0(r3)
+
+	/* Pass thru to older resume code ... */
+/* 
+ * Here is the resume code for older machines.
+ * r1 has the physical address of SL_PC(sp).
+ */
+	
+grackle_wake_up:
+	/* Enable and then flash-invalidate the instruction & data caches */
+	mfspr	r3,HID0
+	ori	r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
+	sync
+	isync
+	mtspr	HID0,r3
+	xori	r3,r3, HID0_ICFI|HID0_DCI
+	mtspr	HID0,r3
+	sync
+	
+	/* Restore the kernel's segment registers before
+	 * we do any r1 memory access as we are not sure they
+	 * are in a sane state above the first 256Mb region
+	 */
+	li	r0,16		/* load up segment register values */
+	mtctr	r0		/* for context 0 */
+	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
+	li	r4,0
+3:	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* increment VSID */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	
+	subi	r1,r1,SL_PC
+
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU. */
+	lwz	r4,SL_SDR1(r1)
+	mtsdr1	r4
+	lwz	r4,SL_SPRG0(r1)
+	mtsprg	0,r4
+	lwz	r4,SL_SPRG0+4(r1)
+	mtsprg	1,r4
+	lwz	r4,SL_SPRG0+8(r1)
+	mtsprg	2,r4
+	lwz	r4,SL_SPRG0+12(r1)
+	mtsprg	3,r4
+
+	lwz	r4,SL_DBAT0(r1)
+	mtdbatu	0,r4
+	lwz	r4,SL_DBAT0+4(r1)
+	mtdbatl	0,r4
+	lwz	r4,SL_DBAT1(r1)
+	mtdbatu	1,r4
+	lwz	r4,SL_DBAT1+4(r1)
+	mtdbatl	1,r4
+	lwz	r4,SL_DBAT2(r1)
+	mtdbatu	2,r4
+	lwz	r4,SL_DBAT2+4(r1)
+	mtdbatl	2,r4
+	lwz	r4,SL_DBAT3(r1)
+	mtdbatu	3,r4
+	lwz	r4,SL_DBAT3+4(r1)
+	mtdbatl	3,r4
+	lwz	r4,SL_IBAT0(r1)
+	mtibatu	0,r4
+	lwz	r4,SL_IBAT0+4(r1)
+	mtibatl	0,r4
+	lwz	r4,SL_IBAT1(r1)
+	mtibatu	1,r4
+	lwz	r4,SL_IBAT1+4(r1)
+	mtibatl	1,r4
+	lwz	r4,SL_IBAT2(r1)
+	mtibatu	2,r4
+	lwz	r4,SL_IBAT2+4(r1)
+	mtibatl	2,r4
+	lwz	r4,SL_IBAT3(r1)
+	mtibatu	3,r4
+	lwz	r4,SL_IBAT3+4(r1)
+	mtibatl	3,r4
+
+	/* Flush all TLBs: step r4 from 0x0ffff000 down to 0, one 4kB page per tlbie */
+	lis	r4,0x1000
+1:	addic.	r4,r4,-0x1000
+	tlbie	r4
+	bgt	1b		/* loop while r4 > 0; the former blt was never taken, so only one tlbie ran */
+	sync
+
+	/* restore the MSR and turn on the MMU */
+	lwz	r3,SL_MSR(r1)
+	bl	turn_on_mmu	
+
+	/* get back the stack pointer */
+	tovirt(r1,r1)
+
+	/* Restore TB */
+	li	r3,0
+	mttbl	r3
+	lwz	r3,SL_TB(r1)
+	lwz	r4,SL_TB+4(r1)
+	mttbu	r3
+	mttbl	r4
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r1)
+	mtcr	r0
+	lwz	r2,SL_R2(r1)
+	lmw	r12,SL_R12(r1)
+	addi	r1,r1,SL_SIZE
+	lwz	r0,4(r1)
+	mtlr	r0
+	blr
+
+turn_on_mmu:			/* in: r3 = MSR value to install (caller loads SL_MSR); clobbers r4 */
+	mflr	r4		/* r4 = return address of the bl that got us here */
+	tovirt(r4,r4)		/* convert it with the tovirt() macro */
+	mtsrr0	r4		/* rfi will continue at that address ... */
+	mtsrr1	r3		/* ... with the MSR taken from r3 */
+	sync
+	isync
+	rfi
+
+#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC) */
+
+
+	.data
+	.globl sleep_storage
+sleep_storage:
+	.long 0
diff -Nru a/arch/ppc/platforms/pmac_smp.c b/arch/ppc/platforms/pmac_smp.c
--- a/arch/ppc/platforms/pmac_smp.c	Mon Mar 31 13:41:07 2003
+++ b/arch/ppc/platforms/pmac_smp.c	Mon Mar 31 13:41:07 2003
@@ -106,14 +106,16 @@
 volatile static long int core99_l3_cache;
 
 static void __init
-core99_init_caches(void)
+core99_init_caches(int cpu)
 {
-	int cpu = smp_processor_id();
-
+	/* Check cache presence on cpu 0, we assume all CPUs have
+	 * same features here. We also assume that if we don't have
+	 * L2CR, we don't have L3CR either
+	 */
 	if (!(cur_cpu_spec[0]->cpu_features & CPU_FTR_L2CR))
 		return;
 
-	if (cpu == 0){
+	if (cpu == 0) {
 		core99_l2_cache = _get_L2CR();
 		printk("CPU0: L2CR is %lx\n", core99_l2_cache);
 	} else {
@@ -137,106 +139,6 @@
 	}
 }
 
-/* Some CPU registers have to be saved from the first CPU and
- * applied to others. Note that we override what is setup by
- * the cputable intentionally.
- */
-
-#define	reg_hid0	0
-#define	reg_hid1	1
-#define	reg_msscr0	2
-#define	reg_msssr0	3
-#define	reg_ictrl	4
-#define	reg_ldstcr	5
-#define	reg_ldstdb	6
-#define	reg_count	7
-
-static unsigned long cpu_regs[reg_count];
-
-static void __pmac
-cpu_setup_grab(void)
-{
-	unsigned int pvers = mfspr(SPRN_PVR)>>16;
-
-	/* Read cache setting of CPU 0 */
-	core99_init_caches();
-
-	/* 7400/7410/7450 */
-	if (pvers == 0x8000 || pvers == 0x000c || pvers == 0x800c) {
-		cpu_regs[reg_hid0] = mfspr(SPRN_HID0);
-		cpu_regs[reg_msscr0] = mfspr(SPRN_MSSCR0);
-		cpu_regs[reg_msssr0] = mfspr(SPRN_MSSSR0);
-	}
-	/* 7450 only */
-	if (pvers == 0x8000) {
-		cpu_regs[reg_hid1] = mfspr(SPRN_HID1);
-		cpu_regs[reg_ictrl] = mfspr(SPRN_ICTRL);
-		cpu_regs[reg_ldstcr] = mfspr(SPRN_LDSTCR);
-		cpu_regs[reg_ldstdb] = mfspr(SPRN_LDSTDB);
-	}
-	flush_dcache_range((unsigned long)cpu_regs, (unsigned long)&cpu_regs[reg_count]);
-}
-
-static void __pmac
-cpu_setup_apply(int cpu_nr)
-{
-	unsigned int pvers = mfspr(SPRN_PVR)>>16;
-
-	/* Apply cache setting from CPU 0 */
-	core99_init_caches();
-
-	/* 7400/7410/7450 */
-	if (pvers == 0x8000 || pvers == 0x000c || pvers == 0x800c) {
-		unsigned long tmp;
-		__asm__ __volatile__ (
-			"lwz	%0,4*"stringify(reg_hid0)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_HID0)", %0\n"
-			"isync;sync\n"
-			"lwz	%0, 4*"stringify(reg_msscr0)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_MSSCR0)", %0\n"
-			"isync;sync\n"
-//			"lwz	%0, "stringify(reg_msssr0)"(%1)\n"
-//			"sync\n"
-//			"mtspr	"stringify(SPRN_MSSSR0)", %0\n"
-//			"isync;sync\n"
-		: "=&r" (tmp) : "r" (cpu_regs));			
-	}
-	/* 7410 only */
-	if (pvers == 0x800c) {
-		unsigned long tmp;
-		__asm__ __volatile__ (
-			"li	%0, 0\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_L2CR2)", %0\n"
-			"isync;sync\n"
-		: "=&r" (tmp));		
-	}
-	/* 7450 only */
-	if (pvers == 0x8000) {
-		unsigned long tmp;
-		__asm__ __volatile__ (
-			"lwz	%0, 4*"stringify(reg_hid1)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_HID1)", %0\n"
-			"isync;sync\n"
-			"lwz	%0, 4*"stringify(reg_ictrl)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_ICTRL)", %0\n"
-			"isync;sync\n"
-			"lwz	%0, 4*"stringify(reg_ldstcr)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_LDSTCR)", %0\n"
-			"isync;sync\n"
-			"lwz	%0, 4*"stringify(reg_ldstdb)"(%1)\n"
-			"sync\n"
-			"mtspr	"stringify(SPRN_LDSTDB)", %0\n"
-			"isync;sync\n"
-		: "=&r" (tmp) : "r" (cpu_regs));		
-	}
-}
-
 /*
  * Set and clear IPIs for powersurge.
  */
@@ -501,7 +403,7 @@
 		/* reset the entry point so if we get another intr we won't
 		 * try to startup again */
 		out_be32(psurge_start, 0x100);
-		if (request_irq(30, psurge_primary_intr, 0, "primary IPI", 0))
+		if (request_irq(30, psurge_primary_intr, SA_INTERRUPT, "primary IPI", 0))
 			printk(KERN_ERR "Couldn't get primary IPI interrupt");
 	}
 
@@ -526,8 +428,10 @@
 		openpic_request_IPIs();
 		for (i = 1; i < ncpus; ++i)
 			smp_hw_index[i] = i;
+#ifdef CONFIG_6xx
 		powersave_nap = 0;
-		cpu_setup_grab();
+#endif
+		core99_init_caches(0);
 	}
 
 	return ncpus;
@@ -593,7 +497,7 @@
 {
 	/* Setup some registers */
 	if (cpu_nr != 0)
-		cpu_setup_apply(cpu_nr);
+		core99_init_caches(cpu_nr);
 	
 	/* Setup openpic */
 	do_openpic_setup_cpu();
@@ -605,20 +509,20 @@
 
 /* PowerSurge-style Macs */
 struct smp_ops_t psurge_smp_ops __pmacdata = {
-	smp_psurge_message_pass,
-	smp_psurge_probe,
-	smp_psurge_kick_cpu,
-	smp_psurge_setup_cpu,
-	.give_timebase = smp_generic_give_timebase,
-	.take_timebase = smp_generic_take_timebase,
+	.message_pass	= smp_psurge_message_pass,
+	.probe		= smp_psurge_probe,
+	.kick_cpu	= smp_psurge_kick_cpu,
+	.setup_cpu	= smp_psurge_setup_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
 };
 
 /* Core99 Macs (dual G4s) */
 struct smp_ops_t core99_smp_ops __pmacdata = {
-	smp_openpic_message_pass,
-	smp_core99_probe,
-	smp_core99_kick_cpu,
-	smp_core99_setup_cpu,
-	.give_timebase = smp_generic_give_timebase,
-	.take_timebase = smp_generic_take_timebase,
+	.message_pass	= smp_openpic_message_pass,
+	.probe		= smp_core99_probe,
+	.kick_cpu	= smp_core99_kick_cpu,
+	.setup_cpu	= smp_core99_setup_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
 };
diff -Nru a/arch/ppc/platforms/pmac_time.c b/arch/ppc/platforms/pmac_time.c
--- a/arch/ppc/platforms/pmac_time.c	Mon Mar 31 13:41:06 2003
+++ b/arch/ppc/platforms/pmac_time.c	Mon Mar 31 13:41:06 2003
@@ -202,6 +202,8 @@
 	printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n",
 	       tb_ticks_per_jiffy, dstart - dend);
 
+	iounmap((void*)via);
+	
 	return 1;
 }
 
diff -Nru a/arch/ppc/platforms/sleep.S b/arch/ppc/platforms/sleep.S
--- a/arch/ppc/platforms/sleep.S	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,444 +0,0 @@
-/*
- * This file contains sleep low-level functions for PowerBook G3.
- *    Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *    and Paul Mackerras (paulus@samba.org).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/ppc_asm.h>
-#include <asm/cputable.h>
-
-#define MAGIC	0x4c617273	/* 'Lars' */
-
-/*
- * Structure for storing CPU registers on the stack.
- */
-#define SL_SP		0
-#define SL_PC		4
-#define SL_MSR		8
-#define SL_SDR1		0xc
-#define SL_SPRG0	0x10	/* 4 sprg's */
-#define SL_DBAT0	0x20
-#define SL_IBAT0	0x28
-#define SL_DBAT1	0x30
-#define SL_IBAT1	0x38
-#define SL_DBAT2	0x40
-#define SL_IBAT2	0x48
-#define SL_DBAT3	0x50
-#define SL_IBAT3	0x58
-#define SL_TB		0x60
-#define SL_HID0		0x68
-#define SL_HID1		0x6c
-#define SL_MSSCR0	0x70
-#define SL_MSSSR0	0x74
-#define SL_ICTRL	0x78
-#define SL_LDSTCR	0x7c
-#define SL_LDSTDB	0x80
-#define SL_R2		0x84
-#define SL_CR		0x88
-#define SL_R12		0x8c	/* r12 to r31 */
-#define SL_SIZE		(SL_R12 + 80)
-
-	.text
-	.align	5
-
-/* This gets called by via-pmu.c late during the sleep process.
- * The PMU was already send the sleep command and will shut us down
- * soon. We need to save all that is needed and setup the wakeup
- * vector that will be called by the ROM on wakeup
- */
-_GLOBAL(low_sleep_handler)
-	mflr	r0
-	stw	r0,4(r1)
-	stwu	r1,-SL_SIZE(r1)
-	mfcr	r0
-	stw	r0,SL_CR(r1)
-	stw	r2,SL_R2(r1)
-	stmw	r12,SL_R12(r1)
-
-	/* Save MSR & SDR1 */
-	mfmsr	r4
-	stw	r4,SL_MSR(r1)
-	mfsdr1	r4
-	stw	r4,SL_SDR1(r1)
-
-	/* Get a stable timebase and save it */
-1:	mftbu	r4
-	stw	r4,SL_TB(r1)
-	mftb	r5
-	stw	r5,SL_TB+4(r1)
-	mftbu	r3
-	cmpw	r3,r4
-	bne	1b
-	
-	/* Save SPRGs */
-	mfsprg	r4,0
-	stw	r4,SL_SPRG0(r1)
-	mfsprg	r4,1
-	stw	r4,SL_SPRG0+4(r1)
-	mfsprg	r4,2
-	stw	r4,SL_SPRG0+8(r1)
-	mfsprg	r4,3
-	stw	r4,SL_SPRG0+12(r1)
-
-	/* Save BATs */
-	mfdbatu	r4,0
-	stw	r4,SL_DBAT0(r1)
-	mfdbatl	r4,0
-	stw	r4,SL_DBAT0+4(r1)
-	mfdbatu	r4,1
-	stw	r4,SL_DBAT1(r1)
-	mfdbatl	r4,1
-	stw	r4,SL_DBAT1+4(r1)
-	mfdbatu	r4,2
-	stw	r4,SL_DBAT2(r1)
-	mfdbatl	r4,2
-	stw	r4,SL_DBAT2+4(r1)
-	mfdbatu	r4,3
-	stw	r4,SL_DBAT3(r1)
-	mfdbatl	r4,3
-	stw	r4,SL_DBAT3+4(r1)
-	mfibatu	r4,0
-	stw	r4,SL_IBAT0(r1)
-	mfibatl	r4,0
-	stw	r4,SL_IBAT0+4(r1)
-	mfibatu	r4,1
-	stw	r4,SL_IBAT1(r1)
-	mfibatl	r4,1
-	stw	r4,SL_IBAT1+4(r1)
-	mfibatu	r4,2
-	stw	r4,SL_IBAT2(r1)
-	mfibatl	r4,2
-	stw	r4,SL_IBAT2+4(r1)
-	mfibatu	r4,3
-	stw	r4,SL_IBAT3(r1)
-	mfibatl	r4,3
-	stw	r4,SL_IBAT3+4(r1)
-
-	/* Save HID0 */
-	mfspr	r4,HID0
-	stw	r4,SL_HID0(r1)
-
-	/* Save 7400/7410/7450 specific registers */
-	mfspr	r3,PVR
-	srwi	r3,r3,16
-	cmpli	cr0,r3,0x8000
-	cmpli	cr1,r3,0x000c
-	cmpli	cr2,r3,0x800c
-	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
-	mfspr	r4,SPRN_MSSCR0
-	stw	r4,SL_MSSCR0(r1)
-	mfspr	r4,SPRN_MSSSR0
-	stw	r4,SL_MSSSR0(r1)
-	/* Save 7450 specific registers */
-	beq	cr1,1f
-	mfspr	r4,HID1
-	stw	r4,SL_HID1(r1)
-	mfspr	r4,SPRN_ICTRL
-	stw	r4,SL_ICTRL(r1)
-	mfspr	r4,SPRN_LDSTCR
-	stw	r4,SL_LDSTCR(r1)
-	mfspr	r4,SPRN_LDSTDB
-	stw	r4,SL_LDSTDB(r1)
-1:
-	/* The ROM can wake us up via 2 different vectors:
-	 *  - On wallstreet & lombard, we must write a magic
-	 *    value 'Lars' at address 4 and a pointer to a
-	 *    memory location containing the PC to resume from
-	 *    at address 0.
-	 *  - On Core99, we must store the wakeup vector at
-	 *    address 0x80 and eventually it's parameters
-	 *    at address 0x84. I've have some trouble with those
-	 *    parameters however and I no longer use them.
-	 */
-	lis	r5,grackle_wake_up@ha
-	addi	r5,r5,grackle_wake_up@l
-	tophys(r5,r5)
-	stw	r5,SL_PC(r1)
-	lis	r4,KERNELBASE@h
-	tophys(r5,r1)
-	addi	r5,r5,SL_PC
-	lis	r6,MAGIC@ha
-	addi	r6,r6,MAGIC@l
-	stw	r5,0(r4)
-	stw	r6,4(r4)
-	/* Setup stuffs at 0x80-0x84 for Core99 */
-	lis	r3,core99_wake_up@ha
-	addi	r3,r3,core99_wake_up@l
-	tophys(r3,r3)
-	stw	r3,0x80(r4)
-	stw	r5,0x84(r4)
-	/* Store a pointer to our backup storage into
-	 * a kernel global
-	 */
-	lis r3,sleep_storage@ha
-	addi r3,r3,sleep_storage@l
-	stw r5,0(r3)
-
-
-/*
- * Flush the L1 data cache by reading the first 128kB of RAM
- * and then flushing the same area with the dcbf instruction.
- * The L2 cache has already been disabled.
- */
-	li	r4,0x1000	/* 128kB / 32B */
-	mtctr	r4
-	lis	r4,KERNELBASE@h
-1:
-	lwz	r0,0(r4)
-	addi	r4,r4,0x0020	/* Go to start of next cache line */
-	bdnz	1b
-	sync
-	
-	li	r4,0x1000	/* 128kB / 32B */
-	mtctr	r4
-	lis	r4,KERNELBASE@h
-1:
-	dcbf	r0,r4
-	addi	r4,r4,0x0020	/* Go to start of next cache line */
-	bdnz	1b
-	sync
-
-/*
- * Set the HID0 and MSR for sleep.
- */
-	mfspr	r2,HID0
-	rlwinm	r2,r2,0,10,7	/* clear doze, nap */
-	oris	r2,r2,HID0_SLEEP@h
-	sync
-	mtspr	HID0,r2
-	sync
-
-/* This loop puts us back to sleep in case we have a spurrious
- * wakeup so that the host bridge properly stays asleep. The
- * CPU will be turned off, either after a known time (about 1
- * second) on wallstreet & lombard, or as soon as the CPU enters
- * SLEEP mode on core99
- */
-	mfmsr	r2
-	oris	r2,r2,MSR_POW@h
-1:	sync
-	mtmsr	r2
-	isync
-	b	1b
-
-/* 
- * Here is the resume code.
- */
-
-
-/*
- * Core99 machines resume here
- * r4 has the physical address of SL_PC(sp) (unused)
- */
-_GLOBAL(core99_wake_up)
-	/* Make sure HID0 no longer contains any sleep bit */
-	mfspr	r3,HID0
-	rlwinm	r3,r3,0,11,7		/* clear SLEEP, NAP, DOZE bits */
-	mtspr	HID0,r3
-	sync
-	isync
-
-	/* Won't that cause problems on CPU that doesn't support it ? */
-	lis	r3, 0
-	mtspr	SPRN_MMCR0, r3
-	
-	/* sanitize MSR */
-	mfmsr	r3
-	ori	r3,r3,MSR_EE|MSR_IP
-	xori	r3,r3,MSR_EE|MSR_IP
-	sync
-	isync
-	mtmsr	r3
-	sync
-	isync
-
-	/* Recover sleep storage */
-	lis	r3,sleep_storage@ha
-	addi	r3,r3,sleep_storage@l
-	tophys(r3,r3)
-	lwz	r1,0(r3)
-
-	/* Pass thru to older resume code ... */
-/* 
- * Here is the resume code for older machines.
- * r1 has the physical address of SL_PC(sp).
- */
-	
-grackle_wake_up:
-	/* Enable and then Flash inval the instruction & data cache */
-	mfspr	r3,HID0
-	ori	r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
-	sync
-	isync
-	mtspr	HID0,r3
-	xori	r3,r3, HID0_ICFI|HID0_DCI
-	mtspr	HID0,r3
-	sync
-	
-	/* Restore the kernel's segment registers before
-	 * we do any r1 memory access as we are not sure they
-	 * are in a sane state above the first 256Mb region
-	 */
-	li	r0,16		/* load up segment register values */
-	mtctr	r0		/* for context 0 */
-	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
-	li	r4,0
-3:	mtsrin	r3,r4
-	addi	r3,r3,0x111	/* increment VSID */
-	addis	r4,r4,0x1000	/* address of next segment */
-	bdnz	3b
-	
-	/* Restore the remaining bits of the HID0 register. */
-	subi	r1,r1,SL_PC
-	lwz	r3,SL_HID0(r1)
-	sync
-	isync
-	mtspr	HID0,r3
-	sync
-	isync
-
-	/* Restore 7400/7410/7450 specific registers */
-	mfspr	r3,PVR
-	srwi	r3,r3,16
-	cmpli	cr0,r3,0x8000
-	cmpli	cr1,r3,0x000c
-	cmpli	cr2,r3,0x800c
-	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
-	lwz	r4,SL_MSSCR0(r1)
-	sync
-	mtspr	SPRN_MSSCR0,r4
-	sync
-	isync
-	lwz	r4,SL_MSSSR0(r1)
-	sync
-	mtspr	SPRN_MSSSR0,r4
-	sync
-	isync
-	bne	cr2,1f
-	li	r4,0
-	mtspr	SPRN_L2CR2,r4
-	/* Restore 7450 specific registers */
-	beq	cr1,1f
-	lwz	r4,SL_HID1(r1)
-	sync
-	mtspr	HID1,r4
-	isync
-	sync
-	lwz	r4,SPRN_ICTRL(r1)
-	sync
-	mtspr	SPRN_ICTRL,r4
-	isync
-	sync
-	lwz	r4,SPRN_LDSTCR(r1)
-	sync
-	mtspr	SPRN_LDSTCR,r4
-	isync
-	sync
-	lwz	r4,SL_LDSTDB(r1)
-	sync
-	mtspr	SPRN_LDSTDB,r4
-	isync
-	sync
-1:
-	/* Restore the BATs, and SDR1.  Then we can turn on the MMU. */
-	lwz	r4,SL_SDR1(r1)
-	mtsdr1	r4
-	lwz	r4,SL_SPRG0(r1)
-	mtsprg	0,r4
-	lwz	r4,SL_SPRG0+4(r1)
-	mtsprg	1,r4
-	lwz	r4,SL_SPRG0+8(r1)
-	mtsprg	2,r4
-	lwz	r4,SL_SPRG0+12(r1)
-	mtsprg	3,r4
-
-	lwz	r4,SL_DBAT0(r1)
-	mtdbatu	0,r4
-	lwz	r4,SL_DBAT0+4(r1)
-	mtdbatl	0,r4
-	lwz	r4,SL_DBAT1(r1)
-	mtdbatu	1,r4
-	lwz	r4,SL_DBAT1+4(r1)
-	mtdbatl	1,r4
-	lwz	r4,SL_DBAT2(r1)
-	mtdbatu	2,r4
-	lwz	r4,SL_DBAT2+4(r1)
-	mtdbatl	2,r4
-	lwz	r4,SL_DBAT3(r1)
-	mtdbatu	3,r4
-	lwz	r4,SL_DBAT3+4(r1)
-	mtdbatl	3,r4
-	lwz	r4,SL_IBAT0(r1)
-	mtibatu	0,r4
-	lwz	r4,SL_IBAT0+4(r1)
-	mtibatl	0,r4
-	lwz	r4,SL_IBAT1(r1)
-	mtibatu	1,r4
-	lwz	r4,SL_IBAT1+4(r1)
-	mtibatl	1,r4
-	lwz	r4,SL_IBAT2(r1)
-	mtibatu	2,r4
-	lwz	r4,SL_IBAT2+4(r1)
-	mtibatl	2,r4
-	lwz	r4,SL_IBAT3(r1)
-	mtibatu	3,r4
-	lwz	r4,SL_IBAT3+4(r1)
-	mtibatl	3,r4
-
-	/* Flush all TLBs */
-	lis	r4,0x1000
-1:	addic.	r4,r4,-0x1000
-	tlbie	r4
-	blt	1b
-	sync
-
-	/* restore the MSR and turn on the MMU */
-	lwz	r3,SL_MSR(r1)
-	bl	turn_on_mmu	
-
-	/* get back the stack pointer */
-	tovirt(r1,r1)
-
-	/* Restore TB */
-	li	r3,0
-	mttbl	r3
-	lwz	r3,SL_TB(r1)
-	lwz	r4,SL_TB+4(r1)
-	mttbu	r3
-	mttbl	r4
-
-	/* Restore the callee-saved registers and return */
-	lwz	r0,SL_CR(r1)
-	mtcr	r0
-	lwz	r2,SL_R2(r1)
-	lmw	r12,SL_R12(r1)
-	addi	r1,r1,SL_SIZE
-	lwz	r0,4(r1)
-	mtlr	r0
-	blr
-
-turn_on_mmu:
-	mflr	r4
-	tovirt(r4,r4)
-	mtsrr0	r4
-	mtsrr1	r3
-	sync
-	isync
-	rfi
-
-	.data
-	.globl sleep_storage
-sleep_storage:
-	.long 0
diff -Nru a/arch/ppc/platforms/spruce_setup.c b/arch/ppc/platforms/spruce_setup.c
--- a/arch/ppc/platforms/spruce_setup.c	Mon Mar 31 13:41:06 2003
+++ b/arch/ppc/platforms/spruce_setup.c	Mon Mar 31 13:41:06 2003
@@ -133,8 +133,8 @@
 #endif
 
 	/* Identify the system */
-	printk("System Identification: IBM Spruce\n");
-	printk("IBM Spruce port (C) 2001 MontaVista Software, Inc. (source@mvista.com)\n");
+	printk(KERN_INFO "System Identification: IBM Spruce\n");
+	printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n");
 }
 
 static void
diff -Nru a/arch/ppc/syslib/prom_init.c b/arch/ppc/syslib/prom_init.c
--- a/arch/ppc/syslib/prom_init.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc/syslib/prom_init.c	Mon Mar 31 13:41:08 2003
@@ -275,7 +275,7 @@
 {
 	phandle node;
 	ihandle ih;
-	int i;
+	int i, j;
 	char type[16], *path;
 	static unsigned char default_colors[] = {
 		0x00, 0x00, 0x00,
@@ -335,26 +335,23 @@
 			break;
 	}
 
-try_again:
-	/*
-	 * Open the first display and set its colormap.
-	 */
-	if (prom_num_displays > 0) {
-		path = prom_display_paths[0];
+	for (j=0; j<prom_num_displays; j++) {
+		path = prom_display_paths[j];
 		prom_print("opening display ");
 		prom_print(path);
 		ih = call_prom("open", 1, 1, path);
 		if (ih == 0 || ih == (ihandle) -1) {
 			prom_print("... failed\n");
-			for (i=1; i<prom_num_displays; i++) {
+			for (i=j+1; i<prom_num_displays; i++) {
 				prom_display_paths[i-1] = prom_display_paths[i];
 				prom_display_nodes[i-1] = prom_display_nodes[i];
 			}
-			if (--prom_num_displays > 0)
-				prom_disp_node = prom_display_nodes[0];
-			else
+			if (--prom_num_displays > 0) {
+				prom_disp_node = prom_display_nodes[j];
+				j--;
+			} else
 				prom_disp_node = NULL;
-			goto try_again;
+			continue;
 		} else {
 			prom_print("... ok\n");
 			/*
@@ -369,7 +366,7 @@
 					break;
 
 #ifdef CONFIG_LOGO_LINUX_CLUT224
-			clut = logo_linux_clut224.clut;
+			clut = PTRRELOC(logo_linux_clut224.clut);
 			for (i = 0; i < logo_linux_clut224.clutsize;
 			     i++, clut += 3)
 				if (prom_set_color(ih, i + 32, clut[0],
diff -Nru a/arch/ppc64/Makefile b/arch/ppc64/Makefile
--- a/arch/ppc64/Makefile	Mon Mar 31 13:41:06 2003
+++ b/arch/ppc64/Makefile	Mon Mar 31 13:41:06 2003
@@ -13,10 +13,10 @@
 # Adjusted for PPC64 by Tom Gall
 #
 
-KERNELLOAD	= 0xc000000000000000
+KERNELLOAD	:= 0xc000000000000000
 
 LDFLAGS		:= -m elf64ppc
-LDFLAGS_vmlinux	= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
+LDFLAGS_vmlinux	:= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
 LDFLAGS_BLOB	:= --format binary --oformat elf64-powerpc
 CFLAGS		+= -msoft-float -pipe -Wno-uninitialized -mminimal-toc \
 		-mtraceback=full -mcpu=power4
@@ -29,21 +29,19 @@
 core-$(CONFIG_XMON)		+= arch/ppc64/xmon/
 drivers-$(CONFIG_OPROFILE)	+= arch/ppc64/oprofile/
 
-makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/ppc64/boot $(1)
+boot := arch/ppc64/boot
 
 boottarget-$(CONFIG_PPC_PSERIES) := zImage zImage.initrd
 boottarget-$(CONFIG_PPC_ISERIES) := vmlinux.sminitrd vmlinux.initrd vmlinux.sm
 $(boottarget-y): vmlinux
-	$(call makeboot,arch/ppc64/boot/$@)
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 %_config: arch/ppc64/configs/%_defconfig
 	rm -f .config arch/ppc64/defconfig
 	cp -f arch/ppc64/configs/$(@:config=defconfig) arch/ppc64/defconfig
 
 archclean:
-	$(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ppc64/boot
-
-archmrproper:
+	$(Q)$(MAKE) $(clean)=$(boot)
 
 prepare: include/asm-ppc64/offsets.h
 
diff -Nru a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile
--- a/arch/ppc64/boot/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/boot/Makefile	Mon Mar 31 13:41:08 2003
@@ -79,7 +79,7 @@
 $(obj)/vmlinux.sminitrd: $(obj)/vmlinux.sm $(obj)/addRamDisk $(obj)/ramdisk.image.gz FORCE
 	$(call if_changed,ramdisk)
 
-$(obj)/sysmap.o: System.map $(obj)/piggyback
+$(obj)/sysmap.o: System.map $(obj)/piggyback FORCE
 	$(call if_changed,piggy)
 
 addsection = $(BOOTOBJCOPY) $(1) \
@@ -92,7 +92,7 @@
 quiet_cmd_piggy = PIGGY   $@
       cmd_piggy = $(obj)/piggyback $(@:.o=) < $< | $(BOOTAS) -o $@
 
-$(call gz-sec, $(required)): $(obj)/kernel-%.gz: %
+$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE
 	$(call if_changed,gzip)
 
 $(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz
@@ -101,7 +101,7 @@
 $(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz
 	touch $@
 
-$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c
+$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c FORCE
 	$(call if_changed_dep,bootcc)
 	$(call addsection, $@)
 
diff -Nru a/arch/ppc64/kernel/chrp_setup.c b/arch/ppc64/kernel/chrp_setup.c
--- a/arch/ppc64/kernel/chrp_setup.c	Mon Mar 31 13:41:06 2003
+++ b/arch/ppc64/kernel/chrp_setup.c	Mon Mar 31 13:41:06 2003
@@ -71,9 +71,6 @@
 extern void init_ras_IRQ(void);
 
 extern void find_and_init_phbs(void);
-extern void pSeries_pcibios_fixup(void);
-extern void pSeries_pcibios_fixup_bus(struct pci_bus *bus);
-extern void iSeries_pcibios_fixup(void);
 
 extern void pSeries_get_rtc_time(struct rtc_time *rtc_time);
 extern int  pSeries_set_rtc_time(struct rtc_time *rtc_time);
@@ -201,7 +198,6 @@
 
 	hpte_init_pSeries();
 	tce_init_pSeries();
-	pSeries_pcibios_init_early();
 
 #ifdef CONFIG_SMP
 	smp_init_pSeries();
@@ -243,15 +239,6 @@
 		ppc_md.get_irq        = xics_get_irq;
 	}
 	ppc_md.init_ras_IRQ = init_ras_IRQ;
-
- 	#ifndef CONFIG_PPC_ISERIES
- 		ppc_md.pcibios_fixup = pSeries_pcibios_fixup;
- 		ppc_md.pcibios_fixup_bus = pSeries_pcibios_fixup_bus;
- 	#else 
- 		ppc_md.pcibios_fixup = NULL;
- 		// ppc_md.pcibios_fixup = iSeries_pcibios_fixup;
- 	#endif
-
 
 	ppc_md.init           = chrp_init2;
 
diff -Nru a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c
--- a/arch/ppc64/kernel/iSeries_pci.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/iSeries_pci.c	Mon Mar 31 13:41:08 2003
@@ -84,8 +84,6 @@
 struct iSeries_Device_Node* get_Device_Node(struct pci_dev* PciDev);
 
 unsigned long find_and_init_phbs(void);
-void          fixup_resources(struct pci_dev *dev);
-void          iSeries_pcibios_fixup(void);
 struct        pci_controller* alloc_phb(struct device_node *dev, char *model, unsigned int addr_size_words) ;
 
 void  iSeries_Scan_PHBs_Slots(struct pci_controller* Phb);
@@ -275,7 +273,7 @@
 	return 0;
 }
 /*********************************************************************** 
- * ppc64_pcibios_init
+ * iSeries_pcibios_init
  *  
  * Chance to initialize and structures or variable before PCI Bus walk.
  *  
@@ -302,9 +300,9 @@
 	PPCDBG(PPCDBG_BUSWALK,"iSeries_pcibios_init Exit.\n"); 
 }
 /***********************************************************************
- * iSeries_pcibios_fixup(void)  
+ * pcibios_final_fixup(void)  
  ***********************************************************************/
-void __init iSeries_pcibios_fixup(void)
+void __init pcibios_final_fixup(void)
 {
 	struct pci_dev* PciDev;
 	struct iSeries_Device_Node* DeviceNode;
@@ -328,8 +326,6 @@
 
 			iSeries_allocateDeviceBars(PciDev);
 
-			PPCDBGCALL(PPCDBG_BUSWALK,dumpPci_Dev(PciDev) );
-
 			iSeries_Device_Information(PciDev,Buffer, sizeof(Buffer) );
 			printk("%d. %s\n",DeviceCount,Buffer);
 
@@ -345,11 +341,7 @@
 	mf_displaySrc(0xC9000200);
 }
 
-/***********************************************************************
- * iSeries_pcibios_fixup_bus(int Bus)
- *
- ***********************************************************************/
-void iSeries_pcibios_fixup_bus(struct pci_bus* PciBus)
+void pcibios_fixup_bus(struct pci_bus* PciBus)
 {
 	PPCDBG(PPCDBG_BUSWALK,"iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",PciBus->number); 
 
@@ -357,12 +349,12 @@
 
 
 /***********************************************************************
- * fixup_resources(struct pci_dev *dev) 
+ * pcibios_fixup_resources(struct pci_dev *dev) 
  *	
  ***********************************************************************/
-void fixup_resources(struct pci_dev *PciDev)
+void pcibios_fixup_resources(struct pci_dev *PciDev)
 {
-	PPCDBG(PPCDBG_BUSWALK,"fixup_resources PciDev %p\n",PciDev);
+	PPCDBG(PPCDBG_BUSWALK,"pcibios_fixup_resources PciDev %p\n",PciDev);
 }   
 
 
@@ -910,18 +902,3 @@
 	} while (CheckReturnCode("WWL",DevNode, Return.rc) != 0);
 	if(Pci_Trace_Flag == 1) PCIFR("WWL: IoAddress 0x%p = 0x%08X",IoAddress, Data);
 }
-/*
- * This is called very early before the page table is setup.
- * There are warnings here because of type mismatches.. Okay for now. AHT
- */
-void 
-iSeries_pcibios_init_early(void)
-{
-	//ppc_md.pcibios_read_config_byte   = iSeries_Node_read_config_byte;
-	//ppc_md.pcibios_read_config_word   = iSeries_Node_read_config_word;
-	//ppc_md.pcibios_read_config_dword  = iSeries_Node_read_config_dword;
-	//ppc_md.pcibios_write_config_byte  = iSeries_Node_write_config_byte;
-	//ppc_md.pcibios_write_config_word  = iSeries_Node_write_config_word;
-	//ppc_md.pcibios_write_config_dword = iSeries_Node_write_config_dword;
-}
-
diff -Nru a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c
--- a/arch/ppc64/kernel/iSeries_setup.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/iSeries_setup.c	Mon Mar 31 13:41:08 2003
@@ -62,8 +62,6 @@
 		       pte_t * ptep, unsigned hpteflags, unsigned bolted );
 extern void ppcdbg_initialize(void);
 extern void iSeries_pcibios_init(void);
-extern void iSeries_pcibios_fixup(void);
-extern void iSeries_pcibios_fixup_bus(int);
 static void iSeries_setup_dprofile(void);
 
 /* Global Variables */
@@ -316,9 +314,6 @@
 	ppc_md.init_ras_IRQ		= NULL;
 	ppc_md.get_irq		 	= iSeries_get_irq;
 	ppc_md.init		 	= NULL;
-
- 	ppc_md.pcibios_fixup        = iSeries_pcibios_fixup;
-	ppc_md.pcibios_fixup_bus    = iSeries_pcibios_fixup_bus;
 
 	ppc_md.restart		 	= iSeries_restart;
 	ppc_md.power_off	 	= iSeries_power_off;
diff -Nru a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c
--- a/arch/ppc64/kernel/pSeries_lpar.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/pSeries_lpar.c	Mon Mar 31 13:41:08 2003
@@ -320,7 +320,6 @@
 #ifdef CONFIG_SMP
 	smp_init_pSeries();
 #endif
-	pSeries_pcibios_init_early();
 
 	/* The keyboard is not useful in the LPAR environment.
 	 * Leave all the interfaces NULL.
diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c
--- a/arch/ppc64/kernel/pSeries_pci.c	Mon Mar 31 13:41:09 2003
+++ b/arch/ppc64/kernel/pSeries_pci.c	Mon Mar 31 13:41:09 2003
@@ -2,6 +2,7 @@
  * pSeries_pci.c
  *
  * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
  *
  * pSeries specific routines for PCI.
  * 
@@ -51,6 +52,8 @@
 
 static int s7a_workaround;
 
+extern unsigned long pci_probe_only;
+
 static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
 {
 	unsigned long returnval = ~0L;
@@ -371,9 +374,6 @@
 		phb->last_busno += (phb->global_number << 8);
 	}
 
-	/* Dump PHB information for Debug */
-	PPCDBGCALL(PPCDBG_PHBINIT, dumpPci_Controller(phb));
-
 	return phb;
 }
 
@@ -423,129 +423,96 @@
 	return 0;
 }
 
-void 
-fixup_resources(struct pci_dev *dev)
+void pcibios_name_device(struct pci_dev *dev)
 {
- 	int i;
- 	struct pci_controller *phb = PCI_GET_PHB_PTR(dev);
 	struct device_node *dn;
 
-	/* Add IBM loc code (slot) as a prefix to the device names for service */
+	/*
+	 * Add IBM loc code (slot) as a prefix to the device names for service
+	 */
 	dn = pci_device_to_OF_node(dev);
 	if (dn) {
 		char *loc_code = get_property(dn, "ibm,loc-code", 0);
 		if (loc_code) {
 			int loc_len = strlen(loc_code);
 			if (loc_len < sizeof(dev->dev.name)) {
-				memmove(dev->dev.name+loc_len+1, dev->dev.name, sizeof(dev->dev.name)-loc_len-1);
+				memmove(dev->dev.name+loc_len+1, dev->dev.name,
+					sizeof(dev->dev.name)-loc_len-1);
 				memcpy(dev->dev.name, loc_code, loc_len);
 				dev->dev.name[loc_len] = ' ';
 				dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
 			}
 		}
 	}
+}   
 
-	PPCDBG(PPCDBG_PHBINIT, "fixup_resources:\n"); 
-	PPCDBG(PPCDBG_PHBINIT, "\tphb                 = 0x%016LX\n", phb); 
-	PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_io_offset  = 0x%016LX\n", phb->pci_io_offset); 
-	PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_mem_offset = 0x%016LX\n", phb->pci_mem_offset); 
-
-	PPCDBG(PPCDBG_PHBINIT, "\tdev->dev.name   = %s\n", dev->dev.name);
-	PPCDBG(PPCDBG_PHBINIT, "\tdev->vendor:device = 0x%04X : 0x%04X\n", dev->vendor, dev->device);
-
-	if (phb == NULL)
-		return;
-
- 	for (i = 0; i <  DEVICE_COUNT_RESOURCE; ++i) {
-		PPCDBG(PPCDBG_PHBINIT, "\tdevice %x.%x[%d] (flags %x) [%lx..%lx]\n",
-			    dev->bus->number, dev->devfn, i,
-			    dev->resource[i].flags,
-			    dev->resource[i].start,
-			    dev->resource[i].end);
-
-		if ((dev->resource[i].start == 0) && (dev->resource[i].end == 0)) {
-			continue;
-		}
-		if (dev->resource[i].start > dev->resource[i].end) {
-			/* Bogus resource.  Just clear it out. */
-			dev->resource[i].start = dev->resource[i].end = 0;
-			continue;
-		}
+void __init pcibios_fixup_device_resources(struct pci_dev *dev,
+					   struct pci_bus *bus)
+{
+	/* Update device resources.  */
+	struct pci_controller *hose = PCI_GET_PHB_PTR(bus);
+	int i;
 
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 		if (dev->resource[i].flags & IORESOURCE_IO) {
-			unsigned long offset = (unsigned long)phb->io_base_virt - pci_io_base;
+			unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
 			dev->resource[i].start += offset;
 			dev->resource[i].end += offset;
-			PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx .. %lx]\n",
-			       dev->resource[i].start, dev->resource[i].end);
-		} else if (dev->resource[i].flags & IORESOURCE_MEM) {
-			if (dev->resource[i].start == 0) {
-				/* Bogus.  Probably an unused bridge. */
-				dev->resource[i].end = 0;
-			} else {
-				dev->resource[i].start += phb->pci_mem_offset;
-				dev->resource[i].end += phb->pci_mem_offset;
-			}
-			PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx..%lx]\n",
-			       dev->resource[i].start, dev->resource[i].end);
-
-		} else {
-			continue;
 		}
+                else if (dev->resource[i].flags & IORESOURCE_MEM) {
+			dev->resource[i].start += hose->pci_mem_offset;
+			dev->resource[i].end += hose->pci_mem_offset;
+		}
+        }
+}
 
- 		/* zap the 2nd function of the winbond chip */
- 		if (dev->resource[i].flags & IORESOURCE_IO
- 		    && dev->bus->number == 0 && dev->devfn == 0x81)
- 			dev->resource[i].flags &= ~IORESOURCE_IO;
- 	}
-}   
-
-void __init pSeries_pcibios_fixup_bus(struct pci_bus *bus)
+void __init pcibios_fixup_bus(struct pci_bus *bus)
 {
-	struct pci_controller *phb = PCI_GET_PHB_PTR(bus);
+	struct pci_controller *hose = PCI_GET_PHB_PTR(bus);
+	struct list_head *ln;
+
+	/* XXX or bus->parent? */
+	struct pci_dev *dev = bus->self;
 	struct resource *res;
 	int i;
 
-	if (bus->parent == NULL) {
-		/* This is a host bridge - fill in its resources */
-		phb->bus = bus;
-		bus->resource[0] = res = &phb->io_resource;
+	if (!dev) {
+		/* Root bus: hook up and claim the hose's I/O and memory windows. */
+
+		hose->bus = bus;
+		bus->resource[0] = res = &hose->io_resource;
 		if (!res->flags)
 			BUG();	/* No I/O resource for this PHB? */
 
+		if (request_resource(&ioport_resource, res))
+			printk(KERN_ERR "Failed to request IO "
+					"on hose %d\n", 0 /* FIXME */);
+
 		for (i = 0; i < 3; ++i) {
-			res = &phb->mem_resources[i];
-			if (!res->flags) {
-				if (i == 0)
-					BUG();	/* No memory resource for this PHB? */
-			}
+			res = &hose->mem_resources[i];
+			if (!res->flags && i == 0)
+				BUG();	/* No memory resource for this PHB? */
 			bus->resource[i+1] = res;
+			if (res->flags && request_resource(&iomem_resource, res))
+				printk(KERN_ERR "Failed to request MEM "
+						"on hose %d\n", 0 /* FIXME */);
 		}
-	} else {
+	} else if (pci_probe_only &&
+		   (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		/* This is a subordinate bridge */
+
 		pci_read_bridge_bases(bus);
+		pcibios_fixup_device_resources(dev, bus);
+	}
 
-		for (i = 0; i < 4; ++i) {
-			if ((res = bus->resource[i]) == NULL)
-				continue;
-			if (!res->flags)
-				continue;
-			if (res == pci_find_parent_resource(bus->self, res)) {
-				/* Transparent resource -- don't try to "fix" it. */
-				continue;
-			}
-			if (res->flags & IORESOURCE_IO) {
-				unsigned long offset = (unsigned long)phb->io_base_virt - pci_io_base;
-				res->start += offset;
-				res->end += offset;
-			} else if (phb->pci_mem_offset
-				   && (res->flags & IORESOURCE_MEM)) {
-				if (res->start < phb->pci_mem_offset) {
-					res->start += phb->pci_mem_offset;
-					res->end += phb->pci_mem_offset;
-				}
-			}
-		}
+	/* XXX Need to check why Alpha doesn't do this - Anton */
+	if (!pci_probe_only)
+		return;
+
+	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
+		struct pci_dev *dev = pci_dev_b(ln);
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			pcibios_fixup_device_resources(dev, bus);
 	}
 }
 
@@ -562,19 +529,20 @@
 	}
 }
 
-void __init
-pSeries_pcibios_fixup(void)
+extern void chrp_request_regions(void);
+
+void __init pcibios_final_fixup(void)
 {
 	struct pci_dev *dev;
 
-	PPCDBG(PPCDBG_PHBINIT, "pSeries_pcibios_fixup: start\n");
-
 	check_s7a();
-	
-	pci_for_each_dev(dev) {
+
+	pci_for_each_dev(dev)
 		pci_read_irq_line(dev);
-		PPCDBGCALL(PPCDBG_PHBINIT, dumpPci_Dev(dev) );
-	}
+
+	chrp_request_regions();
+	pci_fix_bus_sysdata();
+	create_tce_tables();
 }
 
 /*********************************************************************** 
@@ -595,14 +563,4 @@
 		node=node->parent;
 	}
 	return NULL;
-}
-
-/*
- * This is called very early before the page table is setup.
- */
-void 
-pSeries_pcibios_init_early(void)
-{
-	ppc_md.pcibios_read_config = rtas_read_config;
-	ppc_md.pcibios_write_config = rtas_write_config;
 }
diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
--- a/arch/ppc64/kernel/pci.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/pci.c	Mon Mar 31 13:41:08 2003
@@ -2,6 +2,9 @@
  * Port for PPC64 David Engebretsen, IBM Corp.
  * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
  * 
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *   Rework, based on alpha PCI code.
+ *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -30,31 +33,37 @@
 #include <asm/ppcdebug.h>
 #include <asm/naca.h>
 #include <asm/pci_dma.h>
-#include <asm/machdep.h>
 
 #include "pci.h"
 
+unsigned long pci_probe_only = 1;
+unsigned long pci_assign_all_buses = 0;
+
+unsigned int pcibios_assign_all_busses(void)
+{
+	return pci_assign_all_buses;
+}
+
 /* pci_io_base -- the base address from which io bars are offsets.
  * This is the lowest I/O base address (so bar values are always positive),
  * and it *must* be the start of ISA space if an ISA bus exists because
  * ISA drivers use hard coded offsets.  If no ISA bus exists a dummy
  * page is mapped and isa_io_limit prevents access to it.
  */
-unsigned long isa_io_base     = 0;	/* NULL if no ISA bus */
-unsigned long pci_io_base     = 0;
+unsigned long isa_io_base;	/* NULL if no ISA bus */
+unsigned long pci_io_base;
 
-static void pcibios_fixup_resources(struct pci_dev* dev);
+void pcibios_name_device(struct pci_dev* dev);
+void pcibios_final_fixup(void);
 static void fixup_broken_pcnet32(struct pci_dev* dev);
 static void fixup_windbond_82c105(struct pci_dev* dev);
-void fixup_resources(struct pci_dev* dev);
 
-void   iSeries_pcibios_init(void);
+void iSeries_pcibios_init(void);
 
-struct pci_controller* hose_head;
-struct pci_controller** hose_tail = &hose_head;
+struct pci_controller *hose_head;
+struct pci_controller **hose_tail = &hose_head;
 
-int  global_phb_number    = 0;           /* Global phb counter    */
-struct pci_controller *phbtab[PCI_MAX_PHB];
+int global_phb_number;		/* Global phb counter */
 
 /* Cached ISA bridge dev. */
 struct pci_dev *ppc64_isabridge_dev = NULL;
@@ -62,8 +71,8 @@
 struct pci_fixup pcibios_fixups[] = {
 	{ PCI_FIXUP_HEADER,	PCI_VENDOR_ID_TRIDENT,	PCI_ANY_ID, fixup_broken_pcnet32 },
 	{ PCI_FIXUP_HEADER,	PCI_VENDOR_ID_WINBOND,	PCI_DEVICE_ID_WINBOND_82C105, fixup_windbond_82c105 },
-	{ PCI_FIXUP_HEADER, PCI_ANY_ID,	PCI_ANY_ID, pcibios_fixup_resources },
- 	{ 0 }
+	{ PCI_FIXUP_HEADER, PCI_ANY_ID,	PCI_ANY_ID, pcibios_name_device },
+	{ 0 }
 };
 
 static void fixup_broken_pcnet32(struct pci_dev* dev)
@@ -81,14 +90,21 @@
 	 * p610.  We should probably be more careful in case
 	 * someone tries to plug in a similar adapter.
 	 */
+	int i;
 	unsigned int reg;
 
 	printk("Using INTC for W82c105 IDE controller.\n");
 	pci_read_config_dword(dev, 0x40, &reg);
 	/* Enable LEGIRQ to use INTC instead of ISA interrupts */
 	pci_write_config_dword(dev, 0x40, reg | (1<<11));
-}
 
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
+		/* zap the 2nd function of the winbond chip */
+		if (dev->resource[i].flags & IORESOURCE_IO
+		    && dev->bus->number == 0 && dev->devfn == 0x81)
+			dev->resource[i].flags &= ~IORESOURCE_IO;
+	}
+}
 
 /* Given an mmio phys address, find a pci device that implements
  * this address.  This is of course expensive, but only used
@@ -127,12 +143,30 @@
 	return NULL;
 }
 
-static void
-pcibios_fixup_resources(struct pci_dev* dev)
+void __devinit
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			struct resource *res)
 {
-	fixup_resources(dev);
+	unsigned long offset = 0;
+	struct pci_controller *hose = PCI_GET_PHB_PTR(dev);
+
+	if (!hose)
+		return;
+
+	if (res->flags & IORESOURCE_IO)
+	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+	if (res->flags & IORESOURCE_MEM)
+		offset = hose->pci_mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
 }
 
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+#endif
+
 /*
  * We need to avoid collisions with `mirrored' VGA ports
  * and other strange ISA hardware, so we always want the
@@ -146,180 +180,38 @@
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void
-pcibios_align_resource(void *data, struct resource *res,
-		       unsigned long size, unsigned long align)
+void pcibios_align_resource(void *data, struct resource *res,
+			    unsigned long size, unsigned long align)
 {
 	struct pci_dev *dev = data;
+	struct pci_controller *hose = PCI_GET_PHB_PTR(dev);
+	unsigned long start = res->start;
+	unsigned long alignto;
 
 	if (res->flags & IORESOURCE_IO) {
-		unsigned long start = res->start;
-
-		if (size > 0x100) {
-			printk(KERN_ERR "PCI: Can not align I/O Region %s %s because size %ld is too large.\n",
-                                        dev->slot_name, res->name, size);
-		}
-
-		if (start & 0x300) {
+	        unsigned long offset = (unsigned long)hose->io_base_virt -
+					pci_io_base;
+		/* Make sure we start at our min on all hoses */
+		if (start - offset < PCIBIOS_MIN_IO)
+			start = PCIBIOS_MIN_IO + offset;
+
+		/*
+		 * Put everything into 0x00-0xff region modulo 0x400
+		 */
+		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
-}
 
-/*
- *  Handle resources of PCI devices.  If the world were perfect, we could
- *  just allocate all the resource regions and do nothing more.  It isn't.
- *  On the other hand, we cannot just re-allocate all devices, as it would
- *  require us to know lots of host bridge internals.  So we attempt to
- *  keep as much of the original configuration as possible, but tweak it
- *  when it's found to be wrong.
- *
- *  Known BIOS problems we have to work around:
- *	- I/O or memory regions not configured
- *	- regions configured, but not enabled in the command register
- *	- bogus I/O addresses above 64K used
- *	- expansion ROMs left enabled (this may sound harmless, but given
- *	  the fact the PCI specs explicitly allow address decoders to be
- *	  shared between expansion ROMs and other resource regions, it's
- *	  at least dangerous)
- *
- *  Our solution:
- *	(1) Allocate resources for all buses behind PCI-to-PCI bridges.
- *	    This gives us fixed barriers on where we can allocate.
- *	(2) Allocate resources for all enabled devices.  If there is
- *	    a collision, just mark the resource as unallocated. Also
- *	    disable expansion ROMs during this step.
- *	(3) Try to allocate resources for disabled devices.  If the
- *	    resources were assigned correctly, everything goes well,
- *	    if they weren't, they won't disturb allocation of other
- *	    resources.
- *	(4) Assign new addresses to resources which were either
- *	    not configured at all or misconfigured.  If explicitly
- *	    requested by the user, configure expansion ROM address
- *	    as well.
- */
+	} else if (res->flags & IORESOURCE_MEM) {
+		/* Make sure we start at our min on all hoses */
+		if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
+			start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
 
-static void __init
-pcibios_allocate_bus_resources(struct list_head *bus_list)
-{
-	struct list_head *ln;
-	struct pci_bus *bus;
-	int i;
-	struct resource *res, *pr;
-
-	/* Depth-First Search on bus tree */
-	for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
-		bus = pci_bus_b(ln);
-		for (i = 0; i < 4; ++i) {
-			if ((res = bus->resource[i]) == NULL || !res->flags)
-				continue;
-			if (bus->parent == NULL)
-				pr = (res->flags & IORESOURCE_IO)?
-					&ioport_resource: &iomem_resource;
-			else
-				pr = pci_find_parent_resource(bus->self, res);
-
-			if (pr == res)
-				continue;	/* transparent bus or undefined */
-			if (pr && request_resource(pr, res) == 0)
-				continue;
-			printk(KERN_ERR "PCI: Cannot allocate resource region "
-			       "%d of PCI bridge %x\n", i, bus->number);
-			printk(KERN_ERR "PCI: resource is %lx..%lx (%lx), parent %p\n",
-			    res->start, res->end, res->flags, pr);
-		}
-		pcibios_allocate_bus_resources(&bus->children);
+		/* Align to multiple of size of minimum base.  */
+		alignto = max(0x1000UL, align);
+		start = ALIGN(start, alignto);
 	}
-}
-
-static void __init
-pcibios_allocate_resources(int pass)
-{
-	struct pci_dev *dev;
-	int idx, disabled;
-	u16 command;
-	struct resource *r, *pr;
 
-	pci_for_each_dev(dev) {
-		pci_read_config_word(dev, PCI_COMMAND, &command);
-		for(idx = 0; idx < 6; idx++) {
-			r = &dev->resource[idx];
-			if (r->parent)		/* Already allocated */
-				continue;
-			if (!r->start)		/* Address not assigned at all */
-				continue;
-
-			if (r->flags & IORESOURCE_IO)
-				disabled = !(command & PCI_COMMAND_IO);
-			else
-				disabled = !(command & PCI_COMMAND_MEMORY);
-			if (pass == disabled) {
-				PPCDBG(PPCDBG_PHBINIT,
-				       "PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
-				       r->start, r->end, r->flags, disabled, pass);
-				pr = pci_find_parent_resource(dev, r);
-				if (!pr || request_resource(pr, r) < 0) {
-					PPCDBG(PPCDBG_PHBINIT,
-					       "PCI: Cannot allocate resource region %d of device %s, pr = 0x%lx\n", idx, dev->slot_name, pr);
-					if(pr) {
-					PPCDBG(PPCDBG_PHBINIT,
-					       "PCI: Cannot allocate resource 0x%lx\n", request_resource(pr,r));
-					}
-					/* We'll assign a new address later */
-					r->end -= r->start;
-					r->start = 0;
-				}
-			}
-		}
-		if (!pass) {
-			r = &dev->resource[PCI_ROM_RESOURCE];
-			if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
-				/* Turn the ROM off, leave the resource region, but keep it unregistered. */
-				u32 reg;
-				r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
-				pci_read_config_dword(dev, dev->rom_base_reg, &reg);
-				pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
-			}
-		}
-	}
-}
-
-static void __init
-pcibios_assign_resources(void)
-{
-	struct pci_dev *dev;
-	int idx;
-	struct resource *r;
-
-	pci_for_each_dev(dev) {
-		int class = dev->class >> 8;
-
-		/* Don't touch classless devices and host bridges */
-		if (!class || class == PCI_CLASS_BRIDGE_HOST)
-			continue;
-
-		for (idx = 0; idx < 6; idx++) {
-			r = &dev->resource[idx];
-
-			/*
-			 * We shall assign a new address to this resource,
-			 * either because the BIOS (sic) forgot to do so
-			 * or because we have decided the old address was
-			 * unusable for some reason.
-			 */
-			if (!r->start && r->end)
-				pci_assign_resource(dev, idx);
-		}
-
-#if 0 /* don't assign ROMs */
-		r = &dev->resource[PCI_ROM_RESOURCE];
-		r->end -= r->start;
-		r->start = 0;
-		if (r->end)
-			pci_assign_resource(dev, PCI_ROM_RESOURCE);
-#endif
-	}
+	res->start = start;
 }
 
 /* 
@@ -358,20 +250,49 @@
         else
 		memcpy(hose->what,model,7);
         hose->type = controller_type;
-        hose->global_number = global_phb_number;
-	phbtab[global_phb_number++] = hose;
+        hose->global_number = global_phb_number++;
         
         *hose_tail = hose;
         hose_tail = &hose->next;
         return hose;
 }
 
-static int __init
-pcibios_init(void)
+static void __init pcibios_claim_one_bus(struct pci_bus *b)
+{
+	struct list_head *ld;
+	struct pci_bus *child_bus;
+
+	for (ld = b->devices.next; ld != &b->devices; ld = ld->next) {
+		struct pci_dev *dev = pci_dev_b(ld);
+		int i;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *r = &dev->resource[i];
+
+			if (r->parent || !r->start || !r->flags)
+				continue;
+			pci_claim_resource(dev, i);
+		}
+	}
+
+	list_for_each_entry(child_bus, &b->children, node)
+		pcibios_claim_one_bus(child_bus);
+}
+
+static void __init pcibios_claim_of_setup(void)
+{
+	struct list_head *lb;
+
+	for (lb = pci_root_buses.next; lb != &pci_root_buses; lb = lb->next) {
+		struct pci_bus *b = pci_bus_b(lb);
+		pcibios_claim_one_bus(b);
+	}
+}
+
+static int __init pcibios_init(void)
 {
 	struct pci_controller *hose;
 	struct pci_bus *bus;
-	int next_busno;
 
 #ifdef CONFIG_PPC_ISERIES
 	iSeries_pcibios_init(); 
@@ -379,38 +300,26 @@
 
 	//ppc64_boot_msg(0x40, "PCI Probe");
 	printk("PCI: Probing PCI hardware\n");
-	PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware\n");
 
 	/* Scan all of the recorded PCI controllers.  */
-	for (next_busno = 0, hose = hose_head; hose; hose = hose->next) {
+	for (hose = hose_head; hose; hose = hose->next) {
 		hose->last_busno = 0xff;
-		bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data);
+		bus = pci_scan_bus(hose->first_busno, hose->ops,
+				   hose->arch_data);
 		hose->bus = bus;
 		hose->last_busno = bus->subordinate;
-		if (next_busno <= hose->last_busno)
-			next_busno = hose->last_busno+1;
 	}
 
-	/* Call machine dependent fixup */
-	if (ppc_md.pcibios_fixup) {
-		ppc_md.pcibios_fixup();
-	}
-
-	/* Allocate and assign resources */
-	pcibios_allocate_bus_resources(&pci_root_buses);
-	pcibios_allocate_resources(0);
-	pcibios_allocate_resources(1);
-	pcibios_assign_resources();
-
-#ifndef CONFIG_PPC_ISERIES
-	void chrp_request_regions(void);
-	chrp_request_regions();
+	if (pci_probe_only)
+		pcibios_claim_of_setup();
+	else
+		/* FIXME: `else' will be removed when
+		   pci_assign_unassigned_resources() is able to work
+		   correctly with [partially] allocated PCI tree. */
+		pci_assign_unassigned_resources();
 
-	pci_fix_bus_sysdata();
-
-	create_tce_tables();
-	PPCDBG(PPCDBG_BUSWALK,"pSeries create_tce_tables()\n");
-#endif
+	/* Call machine dependent fixup */
+	pcibios_final_fixup();
 
 	/* Cache the location of the ISA bridge (if we have one) */
 	ppc64_isabridge_dev = pci_find_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
@@ -418,7 +327,6 @@
 		printk("ISA bridge at %s\n", ppc64_isabridge_dev->slot_name);
 
 	printk("PCI: Probing PCI hardware done\n");
-	PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware done.\n");
 	//ppc64_boot_msg(0x41, "PCI Done");
 
 	return 0;
@@ -426,12 +334,6 @@
 
 subsys_initcall(pcibios_init);
 
-void __init pcibios_fixup_bus(struct pci_bus *bus)
-{
-	if (ppc_md.pcibios_fixup_bus)
-		ppc_md.pcibios_fixup_bus(bus);
-}
-
 char __init *pcibios_setup(char *str)
 {
 	return str;
@@ -439,35 +341,29 @@
 
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	PPCDBG(PPCDBG_BUSWALK,"PCI: %s for device %s \n", __FUNCTION__,
-	       dev->slot_name);
+	u16 cmd, oldcmd;
+	int i;
 
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx = 0; idx < 6; idx++) {
+	oldcmd = cmd;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
 		/* Only set up the requested stuff */
-		if (!(mask & (1<<idx)))
+		if (!(mask & (1<<i)))
 			continue;
 
-		r = &dev->resource[idx];
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
+		if (res->flags & IORESOURCE_IO)
 			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
+		if (res->flags & IORESOURCE_MEM)
 			cmd |= PCI_COMMAND_MEMORY;
 	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n",
-		       dev->slot_name, old_cmd, cmd);
-		PPCDBG(PPCDBG_BUSWALK,"PCI: Enabling device %s \n",
-		       dev->slot_name);
+
+	if (cmd != oldcmd) {
+		printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+		       dev->slot_name, cmd);
+                /* Enable the appropriate bits in the PCI command register.  */
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
 	return 0;
@@ -608,113 +504,4 @@
 			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
 
 	return ret;
-}
-
-/*****************************************************
- * Dump Resource information
- *****************************************************/
-void dumpResources(struct resource* Resource)
-{
-	if(Resource != NULL) {
-		int Flags = 0x00000F00 & Resource->flags;
-		if(Resource->start == 0 && Resource->end == 0) return;
-		else if(Resource->start == Resource->end )     return;
-		else {
-			if     (Flags == IORESOURCE_IO)  udbg_printf("IO.:");
-			else if(Flags == IORESOURCE_MEM) udbg_printf("MEM:");
-			else if(Flags == IORESOURCE_IRQ) udbg_printf("IRQ:");
-			else                             udbg_printf("0x%02X:",Resource->flags);
-
-		}
-		udbg_printf("0x%016LX / 0x%016LX (0x%08X)\n",
-			    Resource->start, Resource->end, Resource->end - Resource->start);
-	}
-}
-
-int  resourceSize(struct resource* Resource)
-{
-	if(Resource->start == 0 && Resource->end == 0) return 0;
-	else if(Resource->start == Resource->end )     return 0;
-	else return (Resource->end-1)-Resource->start;
-}
-
-
-/*****************************************************
- * Dump PHB information for Debug
- *****************************************************/
-void dumpPci_Controller(struct pci_controller* phb)
-{
-	udbg_printf("\tpci_controller= 0x%016LX\n", phb);
-	if (phb != NULL) {
-		udbg_printf("\twhat & type   = %s 0x%02X\n ",phb->what,phb->type);
-		udbg_printf("\tbus           = ");
-		if (phb->bus != NULL) udbg_printf("0x%02X\n",   phb->bus->number);
-		else                  udbg_printf("<NULL>\n");
-		udbg_printf("\tarch_data     = 0x%016LX\n", phb->arch_data);
-		udbg_printf("\tfirst_busno   = 0x%02X\n",   phb->first_busno);
-		udbg_printf("\tlast_busno    = 0x%02X\n",   phb->last_busno);
-		udbg_printf("\tio_base_virt* = 0x%016LX\n", phb->io_base_virt);
-		udbg_printf("\tio_base_phys  = 0x%016LX\n", phb->io_base_phys);
-		udbg_printf("\tpci_mem_offset= 0x%016LX\n", phb->pci_mem_offset);
-		udbg_printf("\tpci_io_offset = 0x%016LX\n", phb->pci_io_offset);
-
-		udbg_printf("\tResources\n");
-		dumpResources(&phb->io_resource);
-		if (phb->mem_resource_count >  0) dumpResources(&phb->mem_resources[0]);
-		if (phb->mem_resource_count >  1) dumpResources(&phb->mem_resources[1]);
-		if (phb->mem_resource_count >  2) dumpResources(&phb->mem_resources[2]);
-
-		udbg_printf("\tglobal_num    = 0x%02X\n",   phb->global_number);
-		udbg_printf("\tlocal_num     = 0x%02X\n",   phb->local_number);
-	}
-}
-
-/*****************************************************
- * Dump PHB information for Debug
- *****************************************************/
-void dumpPci_Bus(struct pci_bus* Pci_Bus)
-{
-	int i;
-	udbg_printf("\tpci_bus         = 0x%016LX   \n",Pci_Bus);
-	if (Pci_Bus != NULL) {
-
-		udbg_printf("\tnumber          = 0x%02X     \n",Pci_Bus->number);
-		udbg_printf("\tprimary         = 0x%02X     \n",Pci_Bus->primary);
-		udbg_printf("\tsecondary       = 0x%02X     \n",Pci_Bus->secondary);
-		udbg_printf("\tsubordinate     = 0x%02X     \n",Pci_Bus->subordinate);
-
-		for (i=0;i<4;++i) {
-			if(Pci_Bus->resource[i] == NULL) continue;
-			if(Pci_Bus->resource[i]->start == 0 && Pci_Bus->resource[i]->end == 0) break;
-			udbg_printf("\tResources[%d]",i);
-			dumpResources(Pci_Bus->resource[i]);
-		}
-	}
-}
-
-/*****************************************************
- * Dump Device information for Debug
- *****************************************************/
-void dumpPci_Dev(struct pci_dev* Pci_Dev)
-{
-	int i;
-	udbg_printf("\tpci_dev*        = 0x%p\n",Pci_Dev);
-	if ( Pci_Dev == NULL )  return;
-	udbg_printf("\tname            = %s  \n",Pci_Dev->dev.name);
-	udbg_printf("\tbus*            = 0x%p\n",Pci_Dev->bus);
-	udbg_printf("\tsysdata*        = 0x%p\n",Pci_Dev->sysdata);
-	udbg_printf("\tDevice          = 0x%4X%02X:%02X.%02X 0x%04X:%04X\n",
-		    PCI_GET_PHB_NUMBER(Pci_Dev),
-		    PCI_GET_BUS_NUMBER(Pci_Dev),
-		    PCI_SLOT(Pci_Dev->devfn),
-		    PCI_FUNC(Pci_Dev->devfn),
-		    Pci_Dev->vendor,
-		    Pci_Dev->device);
-	udbg_printf("\tHdr/Irq         = 0x%02X/0x%02X \n",Pci_Dev->hdr_type,Pci_Dev->irq);
-	for (i=0;i<DEVICE_COUNT_RESOURCE;++i) {
-		if (Pci_Dev->resource[i].start == 0 && Pci_Dev->resource[i].end == 0) continue;
-		udbg_printf("\tResources[%d] ",i);
-		dumpResources(&Pci_Dev->resource[i]);
-	}
-	dumpResources(&Pci_Dev->resource[i]);
 }
diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h
--- a/arch/ppc64/kernel/pci.h	Mon Mar 31 13:41:06 2003
+++ b/arch/ppc64/kernel/pci.h	Mon Mar 31 13:41:06 2003
@@ -19,18 +19,14 @@
 
 extern struct pci_controller* hose_head;
 extern struct pci_controller** hose_tail;
-/* PHB's are also in a table. */
-#define PCI_MAX_PHB 64
-extern int  global_phb_number;
-extern struct pci_controller *phbtab[];
+
+extern int global_phb_number;
 
 /*******************************************************************
  * Platform functions that are brand specific implementation. 
  *******************************************************************/
 extern unsigned long find_and_init_phbs(void);
 
-extern void   ppc64_pcibios_init(void);
-
 extern struct pci_dev *ppc64_isabridge_dev;	/* may be NULL if no ISA bus */
 
 /*******************************************************************
@@ -46,10 +42,6 @@
 void pci_fix_bus_sysdata(void);
 struct device_node *fetch_dev_dn(struct pci_dev *dev);
 
-void iSeries_pcibios_init_early(void);
-void pSeries_pcibios_init_early(void);
-void pSeries_pcibios_init(void);
-
 /*******************************************************************
  * Helper macros for extracting data from pci structures.  
  *   PCI_GET_PHB_PTR(struct pci_dev*)    returns the Phb pointer.
@@ -59,13 +51,5 @@
 #define PCI_GET_PHB_PTR(dev)    (((struct device_node *)(dev)->sysdata)->phb)
 #define PCI_GET_PHB_NUMBER(dev) (((dev)->bus->number&0x00FFFF00)>>8)
 #define PCI_GET_BUS_NUMBER(dev) ((dev)->bus->number&0x0000FF)
-
-/*******************************************************************
- * Debugging  Routines.
- *******************************************************************/
-extern void dumpResources(struct resource* Resource);
-extern void dumpPci_Controller(struct pci_controller* phb);
-extern void dumpPci_Bus(struct pci_bus* Pci_Bus);
-extern void dumpPci_Dev(struct pci_dev* Pci_Dev);
 
 #endif /* __PPC_KERNEL_PCI_H__ */
diff -Nru a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
--- a/arch/ppc64/kernel/process.c	Mon Mar 31 13:41:07 2003
+++ b/arch/ppc64/kernel/process.c	Mon Mar 31 13:41:07 2003
@@ -208,6 +208,12 @@
 	} else {
 		childregs->gpr[1] = usp;
 		p->thread.regs = childregs;
+		if (clone_flags & CLONE_SETTLS) {
+			if (test_thread_flag(TIF_32BIT))
+				childregs->gpr[2] = childregs->gpr[6];
+			else
+				childregs->gpr[13] = childregs->gpr[6];
+		}
 	}
 	childregs->gpr[3] = 0;  /* Result from fork() */
 	sp -= STACK_FRAME_OVERHEAD;
@@ -304,7 +310,7 @@
 	if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
 			   CLONE_CHILD_CLEARTID)) {
 		parent_tidptr = p3;
-		child_tidptr = p4;
+		child_tidptr = p5;
 		if (test_thread_flag(TIF_32BIT)) {
 			parent_tidptr &= 0xffffffff;
 			child_tidptr &= 0xffffffff;
diff -Nru a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
--- a/arch/ppc64/kernel/prom.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/prom.c	Mon Mar 31 13:41:08 2003
@@ -1270,8 +1270,8 @@
 				break;
 
 #ifdef CONFIG_LOGO_LINUX_CLUT224
-		clut = RELOC(RELOC(&logo_linux_clut224)->clut);
-		for (i = 0; i < logo_linux_clut224.clutsize; i++, clut += 3)
+		clut = PTRRELOC(RELOC(logo_linux_clut224.clut));
+		for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3)
 			if (prom_set_color(ih, i + 32, clut[0], clut[1],
 					   clut[2]) != 0)
 				break;
diff -Nru a/arch/ppc64/kernel/stab.c b/arch/ppc64/kernel/stab.c
--- a/arch/ppc64/kernel/stab.c	Mon Mar 31 13:41:08 2003
+++ b/arch/ppc64/kernel/stab.c	Mon Mar 31 13:41:08 2003
@@ -197,11 +197,23 @@
 
 	PMC_SW_PROCESSOR(stab_capacity_castouts); 
 
+	/* 
+	 * Never cast out the segment for our kernel stack. Since we
+	 * don't invalidate the ERAT, we could have a valid translation
+	 * for the kernel stack during the first part of exception exit
+	 * which gets invalidated due to a tlbie from another cpu at a
+	 * non-recoverable point (after setting srr0/1) - Anton
+	 */
 	castout_entry = get_paca()->xStab_data.next_round_robin;
-	entry = castout_entry; 
-	castout_entry++; 
-	if (castout_entry >= naca->slb_size)
-		castout_entry = 1; 
+	do {
+		entry = castout_entry;
+		castout_entry++; 
+		if (castout_entry >= naca->slb_size)
+			castout_entry = 1; 
+		asm volatile("slbmfee  %0,%1" : "=r" (esid_data) : "r" (entry));
+	} while (esid_data.data.esid == GET_ESID((unsigned long)_get_SP()) &&
+			esid_data.data.v);
+
 	get_paca()->xStab_data.next_round_robin = castout_entry;
 
 	/* slbie not needed as the previous mapping is still valid. */
@@ -346,7 +358,12 @@
 void flush_stab(struct task_struct *tsk, struct mm_struct *mm)
 {
 	if (cpu_has_slb()) {
-		if (!STAB_PRESSURE && test_thread_flag(TIF_32BIT)) {
+		/*
+		 * XXX disable 32bit slb invalidate optimisation until we fix
+		 * the issue where a 32bit app execed out of a 64bit app can
+		 * cause segments above 4GB not to be flushed - Anton
+		 */
+		if (0 && !STAB_PRESSURE && test_thread_flag(TIF_32BIT)) {
 			union {
 				unsigned long word0;
 				slb_dword0 data;
diff -Nru a/arch/sparc/Makefile b/arch/sparc/Makefile
--- a/arch/sparc/Makefile	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc/Makefile	Mon Mar 31 13:41:06 2003
@@ -54,13 +54,16 @@
 LIBS_Y		:= $(patsubst %/, %/lib.a, $(libs-y))
 export INIT_Y CORE_Y DRIVERS_Y NET_Y LIBS_Y HEAD_Y
 
-makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/$(ARCH)/boot $(1)
+# Default target
+all: image
+
+boot := arch/sparc/boot
 
 image tftpboot.img: vmlinux
-	$(call makeboot,arch/sparc/boot/$@)
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 archclean:
-	$(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/$(ARCH)/boot
+	$(Q)$(MAKE) $(clean)=$(boot)
 
 prepare: include/asm-$(ARCH)/asm_offsets.h
 
@@ -72,3 +75,9 @@
 
 CLEAN_FILES +=	include/asm-$(ARCH)/asm_offsets.h	\
 		arch/$(ARCH)/kernel/asm-offsets.s
+
+# Don't use tabs in echo arguments.
+define archhelp
+  echo  '* image        - kernel image ($(boot)/image)'
+  echo  '  tftpboot.img - image prepared for tftp'
+endef
diff -Nru a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
--- a/arch/sparc/boot/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc/boot/Makefile	Mon Mar 31 13:41:08 2003
@@ -32,7 +32,3 @@
 
 $(obj)/btfix.s: $(obj)/btfixupprep vmlinux FORCE
 	$(call if_changed,btfix)
-
-archhelp:
-	@echo '* image		- kernel image ($(obj)/image)'
-	@echo '  tftpboot.img	- image prepared for tftp'
diff -Nru a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
--- a/arch/sparc64/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/Kconfig	Mon Mar 31 13:41:08 2003
@@ -139,8 +139,8 @@
 	bool "CPU Frequency scaling"
 	help
 	  Clock scaling allows you to change the clock speed of CPUs on the
-	  fly.  Currently there is only a sparc64 driver for UltraSPARC-III
-	  processors.
+	  fly.  Currently there are only sparc64 drivers for UltraSPARC-III
+	  and UltraSPARC-IIe processors.
 
 	  For details, take a look at linux/Documentation/cpufreq. 
 
@@ -161,6 +161,16 @@
 	depends on CPU_FREQ_TABLE
 	help
 	  This adds the CPUFreq driver for UltraSPARC-III processors.
+
+	  For details, take a look at linux/Documentation/cpufreq. 
+
+	  If in doubt, say N.
+
+config US2E_FREQ
+	tristate "UltraSPARC-IIe CPU Frequency driver"
+	depends on CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for UltraSPARC-IIe processors.
 
 	  For details, take a look at linux/Documentation/cpufreq. 
 
diff -Nru a/arch/sparc64/Makefile b/arch/sparc64/Makefile
--- a/arch/sparc64/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/Makefile	Mon Mar 31 13:41:08 2003
@@ -71,10 +71,13 @@
 # FIXME: is drivers- right?
 drivers-$(CONFIG_OPROFILE)	+= arch/sparc64/oprofile/
 
-makeboot = $(Q)$(MAKE) -f scripts/Makefile.build obj=arch/$(ARCH)/boot $(1)
+boot := arch/sparc64/boot
 
 image tftpboot.img vmlinux.aout: vmlinux
-	$(call makeboot,arch/sparc64/boot/$@)
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
 
 define archhelp
   echo  '* vmlinux       - Standard sparc64 kernel'
diff -Nru a/arch/sparc64/boot/Makefile b/arch/sparc64/boot/Makefile
--- a/arch/sparc64/boot/Makefile	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/boot/Makefile	Mon Mar 31 13:41:06 2003
@@ -8,7 +8,7 @@
 ELFTOAOUT	:= elftoaout
 
 host-progs	:= piggyback
-targets		:= tftpboot.img vmlinux.aout
+targets		:= image tftpboot.img vmlinux.aout
 
 quiet_cmd_elftoaout = ELT2AOUT $@
       cmd_elftoaout = $(ELFTOAOUT) vmlinux -o $@
diff -Nru a/arch/sparc64/defconfig b/arch/sparc64/defconfig
--- a/arch/sparc64/defconfig	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/defconfig	Mon Mar 31 13:41:06 2003
@@ -41,6 +41,7 @@
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_TABLE=y
 CONFIG_US3_FREQ=m
+CONFIG_US2E_FREQ=m
 CONFIG_CPU_FREQ_PROC_INTF=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=m
 # CONFIG_CPU_FREQ_24_API is not set
@@ -361,11 +362,11 @@
 # CONFIG_SYN_COOKIES is not set
 CONFIG_INET_AH=y
 CONFIG_INET_ESP=y
-CONFIG_XFRM_USER=m
 CONFIG_IPV6=m
 CONFIG_IPV6_PRIVACY=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
+CONFIG_XFRM_USER=m
 
 #
 # SCTP Configuration (EXPERIMENTAL)
diff -Nru a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
--- a/arch/sparc64/kernel/Makefile	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/Makefile	Mon Mar 31 13:41:06 2003
@@ -21,6 +21,7 @@
 obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
+obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
 
 ifdef CONFIG_SUNOS_EMUL
   obj-y += sys_sunos32.o sunos_ioctl32.o
diff -Nru a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c
--- a/arch/sparc64/kernel/binfmt_elf32.c	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/kernel/binfmt_elf32.c	Mon Mar 31 13:41:08 2003
@@ -152,7 +152,6 @@
 #ifdef CONFIG_BINFMT_ELF32_MODULE
 #define CONFIG_BINFMT_ELF_MODULE CONFIG_BINFMT_ELF32_MODULE
 #endif
-#define ELF_FLAGS_INIT	set_thread_flag(TIF_32BIT)
 
 MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit SparcLinux binaries on the Ultra");
 MODULE_AUTHOR("Eric Youngdale, David S. Miller, Jakub Jelinek");
diff -Nru a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c
--- a/arch/sparc64/kernel/devices.c	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/devices.c	Mon Mar 31 13:41:06 2003
@@ -17,6 +17,7 @@
 #include <asm/system.h>
 #include <asm/smp.h>
 #include <asm/spitfire.h>
+#include <asm/timer.h>
 
 /* Used to synchronize acceses to NatSemi SUPER I/O chip configure
  * operations in asm/ns87303.h
@@ -88,7 +89,6 @@
 
 #ifndef CONFIG_SMP
 	{
-		extern unsigned long up_clock_tick;
 		up_clock_tick = prom_getintdefault(prom_node_cpu,
 						   "clock-frequency",
 						   0);
diff -Nru a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
--- a/arch/sparc64/kernel/irq.c	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/kernel/irq.c	Mon Mar 31 13:41:08 2003
@@ -356,7 +356,7 @@
 	}	
 	if (action == NULL)
 	    action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-						 GFP_KERNEL);
+						 GFP_ATOMIC);
 	
 	if (!action) { 
 		spin_unlock_irqrestore(&irq_action_lock, flags);
@@ -376,7 +376,7 @@
 				goto free_and_ebusy;
 			}
 			if ((bucket->flags & IBF_MULTI) == 0) {
-				vector = kmalloc(sizeof(void *) * 4, GFP_KERNEL);
+				vector = kmalloc(sizeof(void *) * 4, GFP_ATOMIC);
 				if (vector == NULL)
 					goto free_and_enomem;
 
@@ -900,7 +900,7 @@
 	}
 	if (action == NULL)
 		action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-						     GFP_KERNEL);
+						     GFP_ATOMIC);
 	if (!action) {
 		spin_unlock_irqrestore(&irq_action_lock, flags);
 		return -ENOMEM;
diff -Nru a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
--- a/arch/sparc64/kernel/pci.c	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/pci.c	Mon Mar 31 13:41:06 2003
@@ -470,6 +470,54 @@
 	return err;
 }
 
+/* Sort resources by alignment */
+void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
+{
+	int i;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *r;
+		struct resource_list *list, *tmp;
+		unsigned long r_align;
+
+		r = &dev->resource[i];
+		r_align = r->end - r->start;
+		
+		if (!(r->flags) || r->parent)
+			continue;
+		if (!r_align) {
+			printk(KERN_WARNING "PCI: Ignore bogus resource %d "
+					    "[%lx:%lx] of %s\n",
+					    i, r->start, r->end, dev->dev.name);
+			continue;
+		}
+		r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
+		for (list = head; ; list = list->next) {
+			unsigned long align = 0;
+			struct resource_list *ln = list->next;
+			int idx;
+
+			if (ln) {
+				idx = ln->res - &ln->dev->resource[0];
+				align = (idx < PCI_BRIDGE_RESOURCES) ?
+					ln->res->end - ln->res->start + 1 :
+					ln->res->start;
+			}
+			if (r_align > align) {
+				tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+				if (!tmp)
+					panic("pdev_sort_resources(): "
+					      "kmalloc() failed!\n");
+				tmp->next = ln;
+				tmp->res = r;
+				tmp->dev = dev;
+				list->next = tmp;
+				break;
+			}
+		}
+	}
+}
+
 void pcibios_update_irq(struct pci_dev *pdev, int irq)
 {
 }
@@ -482,6 +530,44 @@
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
 	return 0;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region,
+			     struct resource *res)
+{
+	struct pci_pbm_info *pbm = pci_bus2pbm[pdev->bus->number];
+	struct resource zero_res, *root;
+
+	zero_res.start = 0;
+	zero_res.end = 0;
+	zero_res.flags = res->flags;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &pbm->io_space;
+	else
+		root = &pbm->mem_space;
+
+	pbm->parent->resource_adjust(pdev, &zero_res, root);
+
+	region->start = res->start - zero_res.start;
+	region->end = res->end - zero_res.start;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res,
+			     struct pci_bus_region *region)
+{
+	struct pci_pbm_info *pbm = pci_bus2pbm[pdev->bus->number];
+	struct resource *root;
+
+	res->start = region->start;
+	res->end = region->end;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &pbm->io_space;
+	else
+		root = &pbm->mem_space;
+
+	pbm->parent->resource_adjust(pdev, res, root);
 }
 
 char * __init pcibios_setup(char *str)
diff -Nru a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
--- a/arch/sparc64/kernel/pci_iommu.c	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/pci_iommu.c	Mon Mar 31 13:41:06 2003
@@ -784,6 +784,26 @@
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
+{
+	struct pci_dev *ali_isa_bridge;
+	u8 val;
+
+	/* ALI sound chips generate 31-bits of DMA, a special register
+	 * determines what bit 31 is emitted as.
+	 */
+	ali_isa_bridge = pci_find_device(PCI_VENDOR_ID_AL,
+					 PCI_DEVICE_ID_AL_M1533, NULL);
+	if (!ali_isa_bridge)
+		return;	/* no M1533 bridge found: nothing to program */
+	pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
+	if (set_bit)
+		val |= 0x01;
+	else
+		val &= ~0x01;
+	pci_write_config_byte(ali_isa_bridge, 0x7e, val);
+}
+
 int pci_dma_supported(struct pci_dev *pdev, u64 device_mask)
 {
 	struct pcidev_cookie *pcp = pdev->sysdata;
@@ -795,6 +815,14 @@
 		struct pci_iommu *iommu = pcp->pbm->iommu;
 
 		dma_addr_mask = iommu->dma_addr_mask;
+
+		if (pdev->vendor == PCI_VENDOR_ID_AL &&
+		    pdev->device == PCI_DEVICE_ID_AL_M5451 &&
+		    device_mask == 0x7fffffff) {
+			ali_sound_dma_hack(pdev,
+					   (dma_addr_mask & 0x80000000) != 0);
+			return 1;
+		}
 	}
 
 	return (device_mask & dma_addr_mask) == dma_addr_mask;
diff -Nru a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
--- a/arch/sparc64/kernel/rtrap.S	Mon Mar 31 13:41:07 2003
+++ b/arch/sparc64/kernel/rtrap.S	Mon Mar 31 13:41:07 2003
@@ -185,6 +185,7 @@
 		 * sched+signal checks with IRQs disabled.
 		 */
 to_user:	wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+		wrpr			0, %pil
 __handle_preemption_continue:
 		ldx			[%g6 + TI_FLAGS], %l0
 		sethi			%hi(_TIF_USER_WORK_MASK), %o0
@@ -271,6 +272,7 @@
 		brnz			%l5, kern_fpucheck
 		 sethi			%hi(PREEMPT_ACTIVE), %l6
 		stw			%l6, [%g6 + TI_PRE_COUNT]
+		wrpr			0, %pil
 		call			schedule
 		 nop
 		ba,pt			%xcc, rtrap
diff -Nru a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
--- a/arch/sparc64/kernel/setup.c	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/setup.c	Mon Mar 31 13:41:06 2003
@@ -44,6 +44,7 @@
 #include <asm/starfire.h>
 #include <asm/hardirq.h>
 #include <asm/mmu_context.h>
+#include <asm/timer.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
diff -Nru a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
--- a/arch/sparc64/kernel/sparc64_ksyms.c	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/kernel/sparc64_ksyms.c	Mon Mar 31 13:41:08 2003
@@ -55,6 +55,7 @@
 #endif
 #include <asm/a.out.h>
 #include <asm/ns87303.h>
+#include <asm/timer.h>
 
 struct poll {
 	int fd;
@@ -159,11 +160,7 @@
 EXPORT_SYMBOL(smp_call_function);
 #endif /* CONFIG_SMP */
 
-/* Uniprocessor clock frequency */
-#ifndef CONFIG_SMP
-extern unsigned long up_clock_tick;
-EXPORT_SYMBOL(up_clock_tick);
-#endif
+EXPORT_SYMBOL(sparc64_get_clock_tick);
 
 /* semaphores */
 EXPORT_SYMBOL(down);
diff -Nru a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
--- a/arch/sparc64/kernel/time.c	Mon Mar 31 13:41:07 2003
+++ b/arch/sparc64/kernel/time.c	Mon Mar 31 13:41:07 2003
@@ -26,6 +26,8 @@
 #include <linux/profile.h>
 #include <linux/bcd.h>
 #include <linux/jiffies.h>
+#include <linux/cpufreq.h>
+#include <linux/percpu.h>
 
 #include <asm/oplib.h>
 #include <asm/mostek.h>
@@ -988,6 +990,73 @@
 	return clock;
 }
 
+struct freq_table {
+	unsigned long udelay_val_ref;
+	unsigned long clock_tick_ref;
+	unsigned int ref_freq;
+};
+static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0, 0 };
+
+unsigned long sparc64_get_clock_tick(unsigned int cpu)
+{
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+	if (ft->clock_tick_ref)
+		return ft->clock_tick_ref;
+#ifdef CONFIG_SMP
+	return cpu_data[cpu].clock_tick;
+#else
+	return up_clock_tick;
+#endif
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				    void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned int cpu = freq->cpu;
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+#ifdef CONFIG_SMP
+	if (!ft->ref_freq) {
+		ft->ref_freq = freq->old;
+		ft->udelay_val_ref = cpu_data[cpu].udelay_val;
+		ft->clock_tick_ref = cpu_data[cpu].clock_tick;
+	}
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+		cpu_data[cpu].udelay_val =
+			cpufreq_scale(ft->udelay_val_ref,
+				      ft->ref_freq,
+				      freq->new);
+		cpu_data[cpu].clock_tick =
+			cpufreq_scale(ft->clock_tick_ref,
+				      ft->ref_freq,
+				      freq->new);
+	}
+#else
+	/* In the non-SMP case, kernel/cpufreq.c takes care of adjusting
+	 * loops_per_jiffy.
+	 */
+	if (!ft->ref_freq) {
+		ft->ref_freq = freq->old;
+		ft->clock_tick_ref = up_clock_tick;
+	}
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new))
+		up_clock_tick = cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, freq->new);
+#endif
+
+	return 0;
+}
+
+static struct notifier_block sparc64_cpufreq_notifier_block = {
+	.notifier_call	= sparc64_cpufreq_notifier
+};
+#endif
+
 /* The quotient formula is taken from the IA64 port. */
 void __init time_init(void)
 {
@@ -996,6 +1065,11 @@
 	timer_ticks_per_usec_quotient =
 		(((1000000UL << 30) +
 		  (clock / 2)) / clock);
+
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
+#endif
 }
 
 static __inline__ unsigned long do_gettimeoffset(void)
diff -Nru a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
--- a/arch/sparc64/kernel/traps.c	Mon Mar 31 13:41:08 2003
+++ b/arch/sparc64/kernel/traps.c	Mon Mar 31 13:41:08 2003
@@ -1575,6 +1575,9 @@
 	struct reg_window *rw;
 	int count = 0;
 
+	if (tp == current_thread_info())
+		flushw_all();
+
 	fp = ksp + STACK_BIAS;
 	thread_base = (unsigned long) tp;
 	do {
@@ -1595,6 +1598,15 @@
 	if (tsk)
 		show_trace_raw(tsk->thread_info,
 			       tsk->thread_info->ksp);
+}
+
+void dump_stack(void)
+{
+	unsigned long ksp;
+
+	__asm__ __volatile__("mov	%%fp, %0"
+			     : "=r" (ksp));
+	show_trace_raw(current_thread_info(), ksp);
 }
 
 void die_if_kernel(char *str, struct pt_regs *regs)
diff -Nru a/arch/sparc64/kernel/us2e_cpufreq.c b/arch/sparc64/kernel/us2e_cpufreq.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/sparc64/kernel/us2e_cpufreq.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,398 @@
+/* us2e_cpufreq.c: UltraSPARC-IIe cpu frequency support
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * Many thanks to Dominik Brodowski for fixing up the cpufreq
+ * infrastructure in order to make this driver easier to implement.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/asi.h>
+#include <asm/timer.h>
+
+static struct cpufreq_driver *cpufreq_us2e_driver;
+
+struct us2e_freq_percpu_info {
+	struct cpufreq_frequency_table table[6];
+};
+
+/* Indexed by cpu number. */
+static struct us2e_freq_percpu_info *us2e_freq_table;
+
+#define HBIRD_MEM_CNTL0_ADDR	0x1fe0000f010UL
+#define HBIRD_ESTAR_MODE_ADDR	0x1fe0000f080UL
+
+/* UltraSPARC-IIe has five dividers: 1, 2, 4, 6, and 8.  These are controlled
+ * in the ESTAR mode control register.
+ */
+#define ESTAR_MODE_DIV_1	0x0000000000000000UL
+#define ESTAR_MODE_DIV_2	0x0000000000000001UL
+#define ESTAR_MODE_DIV_4	0x0000000000000003UL
+#define ESTAR_MODE_DIV_6	0x0000000000000002UL
+#define ESTAR_MODE_DIV_8	0x0000000000000004UL
+#define ESTAR_MODE_DIV_MASK	0x0000000000000007UL
+
+#define MCTRL0_SREFRESH_ENAB	0x0000000000010000UL
+#define MCTRL0_REFR_COUNT_MASK	0x0000000000007f00UL
+#define MCTRL0_REFR_COUNT_SHIFT	8
+#define MCTRL0_REFR_INTERVAL	7800
+#define MCTRL0_REFR_CLKS_P_CNT	64
+
+static unsigned long read_hbreg(unsigned long addr)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=&r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+	return ret;
+}
+
+static void write_hbreg(unsigned long addr, unsigned long val)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* no outputs */
+			     : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E)
+			     : "memory");
+	if (addr == HBIRD_ESTAR_MODE_ADDR) {
+		/* Need to wait 16 clock cycles for the PLL to lock.  */
+		udelay(1);
+	}
+}
+
+static void self_refresh_ctl(int enable)
+{
+	unsigned long mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
+
+	if (enable)
+		mctrl |= MCTRL0_SREFRESH_ENAB;
+	else
+		mctrl &= ~MCTRL0_SREFRESH_ENAB;
+	write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl);
+	(void) read_hbreg(HBIRD_MEM_CNTL0_ADDR);
+}
+
+static void frob_mem_refresh(int cpu_slowing_down,
+			     unsigned long clock_tick,
+			     unsigned long old_divisor, unsigned long divisor)
+{
+	unsigned long old_refr_count, refr_count, mctrl;
+
+
+	refr_count  = (clock_tick * MCTRL0_REFR_INTERVAL);
+	refr_count /= (MCTRL0_REFR_CLKS_P_CNT * divisor * 1000000000UL);
+
+	mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
+	old_refr_count = (mctrl & MCTRL0_REFR_COUNT_MASK)
+		>> MCTRL0_REFR_COUNT_SHIFT;
+
+	mctrl &= ~MCTRL0_REFR_COUNT_MASK;
+	mctrl |= refr_count << MCTRL0_REFR_COUNT_SHIFT;
+	write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl);
+	mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR);
+
+	if (cpu_slowing_down && !(mctrl & MCTRL0_SREFRESH_ENAB)) {
+		unsigned long usecs;
+
+		/* We have to wait for both refresh counts (old
+		 * and new) to go to zero.
+		 */
+		usecs = (MCTRL0_REFR_CLKS_P_CNT *
+			 (refr_count + old_refr_count) *
+			 1000000UL *
+			 old_divisor) / clock_tick;
+		udelay(usecs + 1UL);
+	}
+}
+
+static void us2e_transition(unsigned long estar, unsigned long new_bits,
+			    unsigned long clock_tick,
+			    unsigned long old_divisor, unsigned long divisor)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	estar &= ~ESTAR_MODE_DIV_MASK;
+
+	/* This is based upon the state transition diagram in the IIe manual.  */
+	if (old_divisor == 2 && divisor == 1) {
+		self_refresh_ctl(0);
+		write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
+		frob_mem_refresh(0, clock_tick, old_divisor, divisor);
+	} else if (old_divisor == 1 && divisor == 2) {
+		frob_mem_refresh(1, clock_tick, old_divisor, divisor);
+		write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
+		self_refresh_ctl(1);
+	} else if (old_divisor == 1 && divisor > 2) {
+		us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick,
+				1, 2);
+		us2e_transition(estar, new_bits, clock_tick,
+				2, divisor);
+	} else if (old_divisor > 2 && divisor == 1) {
+		us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick,
+				old_divisor, 2);
+		us2e_transition(estar, new_bits, clock_tick,
+				2, divisor);
+	} else if (old_divisor < divisor) {
+		frob_mem_refresh(0, clock_tick, old_divisor, divisor);
+		write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
+	} else if (old_divisor > divisor) {
+		write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits);
+		frob_mem_refresh(1, clock_tick, old_divisor, divisor);
+	} else {
+		BUG();
+	}
+
+	local_irq_restore(flags);
+}
+
+static unsigned long index_to_estar_mode(unsigned int index)
+{
+	switch (index) {
+	case 0:
+		return ESTAR_MODE_DIV_1;
+
+	case 1:
+		return ESTAR_MODE_DIV_2;
+
+	case 2:
+		return ESTAR_MODE_DIV_4;
+
+	case 3:
+		return ESTAR_MODE_DIV_6;
+
+	case 4:
+		return ESTAR_MODE_DIV_8;
+
+	default:
+		BUG();
+	};
+}
+
+static unsigned long index_to_divisor(unsigned int index)
+{
+	switch (index) {
+	case 0:
+		return 1;
+
+	case 1:
+		return 2;
+
+	case 2:
+		return 4;
+
+	case 3:
+		return 6;
+
+	case 4:
+		return 8;
+
+	default:
+		BUG();
+	};
+}
+
+static unsigned long estar_to_divisor(unsigned long estar)
+{
+	unsigned long ret;
+
+	switch (estar & ESTAR_MODE_DIV_MASK) {
+	case ESTAR_MODE_DIV_1:
+		ret = 1;
+		break;
+	case ESTAR_MODE_DIV_2:
+		ret = 2;
+		break;
+	case ESTAR_MODE_DIV_4:
+		ret = 4;
+		break;
+	case ESTAR_MODE_DIV_6:
+		ret = 6;
+		break;
+	case ESTAR_MODE_DIV_8:
+		ret = 8;
+		break;
+	default:
+		BUG();
+	};
+
+	return ret;
+}
+
+static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
+{
+	unsigned long new_bits, new_freq, cpus_allowed;
+	unsigned long clock_tick, divisor, old_divisor, estar;
+	struct cpufreq_freqs freqs;
+
+	if (!cpu_online(cpu))
+		return;
+
+	cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed(current, (1UL << cpu));
+
+	new_freq = clock_tick = sparc64_get_clock_tick(cpu);
+	new_bits = index_to_estar_mode(index);
+	divisor = index_to_divisor(index);
+	new_freq /= divisor;
+
+	estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR);
+
+	old_divisor = estar_to_divisor(estar);
+
+	freqs.old = clock_tick / old_divisor;
+	freqs.new = new_freq;
+	freqs.cpu = cpu;
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	us2e_transition(estar, new_bits, clock_tick, old_divisor, divisor);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	set_cpus_allowed(current, cpus_allowed);
+}
+
+static int us2e_freq_target(struct cpufreq_policy *policy,
+			  unsigned int target_freq,
+			  unsigned int relation)
+{
+	unsigned int new_index = 0;
+
+	if (cpufreq_frequency_table_target(policy,
+					      &us2e_freq_table[policy->cpu].table[0],
+					      target_freq,
+					      relation,
+					      &new_index))
+		return -EINVAL;
+
+	us2e_set_cpu_divider_index(policy->cpu, new_index);
+
+	return 0;
+}
+
+static int us2e_freq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy,
+					      &us2e_freq_table[policy->cpu].table[0]);
+}
+
+static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	unsigned long clock_tick = sparc64_get_clock_tick(cpu);
+	struct cpufreq_frequency_table *table =
+		&us2e_freq_table[cpu].table[0];
+	/* Slots 0-4: one per divider (1, 2, 4, 6, 8); slot 5 ends the table. */
+	table[0].index = 0;
+	table[0].frequency = clock_tick / 1;
+	table[1].index = 1;
+	table[1].frequency = clock_tick / 2;
+	table[2].index = 2;
+	table[2].frequency = clock_tick / 4;
+	table[3].index = 3;
+	table[3].frequency = clock_tick / 6;
+	table[4].index = 4;
+	table[4].frequency = clock_tick / 8;
+	table[5].index = 5;
+	table[5].frequency = CPUFREQ_TABLE_END;
+
+	policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+	policy->cpuinfo.transition_latency = 0;
+	policy->cur = clock_tick;
+
+	return cpufreq_frequency_table_cpuinfo(policy, table);
+}
+
+static int __exit us2e_freq_cpu_exit(struct cpufreq_policy *policy)
+{
+	if (cpufreq_us2e_driver)
+		us2e_set_cpu_divider_index(policy->cpu, 0);
+
+	return 0;
+}
+
+static int __init us2e_freq_init(void)
+{
+	unsigned long manuf, impl, ver;
+	int ret;
+	/* Pull the manufacturer and implementation fields out of %ver. */
+	__asm__("rdpr %%ver, %0" : "=r" (ver));
+	manuf = ((ver >> 48) & 0xffff);
+	impl  = ((ver >> 32) & 0xffff);
+
+	if (manuf == 0x17 && impl == 0x13) {
+		struct cpufreq_driver *driver;
+
+		ret = -ENOMEM;
+		driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
+		if (!driver)
+			goto err_out;
+		memset(driver, 0, sizeof(*driver));
+
+		us2e_freq_table = kmalloc(
+			(NR_CPUS * sizeof(struct us2e_freq_percpu_info)),
+			GFP_KERNEL);
+		if (!us2e_freq_table)
+			goto err_out;
+
+		memset(us2e_freq_table, 0,
+		       (NR_CPUS * sizeof(struct us2e_freq_percpu_info)));
+
+		driver->verify = us2e_freq_verify;
+		driver->target = us2e_freq_target;
+		driver->init = us2e_freq_cpu_init;
+		driver->exit = us2e_freq_cpu_exit;
+		driver->owner = THIS_MODULE;
+		strcpy(driver->name, "UltraSPARC-IIe");
+
+		cpufreq_us2e_driver = driver;
+		ret = cpufreq_register_driver(driver);
+		if (ret)
+			goto err_out;
+
+		return 0;
+
+err_out:	/* release whatever was allocated; ret holds the error */
+		if (driver) {
+			kfree(driver);
+			cpufreq_us2e_driver = NULL;
+		}
+		if (us2e_freq_table) {
+			kfree(us2e_freq_table);
+			us2e_freq_table = NULL;
+		}
+		return ret;
+	}
+
+	return -ENODEV;
+}
+
+static void __exit us2e_freq_exit(void)
+{
+	if (cpufreq_us2e_driver) {
+		cpufreq_unregister_driver(cpufreq_us2e_driver);
+
+		kfree(cpufreq_us2e_driver);
+		cpufreq_us2e_driver = NULL;
+		kfree(us2e_freq_table);
+		us2e_freq_table = NULL;
+	}
+}
+
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("cpufreq driver for UltraSPARC-IIe");
+MODULE_LICENSE("GPL");
+
+module_init(us2e_freq_init);
+module_exit(us2e_freq_exit);
diff -Nru a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c
--- a/arch/sparc64/kernel/us3_cpufreq.c	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/kernel/us3_cpufreq.c	Mon Mar 31 13:41:06 2003
@@ -16,14 +16,12 @@
 #include <linux/init.h>
 
 #include <asm/head.h>
+#include <asm/timer.h>
 
 static struct cpufreq_driver *cpufreq_us3_driver;
 
 struct us3_freq_percpu_info {
 	struct cpufreq_frequency_table table[4];
-	unsigned long udelay_val_ref;
-	unsigned long clock_tick_ref;
-	unsigned int ref_freq;
 };
 
 /* Indexed by cpu number. */
@@ -56,71 +54,9 @@
 			     : "memory");
 }
 
-#ifndef CONFIG_SMP
-extern unsigned long up_clock_tick;
-unsigned long clock_tick_ref;
-unsigned int ref_freq;
-#endif
-
-static __inline__ unsigned long get_clock_tick(unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	if (us3_freq_table[cpu].clock_tick_ref)
-		return us3_freq_table[cpu].clock_tick_ref;
-	return cpu_data[cpu].clock_tick;
-#else
-	if (clock_tick_ref)
-		return clock_tick_ref;
-	return up_clock_tick;
-#endif
-}
-
-static int us3_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-				void *data)
-{
-	struct cpufreq_freqs *freq = data;
-#ifdef CONFIG_SMP
-	unsigned int cpu = freq->cpu;
-
-	if (!us3_freq_table[cpu].ref_freq) {
-		us3_freq_table[cpu].ref_freq = freq->old;
-		us3_freq_table[cpu].udelay_val_ref = cpu_data[cpu].udelay_val;
-		us3_freq_table[cpu].clock_tick_ref = cpu_data[cpu].clock_tick;
-	}
-	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
-		cpu_data[cpu].udelay_val =
-			cpufreq_scale(us3_freq_table[cpu].udelay_val_ref,
-				      us3_freq_table[cpu].ref_freq,
-				      freq->new);
-		cpu_data[cpu].clock_tick =
-			cpufreq_scale(us3_freq_table[cpu].clock_tick_ref,
-				      us3_freq_table[cpu].ref_freq,
-				      freq->new);
-	}
-#else
-	/* In the non-SMP case, kernel/cpufreq.c takes care of adjusting
-	 * loops_per_jiffy.
-	 */
-	if (!ref_freq) {
-		ref_freq = freq->old;
-		clock_tick_ref = up_clock_tick;
-	}
-	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new))
-		up_clock_tick = cpufreq_scale(clock_tick_ref, ref_freq, freq->new);
-#endif
-
-	return 0;
-}
-
-static struct notifier_block us3_cpufreq_notifier_block = {
-	.notifier_call	= us3_cpufreq_notifier
-};
-
 static unsigned long get_current_freq(unsigned int cpu, unsigned long safari_cfg)
 {
-	unsigned long clock_tick = get_clock_tick(cpu);
+	unsigned long clock_tick = sparc64_get_clock_tick(cpu);
 	unsigned long ret;
 
 	switch (safari_cfg & SAFARI_CFG_DIV_MASK) {
@@ -151,7 +87,7 @@
 	cpus_allowed = current->cpus_allowed;
 	set_cpus_allowed(current, (1UL << cpu));
 
-	new_freq = get_clock_tick(cpu);
+	new_freq = sparc64_get_clock_tick(cpu);
 	switch (index) {
 	case 0:
 		new_bits = SAFARI_CFG_DIV_1;
@@ -186,17 +122,17 @@
 	set_cpus_allowed(current, cpus_allowed);
 }
 
-static int us3freq_target(struct cpufreq_policy *policy,
+static int us3_freq_target(struct cpufreq_policy *policy,
 			  unsigned int target_freq,
 			  unsigned int relation)
 {
 	unsigned int new_index = 0;
 
 	if (cpufreq_frequency_table_target(policy,
-					      &us3_freq_table[policy->cpu].table[0],
-					      target_freq,
-					      relation,
-					      &new_index))
+					   &us3_freq_table[policy->cpu].table[0],
+					   target_freq,
+					   relation,
+					   &new_index))
 		return -EINVAL;
 
 	us3_set_cpu_divider_index(policy->cpu, new_index);
@@ -204,16 +140,16 @@
 	return 0;
 }
 
-static int us3freq_verify(struct cpufreq_policy *policy)
+static int us3_freq_verify(struct cpufreq_policy *policy)
 {
 	return cpufreq_frequency_table_verify(policy,
 					      &us3_freq_table[policy->cpu].table[0]);
 }
 
-static int __init us3freq_cpu_init(struct cpufreq_policy *policy)
+static int __init us3_freq_cpu_init(struct cpufreq_policy *policy)
 {
 	unsigned int cpu = policy->cpu;
-	unsigned long clock_tick = get_clock_tick(cpu);
+	unsigned long clock_tick = sparc64_get_clock_tick(cpu);
 	struct cpufreq_frequency_table *table =
 		&us3_freq_table[cpu].table[0];
 
@@ -233,7 +169,7 @@
 	return cpufreq_frequency_table_cpuinfo(policy, table);
 }
 
-static int __exit us3freq_cpu_exit(struct cpufreq_policy *policy)
+static int __exit us3_freq_cpu_exit(struct cpufreq_policy *policy)
 {
 	if (cpufreq_us3_driver)
 		us3_set_cpu_divider_index(policy->cpu, 0);
@@ -241,7 +177,7 @@
 	return 0;
 }
 
-static int __init us3freq_init(void)
+static int __init us3_freq_init(void)
 {
 	unsigned long manuf, impl, ver;
 	int ret;
@@ -254,9 +190,6 @@
 	    (impl == CHEETAH_IMPL || impl == CHEETAH_PLUS_IMPL)) {
 		struct cpufreq_driver *driver;
 
-		cpufreq_register_notifier(&us3_cpufreq_notifier_block,
-					  CPUFREQ_TRANSITION_NOTIFIER);
-
 		ret = -ENOMEM;
 		driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
 		if (!driver)
@@ -272,10 +205,10 @@
 		memset(us3_freq_table, 0,
 		       (NR_CPUS * sizeof(struct us3_freq_percpu_info)));
 
-		driver->verify = us3freq_verify;
-		driver->target = us3freq_target;
-		driver->init = us3freq_cpu_init;
-		driver->exit = us3freq_cpu_exit;
+		driver->verify = us3_freq_verify;
+		driver->target = us3_freq_target;
+		driver->init = us3_freq_cpu_init;
+		driver->exit = us3_freq_cpu_exit;
 		driver->owner = THIS_MODULE,
 		strcpy(driver->name, "UltraSPARC-III");
 
@@ -295,20 +228,16 @@
 			kfree(us3_freq_table);
 			us3_freq_table = NULL;
 		}
-		cpufreq_unregister_notifier(&us3_cpufreq_notifier_block,
-					    CPUFREQ_TRANSITION_NOTIFIER);
 		return ret;
 	}
 
 	return -ENODEV;
 }
 
-static void __exit us3freq_exit(void)
+static void __exit us3_freq_exit(void)
 {
 	if (cpufreq_us3_driver) {
 		cpufreq_unregister_driver(cpufreq_us3_driver);
-		cpufreq_unregister_notifier(&us3_cpufreq_notifier_block,
-					    CPUFREQ_TRANSITION_NOTIFIER);
 
 		kfree(cpufreq_us3_driver);
 		cpufreq_us3_driver = NULL;
@@ -321,5 +250,5 @@
 MODULE_DESCRIPTION("cpufreq driver for UltraSPARC-III");
 MODULE_LICENSE("GPL");
 
-module_init(us3freq_init);
-module_exit(us3freq_exit);
+module_init(us3_freq_init);
+module_exit(us3_freq_exit);
diff -Nru a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
--- a/arch/sparc64/lib/Makefile	Mon Mar 31 13:41:06 2003
+++ b/arch/sparc64/lib/Makefile	Mon Mar 31 13:41:06 2003
@@ -11,4 +11,4 @@
 	 VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
 	 VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \
 	 dec_and_lock.o U3memcpy.o U3copy_from_user.o U3copy_to_user.o \
-	 U3copy_in_user.o mcount.o ipcsum.o
+	 U3copy_in_user.o mcount.o ipcsum.o rwsem.o
diff -Nru a/arch/sparc64/lib/rwsem.c b/arch/sparc64/lib/rwsem.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/sparc64/lib/rwsem.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,238 @@
+/* rwsem.c: Don't inline expand these suckers all over the place.
+ *
+ * Written by David S. Miller (davem@redhat.com), 2001.
+ * Derived from asm-i386/rwsem.h
+ */
+
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/module.h>
+
+extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
+extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
+extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
+extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *));
+
+void __down_read(struct rw_semaphore *sem)
+{
+	__asm__ __volatile__(
+		"! beginning __down_read\n"
+		"1:\tlduw	[%0], %%g5\n\t"
+		"add		%%g5, 1, %%g7\n\t"
+		"cas		[%0], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" add		%%g7, 1, %%g7\n\t"
+		"cmp		%%g7, 0\n\t"
+		"bl,pn		%%icc, 3f\n\t"
+		" membar	#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		".subsection	2\n"
+		"3:\tmov	%0, %%g5\n\t"
+		"save		%%sp, -160, %%sp\n\t"
+		"mov		%%g1, %%l1\n\t"
+		"mov		%%g2, %%l2\n\t"
+		"mov		%%g3, %%l3\n\t"
+		"call		%1\n\t"
+		" mov		%%g5, %%o0\n\t"
+		"mov		%%l1, %%g1\n\t"
+		"mov		%%l2, %%g2\n\t"
+		"ba,pt		%%xcc, 2b\n\t"
+		" restore	%%l3, %%g0, %%g3\n\t"
+		".previous\n\t"
+		"! ending __down_read"
+		: : "r" (sem), "i" (rwsem_down_read_failed)
+		: "g5", "g7", "memory", "cc");
+}
+EXPORT_SYMBOL(__down_read);
+
+int __down_read_trylock(struct rw_semaphore *sem)
+{
+	int result;
+
+	__asm__ __volatile__(
+		"! beginning __down_read_trylock\n"
+		"1:\tlduw	[%1], %%g5\n\t"
+		"add		%%g5, 1, %%g7\n\t"
+		"cmp		%%g7, 0\n\t"
+		"bl,pn		%%icc, 2f\n\t"
+		" mov		0, %0\n\t"
+		"cas		[%1], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" mov		1, %0\n\t"
+		"membar		#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		"! ending __down_read_trylock"
+		: "=&r" (result)
+                : "r" (sem)
+		: "g5", "g7", "memory", "cc");
+
+	return result;
+}
+EXPORT_SYMBOL(__down_read_trylock);
+
+void __down_write(struct rw_semaphore *sem)
+{
+	__asm__ __volatile__(
+		"! beginning __down_write\n\t"
+		"sethi		%%hi(%2), %%g1\n\t"
+		"or		%%g1, %%lo(%2), %%g1\n"
+		"1:\tlduw	[%0], %%g5\n\t"
+		"add		%%g5, %%g1, %%g7\n\t"
+		"cas		[%0], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" cmp		%%g7, 0\n\t"
+		"bne,pn		%%icc, 3f\n\t"
+		" membar	#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		".subsection	2\n"
+		"3:\tmov	%0, %%g5\n\t"
+		"save		%%sp, -160, %%sp\n\t"
+		"mov		%%g2, %%l2\n\t"
+		"mov		%%g3, %%l3\n\t"
+		"call		%1\n\t"
+		" mov		%%g5, %%o0\n\t"
+		"mov		%%l2, %%g2\n\t"
+		"ba,pt		%%xcc, 2b\n\t"
+		" restore	%%l3, %%g0, %%g3\n\t"
+		".previous\n\t"
+		"! ending __down_write"
+		: : "r" (sem), "i" (rwsem_down_write_failed),
+		    "i" (RWSEM_ACTIVE_WRITE_BIAS)
+		: "g1", "g5", "g7", "memory", "cc");
+}
+EXPORT_SYMBOL(__down_write);
+
+int __down_write_trylock(struct rw_semaphore *sem)
+{
+	int result;
+
+	__asm__ __volatile__(
+		"! beginning __down_write_trylock\n\t"
+		"sethi		%%hi(%2), %%g1\n\t"
+		"or		%%g1, %%lo(%2), %%g1\n"
+		"1:\tlduw	[%1], %%g5\n\t"
+		"cmp		%%g5, 0\n\t"
+		"bne,pn		%%icc, 2f\n\t"
+		" mov		0, %0\n\t"
+		"add		%%g5, %%g1, %%g7\n\t"
+		"cas		[%1], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" mov		1, %0\n\t"
+		"membar		#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		"! ending __down_write_trylock"
+		: "=&r" (result)
+		: "r" (sem), "i" (RWSEM_ACTIVE_WRITE_BIAS)
+		: "g1", "g5", "g7", "memory", "cc");
+
+	return result;
+}
+EXPORT_SYMBOL(__down_write_trylock);
+
+void __up_read(struct rw_semaphore *sem)
+{
+	__asm__ __volatile__(
+		"! beginning __up_read\n\t"
+		"1:\tlduw	[%0], %%g5\n\t"
+		"sub		%%g5, 1, %%g7\n\t"
+		"cas		[%0], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" cmp		%%g7, 0\n\t"
+		"bl,pn		%%icc, 3f\n\t"
+		" membar	#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		".subsection	2\n"
+		"3:\tsethi	%%hi(%2), %%g1\n\t"
+		"sub		%%g7, 1, %%g7\n\t"
+		"or		%%g1, %%lo(%2), %%g1\n\t"
+		"andcc		%%g7, %%g1, %%g0\n\t"
+		"bne,pn		%%icc, 2b\n\t"
+		" mov		%0, %%g5\n\t"
+		"save		%%sp, -160, %%sp\n\t"
+		"mov		%%g2, %%l2\n\t"
+		"mov		%%g3, %%l3\n\t"
+		"call		%1\n\t"
+		" mov		%%g5, %%o0\n\t"
+		"mov		%%l2, %%g2\n\t"
+		"ba,pt		%%xcc, 2b\n\t"
+		" restore	%%l3, %%g0, %%g3\n\t"
+		".previous\n\t"
+		"! ending __up_read"
+		: : "r" (sem), "i" (rwsem_wake),
+		    "i" (RWSEM_ACTIVE_MASK)
+		: "g1", "g5", "g7", "memory", "cc");
+}
+EXPORT_SYMBOL(__up_read);
+
+void __up_write(struct rw_semaphore *sem)
+{
+	__asm__ __volatile__(
+		"! beginning __up_write\n\t"
+		"sethi		%%hi(%2), %%g1\n\t"
+		"or		%%g1, %%lo(%2), %%g1\n"
+		"1:\tlduw	[%0], %%g5\n\t"
+		"sub		%%g5, %%g1, %%g7\n\t"
+		"cas		[%0], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"
+		" sub		%%g7, %%g1, %%g7\n\t"
+		"cmp		%%g7, 0\n\t"
+		"bl,pn		%%icc, 3f\n\t"
+		" membar	#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		".subsection 2\n"
+		"3:\tmov	%0, %%g5\n\t"
+		"save		%%sp, -160, %%sp\n\t"
+		"mov		%%g2, %%l2\n\t"
+		"mov		%%g3, %%l3\n\t"
+		"call		%1\n\t"
+		" mov		%%g5, %%o0\n\t"
+		"mov		%%l2, %%g2\n\t"
+		"ba,pt		%%xcc, 2b\n\t"
+		" restore	%%l3, %%g0, %%g3\n\t"
+		".previous\n\t"
+		"! ending __up_write"
+		: : "r" (sem), "i" (rwsem_wake),
+		    "i" (RWSEM_ACTIVE_WRITE_BIAS)
+		: "g1", "g5", "g7", "memory", "cc");
+}
+EXPORT_SYMBOL(__up_write);
+
+void __downgrade_write(struct rw_semaphore *sem)
+{
+	__asm__ __volatile__(
+		"! beginning __downgrade_write\n\t"
+		"sethi		%%hi(%2), %%g1\n\t"
+		"or		%%g1, %%lo(%2), %%g1\n"
+		"1:\tlduw	[%0], %%g5\n\t"
+		"sub		%%g5, %%g1, %%g7\n\t"
+		"cas		[%0], %%g5, %%g7\n\t"
+		"cmp		%%g5, %%g7\n\t"
+		"bne,pn		%%icc, 1b\n\t"	/* count changed: retry */
+		" sub		%%g7, %%g1, %%g7\n\t"
+		"cmp		%%g7, 0\n\t"
+		"bl,pn		%%icc, 3f\n\t"	/* negative: out-of-line call */
+		" membar	#StoreLoad | #StoreStore\n"
+		"2:\n\t"
+		".subsection 2\n"
+		"3:\tmov	%0, %%g5\n\t"
+		"save		%%sp, -160, %%sp\n\t"
+		"mov		%%g2, %%l2\n\t"
+		"mov		%%g3, %%l3\n\t"
+		"call		%1\n\t"
+		" mov		%%g5, %%o0\n\t"
+		"mov		%%l2, %%g2\n\t"
+		"ba,pt		%%xcc, 2b\n\t"
+		" restore	%%l3, %%g0, %%g3\n\t"
+		".previous\n\t"
+		"! ending __downgrade_write"
+		: : "r" (sem), "i" (rwsem_downgrade_wake),
+		    "i" (RWSEM_WAITING_BIAS)
+		: "g1", "g5", "g7", "memory", "cc");
+}
+EXPORT_SYMBOL(__downgrade_write);
diff -Nru a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/Kconfig	Mon Mar 31 13:41:08 2003
@@ -179,9 +179,7 @@
 
 	  If you don't know what to do here, say N.
 
-# broken currently
 config PREEMPT
-	depends on NOT_WORKING
 	bool "Preemptible Kernel"
 	---help---
 	  This option reduces the latency of the kernel when reacting to
@@ -200,7 +198,7 @@
 # someone write a better help text please.
 config K8_NUMA
        bool "K8 NUMA support"
-       depends on SMP && NOT_WORKING
+       depends on SMP
        help
 	  Enable NUMA (Non Unified Memory Architecture) support for
 	  AMD Opteron Multiprocessor systems. The kernel will try to allocate
@@ -590,10 +588,8 @@
 	  allocation as well as poisoning memory on free to catch use of freed
 	  memory.
 
-#   bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
 config MAGIC_SYSRQ
 	bool "Magic SysRq key"
-	depends on DEBUG_KERNEL
 	help
 	  If you say Y here, you will have some control over the system even
 	  if the system crashes for example during kernel debugging (e.g., you
@@ -639,13 +635,36 @@
 
 config FRAME_POINTER
        bool "Compile the kernel with frame pointers"
-       depends on DEBUG_KERNEL	
        help
 	 Compile the kernel with frame pointers. This may help for some 
 	 debugging with external debuggers. Note the standard oops backtracer 
-	 doesn't make use of it and the x86-64 kernel doesn't ensure an consistent
+	 doesn't make use of this and the x86-64 kernel doesn't ensure a consistent
 	 frame pointer through inline assembly (semaphores etc.)
 	 Normally you should say N.
+
+config IOMMU_DEBUG
+       bool "Force IOMMU to on" 
+       help
+         Force the IOMMU to on even when you have less than 4GB of memory and add 
+	 debugging code. 
+	 Can be disabled at boot time with iommu=noforce.
+
+config IOMMU_LEAK
+       bool "IOMMU leak tracing"
+       depends on DEBUG_KERNEL
+       help
+         Add a simple leak tracer to the IOMMU code. This is useful when you
+	 are debugging a buggy device driver that leaks IOMMU mappings.
+       
+config MCE_DEBUG
+       bool "K8 Machine check debugging mode"
+       default y 
+       help
+         Turn on all Machine Check debugging for device driver problems.	  
+	 This can cause panics, but is useful to find device driver problems.
+
+#config X86_REMOTE_DEBUG
+#       bool "kgdb debugging stub"
 
 endmenu
 
diff -Nru a/arch/x86_64/Makefile b/arch/x86_64/Makefile
--- a/arch/x86_64/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/Makefile	Mon Mar 31 13:41:08 2003
@@ -47,6 +47,10 @@
 # should lower this a lot and see how much .text is saves	
 CFLAGS += -finline-limit=2000
 #CFLAGS += -g
+# don't enable this when you use kgdb:
+ifneq ($(CONFIG_X86_REMOTE_DEBUG),y)
+CFLAGS += -fno-asynchronous-unwind-tables
+endif
 
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
 
diff -Nru a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
--- a/arch/x86_64/boot/compressed/misc.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/boot/compressed/misc.c	Mon Mar 31 13:41:07 2003
@@ -274,7 +274,7 @@
 	puts(x);
 	puts("\n\n -- System halted");
 
-	while(1);	/* Halt */
+	while(1);
 }
 
 void setup_normal_output_buffer(void)
@@ -429,8 +429,6 @@
 	else setup_output_buffer_if_we_run_high(mv);
 
 	makecrc();
-	puts("Checking CPU type...");
-	check_cpu();
 	puts(".\nDecompressing Linux...");
 	gunzip();
 	puts("done.\nBooting the kernel.\n");
diff -Nru a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
--- a/arch/x86_64/boot/setup.S	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/boot/setup.S	Mon Mar 31 13:41:08 2003
@@ -42,6 +42,7 @@
  * if CX/DX have been changed in the e801 call and if so use AX/BX .
  * Michael Miller, April 2001 <michaelm@mjmm.org>
  *
+ * Added long mode checking and SSE force. March 2003, Andi Kleen.		
  */
 
 #include <linux/config.h>
@@ -200,10 +201,10 @@
 prtsp2:	call	prtspc		# Print double space
 prtspc:	movb	$0x20, %al	# Print single space (note: fall-thru)
 
-# Part of above routine, this one just prints ascii al
-prtchr:	pushw	%ax
+prtchr:	
+	pushw	%ax
 	pushw	%cx
-	xorb	%bh, %bh
+	movw	$0007,%bx
 	movw	$0x01, %cx
 	movb	$0x0e, %ah
 	int	$0x10
@@ -280,6 +281,75 @@
 loader_panic_mess: .string "Wrong loader, giving up..."
 
 loader_ok:
+	/* check for long mode. */
+	/* we have to do this before the VESA setup, otherwise the user
+	   can't see the error message. */
+	
+	pushw	%ds
+	movw	%cs,%ax
+	movw	%ax,%ds
+	
+	/* minimum CPUID flags for x86-64 */
+	/* see http://www.x86-64.org/lists/discuss/msg02971.html */		
+#define SSE_MASK ((1<<25)|(1<<26))
+#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|(1<<11)| \
+					   (1<<13)|(1<<15)|(1<<24)|(1<<29))
+
+	pushfl				/* standard way to check for cpuid */
+	popl	%eax
+	movl	%eax,%ebx
+	xorl	$0x200000,%eax
+	pushl	%eax
+	popfl
+	pushfl
+	popl	%eax
+	cmpl	%eax,%ebx
+	jz	no_longmode		/* cpu has no cpuid */
+	movl	$0x80000000,%eax
+	cpuid
+	cmpl	$0x80000001,%eax
+	jb	no_longmode		/* no extended cpuid */
+	xor	%di,%di
+	cmpl	$0x68747541,%ebx	/* AuthenticAMD */
+	jnz	noamd
+	cmpl	$0x69746e65,%edx
+	jnz	noamd
+	cmpl	$0x444d4163,%ecx
+	jnz	noamd
+	mov	$1,%di			/* cpu is from AMD */
+noamd:		
+	movl	$0x80000001,%eax		
+	cpuid
+	andl	$REQUIRED_MASK1,%edx
+	xorl	$REQUIRED_MASK1,%edx
+	jnz	no_longmode
+sse_test:		
+	movl	$1,%eax
+	cpuid
+	andl	$SSE_MASK,%edx
+	cmpl	$SSE_MASK,%edx
+	je	sse_ok
+	test	%di,%di
+	jz	no_longmode	/* only try to force SSE on AMD */ 
+	movl	$0xc0010015,%ecx	/* HWCR */
+	rdmsr
+	btr	$15,%eax	/* enable SSE */
+	wrmsr
+	xor	%di,%di		/* don't loop */
+	jmp	sse_test	/* try again */	
+no_longmode:
+	call	beep
+	lea	long_mode_panic,%si
+	call	prtstr
+no_longmode_loop:		
+	jmp	no_longmode_loop
+long_mode_panic:
+	.string "Your CPU does not support long mode. Use a 32bit distribution."
+	.byte 0
+	
+sse_ok:
+	popw	%ds
+	
 # Get memory size (extended mem, kB)
 
 	xorl	%eax, %eax
diff -Nru a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
--- a/arch/x86_64/ia32/fpu32.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/ia32/fpu32.c	Mon Mar 31 13:41:06 2003
@@ -77,17 +77,20 @@
 	struct _fpxreg *to;
 	struct _fpreg *from;
 	int i;
-	int err; 
-	__u32 v;
+	u32 v;
+	int err = 0;
 
-	err = __get_user(fxsave->cwd, &buf->cw); 
-	err |= __get_user(fxsave->swd, &buf->sw);
-	err |= __get_user(fxsave->twd, &buf->tag);
+#define G(num,val) err |= __get_user(val, num + (u32 *)buf)
+	G(0, fxsave->cwd);
+	G(1, fxsave->swd);
+	G(2, fxsave->twd);
 	fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
-	err |= __get_user(fxsave->rip, &buf->ipoff); 
-	err |= __get_user(fxsave->rdp, &buf->dataoff); 
-	err |= __get_user(v, &buf->cssel); 
-	fxsave->fop = v >> 16;
+	G(3, fxsave->rip);
+	G(4, v);
+	fxsave->fop = v>>16;	/* cs ignored */
+	G(5, fxsave->rdp);
+	/* 6: ds ignored */
+#undef G
 	if (err) 
 		return -1; 
 
@@ -109,21 +112,29 @@
 	struct _fpreg *to;
 	struct _fpxreg *from;
 	int i;
-	u32 ds; 
-	int err; 
+	u16 cs,ds; 
+	int err = 0; 
 
-	err = __put_user((unsigned long)fxsave->cwd | 0xffff0000, &buf->cw);
-	err |= __put_user((unsigned long)fxsave->swd | 0xffff0000, &buf->sw);
-	err |= __put_user((u32)fxsave->rip, &buf->ipoff); 
-	err |= __put_user((u32)(regs->cs | ((u32)fxsave->fop << 16)), 
-			  &buf->cssel); 
-	err |= __put_user((u32)twd_fxsr_to_i387(fxsave), &buf->tag); 
-	err |= __put_user((u32)fxsave->rdp, &buf->dataoff); 
-	if (tsk == current) 
-		asm("movl %%ds,%0 " : "=r" (ds)); 
-	else /* ptrace. task has stopped. */
+	if (tsk == current) {
+		/* should be actually ds/cs at fpu exception time,
+		   but that information is not available in 64bit mode. */
+		asm("movw %%ds,%0 " : "=r" (ds)); 
+		asm("movw %%cs,%0 " : "=r" (cs)); 		
+	} else { /* ptrace. task has stopped. */
 		ds = tsk->thread.ds;
-	err |= __put_user(ds, &buf->datasel); 
+		cs = regs->cs;
+	} 
+
+#define P(num,val) err |= __put_user(val, num + (u32 *)buf)
+	P(0, (u32)fxsave->cwd | 0xffff0000);
+	P(1, (u32)fxsave->swd | 0xffff0000);
+	P(2, twd_fxsr_to_i387(fxsave));
+	P(3, (u32)fxsave->rip);
+	P(4,  cs | ((u32)fxsave->fop) << 16); 
+	P(5, fxsave->rdp);
+	P(6, 0xffff0000 | ds);
+#undef P
+
 	if (err) 
 		return -1; 
 
@@ -144,9 +155,9 @@
 				     &buf->_fxsr_env[0],
 				     sizeof(struct i387_fxsave_struct)))
 			return -1;
-	} 
 	tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
-	current->used_math = 1;
+		tsk->used_math = 1;
+	} 
 	return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
 }  
 
@@ -157,12 +168,11 @@
 {
 	int err = 0;
 
-	if (!tsk->used_math) 
-		return 0;
-	tsk->used_math = 0; 
-	unlazy_fpu(tsk);
+	init_fpu(tsk);
 	if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
 		return -1;
+	if (fsave)
+		return 0;
 	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
 	if (fsave) 
 		return err ? -1 : 1; 	
diff -Nru a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
--- a/arch/x86_64/ia32/ia32_ioctl.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/ia32/ia32_ioctl.c	Mon Mar 31 13:41:07 2003
@@ -39,6 +39,7 @@
 #include <linux/cdrom.h>
 #include <linux/loop.h>
 #include <linux/auto_fs.h>
+#include <linux/auto_fs4.h>
 #include <linux/devfs_fs.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
@@ -60,6 +61,8 @@
 #include <linux/if_tun.h>
 #include <linux/dirent.h>
 #include <linux/ctype.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/rfcomm.h>
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 /* Ugh. This header really is not clean */
 #define min min
@@ -2906,35 +2909,28 @@
 {
 	typedef struct serial_struct SS;
 	struct serial_struct32 *ss32 = ptr; 
-	int err = 0;
+	int err;
 	struct serial_struct ss; 
 	mm_segment_t oldseg = get_fs(); 
-	set_fs(KERNEL_DS);
 	if (cmd == TIOCSSERIAL) { 
-		err = -EFAULT;
 		if (copy_from_user(&ss, ss32, sizeof(struct serial_struct32)))
-			goto out;
+			return -EFAULT;
 		memmove(&ss.iomem_reg_shift, ((char*)&ss.iomem_base)+4, 
 			sizeof(SS)-offsetof(SS,iomem_reg_shift)); 
 		ss.iomem_base = (void *)((unsigned long)ss.iomem_base & 0xffffffff);
 	}
-	if (!err)
+	set_fs(KERNEL_DS);
 		err = sys_ioctl(fd,cmd,(unsigned long)(&ss)); 
+	set_fs(oldseg);
 	if (cmd == TIOCGSERIAL && err >= 0) { 
-		__u32 base;
 		if (__copy_to_user(ss32,&ss,offsetof(SS,iomem_base)) ||
-		    __copy_to_user(&ss32->iomem_reg_shift,
-				   &ss.iomem_reg_shift,
-				   sizeof(SS) - offsetof(SS, iomem_reg_shift)))
-			err = -EFAULT;
-		if (ss.iomem_base > (unsigned char *)0xffffffff)
-			base = -1; 
-		else
-			base = (unsigned long)ss.iomem_base;
-		err |= __put_user(base, &ss32->iomem_base); 		
+		    __put_user((unsigned long)ss.iomem_base  >> 32 ? 
+			       0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
+			       &ss32->iomem_base) ||
+		    __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+		    __put_user(ss.port_high, &ss32->port_high))
+			return -EFAULT;
 	} 
- out:
-	set_fs(oldseg);
 	return err;	
 }
 
@@ -3045,7 +3041,14 @@
        return sys_ioctl(fd, BLKGETSIZE64, arg);
 }
 
+/* Bluetooth ioctls */
+#define HCIUARTSETPROTO        _IOW('U', 200, int)
+#define HCIUARTGETPROTO        _IOR('U', 201, int)
 
+#define BNEPCONNADD    _IOW('B', 200, int)
+#define BNEPCONNDEL    _IOW('B', 201, int)
+#define BNEPGETCONNLIST        _IOR('B', 210, int)
+#define BNEPGETCONNINFO        _IOR('B', 211, int)
 
 struct usbdevfs_ctrltransfer32 {
 	__u8 bRequestType;
@@ -4093,6 +4096,7 @@
 COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
 COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
+COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
 /* DEVFS */
 COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV)
 COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK)
@@ -4200,6 +4204,17 @@
 COMPATIBLE_IOCTL(HCISETACLMTU)
 COMPATIBLE_IOCTL(HCISETSCOMTU)
 COMPATIBLE_IOCTL(HCIINQUIRY)
+COMPATIBLE_IOCTL(HCIUARTSETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
+COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
+COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
+COMPATIBLE_IOCTL(RFCOMMGETDEVINFO)
+COMPATIBLE_IOCTL(RFCOMMSTEALDLC)
+COMPATIBLE_IOCTL(BNEPCONNADD)
+COMPATIBLE_IOCTL(BNEPCONNDEL)
+COMPATIBLE_IOCTL(BNEPGETCONNLIST)
+COMPATIBLE_IOCTL(BNEPGETCONNINFO)
 /* Misc. */
 COMPATIBLE_IOCTL(0x41545900)		/* ATYIO_CLKR */
 COMPATIBLE_IOCTL(0x41545901)		/* ATYIO_CLKW */
diff -Nru a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
--- a/arch/x86_64/ia32/ia32_signal.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/ia32/ia32_signal.c	Mon Mar 31 13:41:08 2003
@@ -47,9 +47,16 @@
 {
 	if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
 		return -EFAULT;
-	if (from->si_code < 0)
-		return __copy_to_user(to, from, sizeof(siginfo_t));
-	else {
+	if (from->si_code < 0) { 
+		/* the only field that's different is the alignment
+		   of the pointer in sigval_t. Move that 4 bytes down including
+		   padding. */
+		memmove(&((siginfo_t32 *)&from)->si_int,
+			&from->si_int, 
+			sizeof(siginfo_t) - offsetof(siginfo_t, si_int));
+		/* last 4 bytes stay the same */
+		return __copy_to_user(to, from, sizeof(siginfo_t32));
+	} else {
 		int err;
 
 		/* If you change siginfo_t structure, please be sure
@@ -59,7 +66,7 @@
 		   3 ints plus the relevant union member.  */
 		err = __put_user(from->si_signo, &to->si_signo);
 		err |= __put_user(from->si_errno, &to->si_errno);
-		err |= __put_user((short)from->si_code, &to->si_code);
+		err |= __put_user(from->si_code, &to->si_code);
 		/* First 32bits of unions are always present.  */
 		err |= __put_user(from->si_pid, &to->si_pid);
 		switch (from->si_code >> 16) {
@@ -108,6 +115,7 @@
 	mm_segment_t seg; 
 	if (uss_ptr) { 
 		u32 ptr;
+		memset(&uss,0,sizeof(stack_t));
 	if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
 		    __get_user(ptr, &uss_ptr->ss_sp) ||
 		    __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
@@ -340,8 +348,11 @@
 	tmp = save_i387_ia32(current, fpstate, regs, 0);
 	if (tmp < 0)
 	  err = -EFAULT;
-	else
+	else { 
+		current->used_math = 0;
+		stts();
 	  err |= __put_user((u32)(u64)(tmp ? fpstate : NULL), &sc->fpstate);
+	}
 
 	/* non-iBCS2 extensions.. */
 	err |= __put_user(mask, &sc->oldmask);
diff -Nru a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c
--- a/arch/x86_64/ia32/ipc32.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/ia32/ipc32.c	Mon Mar 31 13:41:07 2003
@@ -187,12 +187,58 @@
 	}
 }
 
+static int put_semid(void *user_semid, struct semid64_ds *s, int version)
+{
+	int err2;
+	switch (version) { 
+	case IPC_64: { 
+		struct semid64_ds32 *usp64 = (struct semid64_ds32 *) user_semid;
+		
+		if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
+			err2 = -EFAULT;
+			break;
+		} 
+		err2 = __put_user(s->sem_perm.key, &usp64->sem_perm.key);
+		err2 |= __put_user(s->sem_perm.uid, &usp64->sem_perm.uid);
+		err2 |= __put_user(s->sem_perm.gid, &usp64->sem_perm.gid);
+		err2 |= __put_user(s->sem_perm.cuid, &usp64->sem_perm.cuid);
+		err2 |= __put_user(s->sem_perm.cgid, &usp64->sem_perm.cgid);
+		err2 |= __put_user(s->sem_perm.mode, &usp64->sem_perm.mode);
+		err2 |= __put_user(s->sem_perm.seq, &usp64->sem_perm.seq);
+		err2 |= __put_user(s->sem_otime, &usp64->sem_otime);
+		err2 |= __put_user(s->sem_ctime, &usp64->sem_ctime);
+		err2 |= __put_user(s->sem_nsems, &usp64->sem_nsems);
+		break;
+	}
+	default: {
+		struct semid_ds32 *usp32 = (struct semid_ds32 *) user_semid;
+		
+		if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
+			err2 = -EFAULT;
+			break;
+		} 
+		err2 = __put_user(s->sem_perm.key, &usp32->sem_perm.key);
+		err2 |= __put_user(s->sem_perm.uid, &usp32->sem_perm.uid);
+		err2 |= __put_user(s->sem_perm.gid, &usp32->sem_perm.gid);
+		err2 |= __put_user(s->sem_perm.cuid, &usp32->sem_perm.cuid);
+		err2 |= __put_user(s->sem_perm.cgid, &usp32->sem_perm.cgid);
+		err2 |= __put_user(s->sem_perm.mode, &usp32->sem_perm.mode);
+		err2 |= __put_user(s->sem_perm.seq, &usp32->sem_perm.seq);
+		err2 |= __put_user(s->sem_otime, &usp32->sem_otime);
+		err2 |= __put_user(s->sem_ctime, &usp32->sem_ctime);
+		err2 |= __put_user(s->sem_nsems, &usp32->sem_nsems);
+		break;
+	}
+	}
+	return err2;
+}
+
 static int
 semctl32 (int first, int second, int third, void *uptr)
 {
 	union semun fourth;
 	u32 pad;
-	int err = 0, err2;
+	int err;
 	struct semid64_ds s;
 	mm_segment_t old_fs;
 	int version = ipc_parse_version32(&third);
@@ -225,46 +271,10 @@
 		fourth.__pad = &s;
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		err = sys_semctl(first, second|IPC_64, third, fourth);
+		err = sys_semctl(first, second, third|IPC_64, fourth);
 		set_fs(old_fs);
-
-		if (version == IPC_64) {
-			struct semid64_ds32 *usp64 = (struct semid64_ds32 *) A(pad);
-
-			if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
-				err = -EFAULT;
-				break;
-			}
-			err2 = __put_user(s.sem_perm.key, &usp64->sem_perm.key);
-			err2 |= __put_user(s.sem_perm.uid, &usp64->sem_perm.uid);
-			err2 |= __put_user(s.sem_perm.gid, &usp64->sem_perm.gid);
-			err2 |= __put_user(s.sem_perm.cuid, &usp64->sem_perm.cuid);
-			err2 |= __put_user(s.sem_perm.cgid, &usp64->sem_perm.cgid);
-			err2 |= __put_user(s.sem_perm.mode, &usp64->sem_perm.mode);
-			err2 |= __put_user(s.sem_perm.seq, &usp64->sem_perm.seq);
-			err2 |= __put_user(s.sem_otime, &usp64->sem_otime);
-			err2 |= __put_user(s.sem_ctime, &usp64->sem_ctime);
-			err2 |= __put_user(s.sem_nsems, &usp64->sem_nsems);
-		} else {
-			struct semid_ds32 *usp32 = (struct semid_ds32 *) A(pad);
-
-			if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
-				err = -EFAULT;
-				break;
-			}
-			err2 = __put_user(s.sem_perm.key, &usp32->sem_perm.key);
-			err2 |= __put_user(s.sem_perm.uid, &usp32->sem_perm.uid);
-			err2 |= __put_user(s.sem_perm.gid, &usp32->sem_perm.gid);
-			err2 |= __put_user(s.sem_perm.cuid, &usp32->sem_perm.cuid);
-			err2 |= __put_user(s.sem_perm.cgid, &usp32->sem_perm.cgid);
-			err2 |= __put_user(s.sem_perm.mode, &usp32->sem_perm.mode);
-			err2 |= __put_user(s.sem_perm.seq, &usp32->sem_perm.seq);
-			err2 |= __put_user(s.sem_otime, &usp32->sem_otime);
-			err2 |= __put_user(s.sem_ctime, &usp32->sem_ctime);
-			err2 |= __put_user(s.sem_nsems, &usp32->sem_nsems);
-		}
-		if (err2)
-		    err = -EFAULT;
+		if (!err)
+			err = put_semid((void *)A(pad), &s, version);
 		break;
 	default:
 		err = -EINVAL;
@@ -343,6 +353,7 @@
 	return err;
 }
 
+
 static int
 msgctl32 (int first, int second, void *uptr)
 {
@@ -387,7 +398,6 @@
 		set_fs(KERNEL_DS);
 		err = sys_msgctl(first, second|IPC_64, (void *) &m64);
 		set_fs(old_fs);
-
 		if (version == IPC_64) {
 			if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64))) {
 				err = -EFAULT;
@@ -608,7 +618,9 @@
 		if (err2)
 			err = -EFAULT;
 		break;
-
+	default:
+		err = -EINVAL;
+		break;
 	}
 	return err;
 }
diff -Nru a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
--- a/arch/x86_64/ia32/ptrace32.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/ia32/ptrace32.c	Mon Mar 31 13:41:06 2003
@@ -8,7 +8,7 @@
  * This allows to access 64bit processes too; but there is no way to see the extended 
  * register contents.
  *
- * $Id: ptrace32.c,v 1.12 2002/03/24 13:02:02 ak Exp $
+ * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
  */ 
 
 #include <linux/kernel.h>
@@ -22,11 +22,9 @@
 #include <asm/errno.h>
 #include <asm/debugreg.h>
 #include <asm/i387.h>
-#include <asm/desc.h>
-#include <asm/ldt.h>
 #include <asm/fpu32.h>
-#include <linux/mm.h>
 #include <linux/ptrace.h>
+#include <linux/mm.h>
 
 #define R32(l,q) \
 	case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
@@ -39,29 +37,26 @@
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
 		if (val && (val & 3) != 3) return -EIO; 
-	        child->thread.fs = val; 
+		child->thread.fs = val & 0xffff; 
 		break;
 	case offsetof(struct user32, regs.gs):
 		if (val && (val & 3) != 3) return -EIO; 
-		child->thread.gs = val;
+		child->thread.gs = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.ds):
 		if (val && (val & 3) != 3) return -EIO; 
-		child->thread.ds = val;
+		child->thread.ds = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.es):
-		if (val && (val & 3) != 3) return -EIO; 
-		child->thread.es = val;
+		child->thread.es = val & 0xffff;
 		break;
-
 	case offsetof(struct user32, regs.ss): 
 		if ((val & 3) != 3) return -EIO;
-		stack[offsetof(struct pt_regs, ss)/8] = val; 
+        	stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
 		break;
-
 	case offsetof(struct user32, regs.cs): 
 		if ((val & 3) != 3) return -EIO;
-		stack[offsetof(struct pt_regs, cs)/8] = val; 
+		stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
 		break;
 
 	R32(ebx, rbx); 
@@ -79,8 +74,16 @@
 		stack[offsetof(struct pt_regs, eflags)/8] = val & 0x44dd5; 
 		break;
 
-	case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[6]):
-		child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4] = val; 
+	case offsetof(struct user32, u_debugreg[4]): 
+	case offsetof(struct user32, u_debugreg[5]):
+		return -EIO;
+
+	case offsetof(struct user32, u_debugreg[0]) ...
+	     offsetof(struct user32, u_debugreg[3]):
+	case offsetof(struct user32, u_debugreg[6]):
+		child->thread.debugreg
+			[(regno-offsetof(struct user32, u_debugreg[0]))/4] 
+			= val; 
 		break; 
 
 	case offsetof(struct user32, u_debugreg[7]):
@@ -170,11 +173,19 @@
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
-	*err = ptrace_check_attach(child,0);
-	if (*err == 0)
+	if (child) { 
+		*err = -EPERM;
+		if (child->pid == 1) 
+			goto out;
+		*err = ptrace_check_attach(child, request == PTRACE_KILL); 
+		if (*err < 0) 
+			goto out;
 		return child; 
+	} 
+ out:
 	put_task_struct(child);
 	return NULL; 
+	
 } 
 
 extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data);
@@ -187,6 +198,9 @@
 	__u32 val;
 
 	switch (request) { 
+	default:
+		return sys_ptrace(request, pid, addr, data); 
+
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
 	case PTRACE_POKEDATA:
@@ -201,9 +215,6 @@
 	case PTRACE_GETFPXREGS:
 		break;
 		
-	default:
-		ret = sys_ptrace(request, pid, addr, data); 
-		return ret;
 	} 
 
 	child = find_target(request, pid, &ret);
@@ -261,7 +272,6 @@
 			ret = -EIO;
 			break;
 		}
-		empty_fpu(child); 
 		ret = 0; 
 		for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
 			ret |= __get_user(tmp, (u32 *) (unsigned long) data);
@@ -271,33 +281,47 @@
 		break;
 	}
 
-	case PTRACE_SETFPREGS:
-		empty_fpu(child); 
+	case PTRACE_GETFPREGS:
+		ret = -EIO; 
+		if (!access_ok(VERIFY_READ, (void *)(u64)data, 
+			       sizeof(struct user_i387_struct)))
+			break;
 		save_i387_ia32(child, (void *)(u64)data, childregs, 1);
 		ret = 0; 
 			break;
 
-	case PTRACE_GETFPREGS:
-		empty_fpu(child); 
-		restore_i387_ia32(child, (void *)(u64)data, 1);
+	case PTRACE_SETFPREGS:
+		ret = -EIO;
+		if (!access_ok(VERIFY_WRITE, (void *)(u64)data, 
+			       sizeof(struct user_i387_struct)))
+			break;
 		ret = 0;
+		/* don't check EFAULT to be bug-to-bug compatible with i386 */
+		restore_i387_ia32(child, (void *)(u64)data, 1);
 		break;
 
 	case PTRACE_GETFPXREGS: { 
 		struct user32_fxsr_struct *u = (void *)(u64)data; 
-		empty_fpu(child); 
-		ret = copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u));
-		ret |= __put_user(childregs->cs, &u->fcs);
-		ret |= __put_user(child->thread.ds, &u->fos); 
-		if (ret) 
+		init_fpu(child); 
+		ret = -EIO;
+		if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
+			break;
 			ret = -EFAULT;
+		if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
+			break;
+		ret = __put_user(childregs->cs, &u->fcs);
+		ret |= __put_user(child->thread.ds, &u->fos); 
 		break; 
 	} 
 	case PTRACE_SETFPXREGS: { 
 		struct user32_fxsr_struct *u = (void *)(u64)data; 
-		empty_fpu(child); 
-		/* no error checking to be bug to bug compatible with i386 */ 
-		copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
+		unlazy_fpu(child);
+		ret = -EIO;
+		if (!access_ok(VERIFY_READ, u, sizeof(*u)))
+			break;
+		/* no checking to be bug-to-bug compatible with i386 */
+		__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
+		child->used_math = 1;
 	        child->thread.i387.fxsave.mxcsr &= 0xffbf;
 		ret = 0; 
 			break;
diff -Nru a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
--- a/arch/x86_64/ia32/sys_ia32.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/ia32/sys_ia32.c	Mon Mar 31 13:41:07 2003
@@ -57,6 +57,7 @@
 #include <linux/binfmts.h>
 #include <linux/init.h>
 #include <linux/aio_abi.h>
+#include <linux/aio.h>
 #include <linux/compat.h>
 #include <linux/vfs.h>
 #include <linux/ptrace.h>
@@ -74,6 +75,7 @@
 
 #define A(__x)		((unsigned long)(__x))
 #define AA(__x)		((unsigned long)(__x))
+#define u32_to_ptr(x)	((void *)(u64)(x))
 #define ROUND_UP(x,a)	((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
 #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
 
@@ -738,7 +740,7 @@
 asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long);
 
 static struct iovec *
-get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type, int *errp)
+get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp)
 {
 	int i;
 	u32 buf, len;
@@ -747,15 +749,18 @@
 
 	/* Get the "struct iovec" from user memory */
 
-	if (!count)
+	*errp = 0;
+	if (!*count)
 		return 0;
-	if (count > UIO_MAXIOV)
+	*errp = -EINVAL;
+	if (*count > UIO_MAXIOV)
 		return(struct iovec *)0;
-	if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*count))
+	*errp = -EFAULT;
+	if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*(*count)))
 		return(struct iovec *)0;
-	if (count > UIO_FASTIOV) {
+	if (*count > UIO_FASTIOV) {
 		*errp = -ENOMEM; 
-		iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
+		iov = kmalloc(*count*sizeof(struct iovec), GFP_KERNEL);
 		if (!iov)
 			return((struct iovec *)0);
 	} else
@@ -763,14 +768,19 @@
 
 	ivp = iov;
 	totlen = 0;
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < *count; i++) {
 		*errp = __get_user(len, &iov32->iov_len) |
 		  	__get_user(buf, &iov32->iov_base);	
 		if (*errp)
 			goto error;
 		*errp = verify_area(type, (void *)A(buf), len);
-		if (*errp) 
+		if (*errp) {
+			if (i > 0) { 
+				*count = i;
+				break;
+			} 
 			goto error;
+		}
 		/* SuS checks: */
 		*errp = -EINVAL; 
 		if ((int)len < 0)
@@ -799,7 +809,7 @@
 	int ret;
 	mm_segment_t old_fs = get_fs();
 
-	if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_WRITE, &ret)) == NULL)
+	if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_WRITE, &ret)) == NULL)
 		return ret;
 	set_fs(KERNEL_DS);
 	ret = sys_readv(fd, iov, count);
@@ -817,7 +827,7 @@
 	int ret;
 	mm_segment_t old_fs = get_fs();
 
-	if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_READ, &ret)) == NULL)
+	if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_READ, &ret)) == NULL)
 		return ret;
 	set_fs(KERNEL_DS);
 	ret = sys_writev(fd, iov, count);
@@ -1672,21 +1682,26 @@
 	return cnt; 
 } 
 
-long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
+asmlinkage long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
 { 
 	mm_segment_t oldseg; 
-	char **buf; 
-	int na,ne;
+	char **buf = NULL; 
+	int na = 0,ne = 0;
 	int ret;
-	unsigned sz; 
+	unsigned sz = 0; 
 
+	if (argv) {
 	na = nargs(argv, NULL); 
 	if (na < 0) 
 		return -EFAULT; 
+	} 	
+	if (envp) { 
 	ne = nargs(envp, NULL); 
 	if (ne < 0) 
 		return -EFAULT; 
+	}
 
+	if (argv || envp) { 
 	sz = (na+ne)*sizeof(void *); 
 	if (sz > PAGE_SIZE) 
 		buf = vmalloc(sz); 
@@ -1694,14 +1709,19 @@
 		buf = kmalloc(sz, GFP_KERNEL); 
 	if (!buf)
 		return -ENOMEM; 
+	} 
 	
+	if (argv) { 
 	ret = nargs(argv, buf);
 	if (ret < 0)
 		goto free;
+	}
 
+	if (envp) { 
 	ret = nargs(envp, buf + na); 
 	if (ret < 0)
 		goto free; 
+	}
 
 	name = getname(name); 
 	ret = PTR_ERR(name); 
@@ -1710,7 +1730,7 @@
 
 	oldseg = get_fs(); 
 	set_fs(KERNEL_DS);
-	ret = do_execve(name, buf, buf+na, &regs);  
+	ret = do_execve(name, argv ? buf : NULL, envp ? buf+na : NULL, &regs);  
 	set_fs(oldseg); 
 
 	if (ret == 0)
@@ -1719,10 +1739,12 @@
 	putname(name);
  
 free:
+	if (argv || envp) { 
 	if (sz > PAGE_SIZE)
 		vfree(buf); 
 	else
 	kfree(buf);
+	}
 	return ret; 
 } 
 
@@ -2012,12 +2034,8 @@
 
 long sys32_module_warning(void)
 { 
-	static long warn_time = -(60*HZ); 
-	if (time_before(warn_time + 60*HZ,jiffies) && strcmp(current->comm,"klogd")) { 
 		printk(KERN_INFO "%s: 32bit 2.4.x modutils not supported on 64bit kernel\n",
 		       current->comm);
-		warn_time = jiffies;
-	} 
 	return -ENOSYS ;
 } 
 
@@ -2055,6 +2073,7 @@
 	return err;
 }
 
+
 extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx);
 
 long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p)
@@ -2071,48 +2090,47 @@
 	return ret;
 } 
 
-extern asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
-				     struct iocb **iocbpp);
-
-long sys32_io_submit(aio_context_t ctx_id, unsigned long nr,
+asmlinkage long sys32_io_submit(aio_context_t ctx_id, int nr,
 		   u32 *iocbpp)
 {
-	mm_segment_t oldfs = get_fs(); 
-	int k, err = 0;
-	struct iocb **iocb64; 
-	if (nr > 128) 
+	struct kioctx *ctx;
+	long ret = 0;
+	int i;
+	
+	if (unlikely(nr < 0))
+		return -EINVAL;
+
+	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
+		return -EFAULT;
+
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx)) {
+		pr_debug("EINVAL: io_submit: invalid context id\n");
 		return -EINVAL; 
-	iocb64 = kmalloc(sizeof(struct iocb *) * nr, GFP_KERNEL);
-	if (!iocb64)
-		return -ENOMEM;
-	for (k = 0; k < nr && !err; k++) { 
-		u64 val1, val2;
-		u32 iocb32;
-		struct iocb *iocb; 	
-		err = get_user(iocb32, (u32 *)(u64)iocbpp[k]); 
-		iocb64[k] = iocb = (void *)(u64)iocb32; 
-		
-		if (get_user(val1, &iocb->aio_buf) ||
-		    get_user(val2, &iocb->aio_nbytes)) 
-			err = -EFAULT; 
-		else if (!val1) /* should check cmd */ 
-			;
-		else if (verify_area(VERIFY_WRITE, (void*)val1, val2))
-			err = -EFAULT; 
-
-		/* paranoia check - remove it when you are sure they
-		   are not pointers */
-		if (get_user(val1, &iocb->aio_reserved2) || val1 ||
-		    get_user(val2, &iocb->aio_reserved2) || val2)
-			err = -EFAULT; 		    		   
 	} 
-	if (!err) {
-		set_fs(KERNEL_DS);
-		err = sys_io_submit(ctx_id, nr, iocb64);
-		set_fs(oldfs); 
+
+	for (i=0; i<nr; i++) {
+		u32 p32;
+		struct iocb *user_iocb, tmp;
+
+		if (unlikely(__get_user(p32, iocbpp + i))) {
+			ret = -EFAULT;
+			break;
 	} 
-	kfree(iocb64);
-	return err;		
+		user_iocb = u32_to_ptr(p32);
+
+		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, user_iocb, &tmp);
+		if (ret)
+			break;
+	}
+
+	put_ioctx(ctx);
+	return i ? i : ret;
 }
 
 extern asmlinkage long sys_io_getevents(aio_context_t ctx_id,
@@ -2140,7 +2158,7 @@
 	set_fs(KERNEL_DS); 
 	ret = sys_io_getevents(ctx_id,min_nr,nr,events,timeout ? &t : NULL); 
 	set_fs(oldfs); 
-	if (timeout && put_compat_timespec(&t, timeout))
+	if (!ret && timeout && put_compat_timespec(&t, timeout))
 		return -EFAULT; 		
 	return ret;
 } 
@@ -2172,12 +2190,8 @@
 
 long sys32_vm86_warning(void)
 { 
-	static long warn_time = -(60*HZ); 
-	if (time_before(warn_time + 60*HZ,jiffies)) { 
 		printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
 		       current->comm);
-		warn_time = jiffies;
-	} 
 	return -ENOSYS ;
 } 
 
diff -Nru a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
--- a/arch/x86_64/kernel/Makefile	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/kernel/Makefile	Mon Mar 31 13:41:08 2003
@@ -10,14 +10,13 @@
 		setup64.o bluesmoke.o bootflag.o e820.o reboot.o
 
 obj-$(CONFIG_MTRR)	+= mtrr/
+obj-$(CONFIG_ACPI)	+= acpi/
 obj-$(CONFIG_X86_MSR)	+= msr.o
 obj-$(CONFIG_X86_CPUID)	+= cpuid.o
 obj-$(CONFIG_SMP)	+= smp.o smpboot.o trampoline.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o  nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o mpparse.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= suspend.o suspend_asm.o
-obj-$(CONFIG_ACPI)             += acpi.o
-obj-$(CONFIG_ACPI_SLEEP)       += wakeup.o
 obj-$(CONFIG_EARLY_PRINTK)    += early_printk.o
 obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
diff -Nru a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
--- a/arch/x86_64/kernel/aperture.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/kernel/aperture.c	Mon Mar 31 13:41:08 2003
@@ -105,7 +105,8 @@
 	if (!fix && !fallback_aper_force) 
 		return; 
 
-	printk("Your BIOS is broken and doesn't leave a aperture memory hole\n");
+	printk("Your BIOS doesn't leave a aperture memory hole\n");
+	printk("Please enable the IOMMU option in the BIOS setup\n"); 
 	aper_alloc = allocate_aperture(); 
 	if (!aper_alloc) 
 		return; 
diff -Nru a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
--- a/arch/x86_64/kernel/apic.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/apic.c	Mon Mar 31 13:41:06 2003
@@ -408,7 +408,7 @@
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 			apic_write(APIC_ESR, 0);
 		value = apic_read(APIC_ESR);
-		printk("ESR value before enabling vector: %08x\n", value);
+		Dprintk("ESR value before enabling vector: %08x\n", value);
 
 		value = ERROR_APIC_VECTOR;      // enables sending errors
 		apic_write_around(APIC_LVTERR, value);
@@ -418,7 +418,7 @@
 		if (maxlvt > 3)
 			apic_write(APIC_ESR, 0);
 		value = apic_read(APIC_ESR);
-		printk("ESR value after enabling vector: %08x\n", value);
+		Dprintk("ESR value after enabling vector: %08x\n", value);
 	} else {
 		if (esr_disable)	
 			/* 
@@ -1080,9 +1080,10 @@
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		check_nmi_watchdog();
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config)
-		if (!skip_ioapic_setup && nr_ioapics)
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
 			setup_IO_APIC();
+	else
+		nr_ioapics = 0;
 #endif
 	setup_boot_APIC_clock();
 
diff -Nru a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c
--- a/arch/x86_64/kernel/bluesmoke.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/kernel/bluesmoke.c	Mon Mar 31 13:41:08 2003
@@ -1,65 +1,87 @@
 /*
- * arch/x86_64/kernel/bluesmoke.c - x86-64 Machine Check Exception Reporting
- * 
-
-RED-PEN: need to add power management to restore after S3 wakeup. 
-
+ * Machine check handler.
+ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Rest from unknown author(s). 
  */
-
+#include <linux/config.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/smp.h>
-#include <linux/config.h>
-#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <asm/processor.h> 
-#include <asm/system.h>
 #include <asm/msr.h>
-#include <asm/apic.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
 
-#ifdef CONFIG_X86_MCE
+static int mce_disabled __initdata;
+static unsigned long mce_cpus; 
 
-static int mce_disabled __initdata = 0;
+/*
+ *	Machine Check Handler For PII/PIII/K7
+ */
 
 static int banks;
+static unsigned long ignored_banks, disabled_banks;
 
+/* Machine Check on everything dubious. This is a good setting
+   for device driver testing. */
+#define K8_DRIVER_DEBUG ((1<<13)-1)
+/* Report RAM errors and Hyper Transport Problems, but ignore Device
+   aborts and GART errors. */
+#define K8_NORMAL_OP    0xff
 
-/*
- *	Machine Check Handler For Hammer
- */
+#ifdef CONFIG_MCE_DEBUG
+static u32 k8_nb_flags __initdata = K8_DRIVER_DEBUG;
+#else
+static u32 k8_nb_flags __initdata = K8_NORMAL_OP;
+#endif
 
-static void hammer_machine_check(struct pt_regs * regs, long error_code)
+static void generic_machine_check(struct pt_regs * regs, long error_code)
 {
 	int recover=1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
 	int i;
 
+	preempt_disable();
+
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 	if(mcgstl&(1<<0))	/* Recoverable ? */
 		recover=0;
 
 	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
-	preempt_disable();
-	for (i=0;i<banks;i++) {
+	
+	if (regs && (mcgstl & 2))
+		printk(KERN_EMERG "RIP <%02lx>:%016lx RSP %016lx\n", 
+		       regs->cs, regs->rip, regs->rsp); 
+
+	for(i=0;i<banks;i++)
+	{
+		if ((1UL<<i) & ignored_banks) 
+			continue; 
+
 		rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
-		if(high&(1<<31)) {
+		if(high&(1<<31))
+		{
 			if(high&(1<<29))
 				recover|=1;
 			if(high&(1<<25))
 				recover|=2;
 			printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
 			high&=~(1<<31);
-			if(high&(1<<27)) {
+			if(high&(1<<27))
+			{
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				printk("[%08x%08x]", ahigh, alow);
+				printk("[%08x%08x]", alow, ahigh);
 			}
-			if(high&(1<<26)) {
+			if(high&(1<<26))
+			{
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				printk(" at %08x%08x", ahigh, alow);
+				printk(" at %08x%08x", 
+					ahigh, alow);
 			}
 			printk("\n");
 			/* Clear it */
@@ -68,7 +90,6 @@
 			wmb();
 		}
 	}
-	preempt_enable();
 
 	if(recover&2)
 		panic("CPU context corrupt");
@@ -77,16 +98,13 @@
 	printk(KERN_EMERG "Attempting to continue.\n");
 	mcgstl&=~(1<<2);
 	wrmsr(MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
 
-/*
- *	Handle unconfigured int18 (should never happen)
- */
+	preempt_enable();
+}
 
-static void unexpected_machine_check(struct pt_regs * regs, long error_code)
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {	
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+	printk("unexpected machine check %lx\n", error_code); 
 }
 
 /*
@@ -95,56 +113,194 @@
 
 static void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
 
-asmlinkage void do_machine_check(struct pt_regs * regs, long error_code)
+void do_machine_check(struct pt_regs * regs, long error_code)
 {
+	notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL);
 	machine_check_vector(regs, error_code);
 }
 
+/* 
+ *	K8 machine check.
+ */
 
-#ifdef CONFIG_X86_MCE_NONFATAL
-static struct timer_list mce_timer;
-static int timerset = 0;
-
-#define MCE_RATE	15*HZ	/* timer rate is 15s */
+static struct pci_dev *find_k8_nb(void)
+{ 
+	struct pci_dev *dev;
+	int cpu = smp_processor_id(); 
+	pci_for_each_dev(dev) {
+		if (dev->bus->number==0 && PCI_FUNC(dev->devfn)==3 &&
+		    PCI_SLOT(dev->devfn) == (24+cpu))
+			return dev;
+	}
+	return NULL;
+}
 
-static void mce_checkregs (void *info)
+static void check_k8_nb(void)
 {
-	u32 low, high;
-	int i;
+	struct pci_dev *nb;
+	nb = find_k8_nb(); 
+	if (nb == NULL)
+		return;
 
-	for (i=0; i<banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
+	u32 statuslow, statushigh;
+	pci_read_config_dword(nb, 0x48, &statuslow);
+	pci_read_config_dword(nb, 0x4c, &statushigh);
+	if (!(statushigh & (1<<31)))
+		return;
+	printk(KERN_ERR "Northbridge status %08x%08x\n",
+	       statushigh,statuslow); 
+	if (statuslow & 0x10) 
+		printk(KERN_ERR "GART error %d\n", statuslow & 0xf); 
+	if (statushigh & (1<<30))	/* bit 30 = error overflow; bit 31 (valid) was already tested above */
+		printk(KERN_ERR "Lost an northbridge error\n"); 
+	if (statushigh & (1<<25))
+		printk(KERN_EMERG "NB status: unrecoverable\n"); 
+	if (statushigh & (1<<26)) { 
+		u32 addrhigh, addrlow; 
+		pci_read_config_dword(nb, 0x54, &addrhigh); 
+		pci_read_config_dword(nb, 0x50, &addrlow); 
+		printk(KERN_ERR "NB error address %08x%08x\n", addrhigh,addrlow); 
+	}
+	if (statushigh & (1<<29))
+		printk(KERN_EMERG "Error uncorrected\n"); 
+	statushigh &= ~(1<<31); 
+	pci_write_config_dword(nb, 0x4c, statushigh); 		
+}
 
-		if ((low | high) != 0) {
-			printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occurred on CPU %d.\n", smp_processor_id());
-			printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);
+static void k8_machine_check(struct pt_regs * regs, long error_code)
+{ 
+	u64 status, nbstatus;
 
-			/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
-			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+	preempt_disable();
 
-			/* Serialize */
-			wmb();
+	rdmsrl(MSR_IA32_MCG_STATUS, status); 
+	if ((status & (1<<2)) == 0) { 
+		if (!regs) check_k8_nb();	/* timer poll (no regs): still look at the northbridge */
+		preempt_enable();		/* balance preempt_disable() on this early exit */
+		return; 
 		}
+	if (status & 1)
+		printk(KERN_EMERG "MCG_STATUS: unrecoverable\n"); 
+
+	rdmsrl(MSR_IA32_MC0_STATUS+4*4, nbstatus); 
+	if ((nbstatus & (1UL<<63)) == 0)
+		goto others; 
+	
+	printk(KERN_EMERG "Northbridge Machine Check %s %016lx %lx\n", 
+	       regs ? "exception" : "timer",
+	       (unsigned long)nbstatus, error_code); 
+	if (nbstatus & (1UL<<62))
+		printk(KERN_EMERG "Lost at least one NB error condition\n"); 	
+	if (nbstatus & (1UL<<61))
+		printk(KERN_EMERG "Uncorrectable condition\n"); 
+	if (nbstatus & (1UL<<57))	/* bit 57; generic_machine_check tests the same flag as high&(1<<25) */
+		printk(KERN_EMERG "Unrecoverable condition\n"); 
+		
+	check_k8_nb();
+
+	if (nbstatus & (1UL<<58)) { 
+		u64 adr;
+		rdmsrl(MSR_IA32_MC0_ADDR+4*4, adr);
+		printk(KERN_EMERG "Address: %016lx\n", (unsigned long)adr);
 	}
-}
+	
+	wrmsrl(MSR_IA32_MC0_STATUS+4*4, 0); 
+	wrmsrl(MSR_IA32_MCG_STATUS, 0);
+       
+	if (regs && (status & (1<<1)))
+		printk(KERN_EMERG "MCE at EIP %lx ESP %lx\n", regs->rip, regs->rsp); 
+
+ others:
+	generic_machine_check(regs, error_code); 
 
+	preempt_enable();
+} 
+
+static struct timer_list mcheck_timer;
+int mcheck_interval = 30*HZ; 
 
-static void mce_timerfunc (unsigned long data)
+#ifndef CONFIG_SMP 
+static void mcheck_timer_handler(unsigned long data)
 {
-	on_each_cpu (mce_checkregs, NULL, 1, 1);
+	k8_machine_check(NULL,0);
+	mcheck_timer.expires = jiffies + mcheck_interval;
+	add_timer(&mcheck_timer);
+}
+#else
+
+/* SMP needs a process context trampoline because smp_call_function cannot be 
+   called from interrupt context. */
 
-	/* Refresh the timer. */
-	mce_timer.expires = jiffies + MCE_RATE;
-	add_timer (&mce_timer);
+static void mcheck_timer_other(void *data)
+{ 
+	k8_machine_check(NULL, 0); 
+} 
+
+static void mcheck_timer_dist(void *data)
+{
+	smp_call_function(mcheck_timer_other,0,0,0);
+	k8_machine_check(NULL, 0); 
+	mcheck_timer.expires = jiffies + mcheck_interval;
+	add_timer(&mcheck_timer);
+} 
+
+static void mcheck_timer_handler(unsigned long data)
+{ 
+	static DECLARE_WORK(mcheck_work, mcheck_timer_dist, NULL);
+	schedule_work(&mcheck_work); 
 }
 #endif
 
+static int nok8 __initdata; 
+
+static void __init k8_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u64 cap;
+	int i;
+	struct pci_dev *nb; 
+
+	if (!test_bit(X86_FEATURE_MCE, &c->x86_capability) || 
+	    !test_bit(X86_FEATURE_MCA, &c->x86_capability))
+		return; 
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap); 
+	banks = cap&0xff; 
+	machine_check_vector = k8_machine_check; 
+	for (i = 0; i < banks; i++) { 
+		u64 val = ((1UL<<i) & disabled_banks) ? 0 : ~0UL; 
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, val);
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i,0); 
+	}
+
+	nb = find_k8_nb(); 
+	if (nb != NULL) {
+		u32 reg, reg2;
+		pci_read_config_dword(nb, 0x40, &reg); 
+		pci_write_config_dword(nb, 0x40, k8_nb_flags);
+		pci_read_config_dword(nb, 0x44, &reg2);
+		pci_write_config_dword(nb, 0x44, reg2); 
+		printk(KERN_INFO "Machine Check for K8 Northbridge %d enabled (%x,%x)\n",
+		       nb->devfn, reg, reg2);
+		ignored_banks |= (1UL<<4); 
+	} 
+
+	set_in_cr4(X86_CR4_MCE);	   	
+
+	if (mcheck_interval && (smp_processor_id() == 0)) { 
+		init_timer(&mcheck_timer); 
+		mcheck_timer.function = (void (*)(unsigned long))mcheck_timer_handler; 
+		mcheck_timer.expires = jiffies + mcheck_interval; 
+		add_timer(&mcheck_timer); 
+	} 
+	
+	printk(KERN_INFO "Machine Check Reporting enabled for CPU#%d\n", smp_processor_id()); 
+} 
 
 /*
- *	Set up machine check reporting for processors with Intel style MCE
+ *	Set up machine check reporting for Intel processors
  */
 
-static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
+static void __init generic_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int i;
@@ -154,33 +310,36 @@
 	 *	Check for MCE support
 	 */
 
-	if( !test_bit(X86_FEATURE_MCE, c->x86_capability) )
+	if( !test_bit(X86_FEATURE_MCE, &c->x86_capability) )
 		return;	
 
-	/* Check for PPro style MCA */
-	if( !test_bit(X86_FEATURE_MCA, c->x86_capability) )
+	/*
+	 *	Check for PPro style MCA
+	 */
+	 		
+	if( !test_bit(X86_FEATURE_MCA, &c->x86_capability) )
 		return;
 
 	/* Ok machine check is available */
-	machine_check_vector = hammer_machine_check;
+	
+	machine_check_vector = generic_machine_check;
 	wmb();
 
 	if(done==0)
-		printk(KERN_INFO "Machine check architecture supported.\n");
+		printk(KERN_INFO "Intel machine check architecture supported.\n");
 	rdmsr(MSR_IA32_MCG_CAP, l, h);
-	if(l&(1<<8))	/* Control register present ? */
+	if(l&(1<<8))
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	banks = l&0xff;
 
-	for(i=0; i<banks; i++)
-		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
-	for(i=0; i<banks; i++)
+	for(i=0;i<banks;i++)
+	{
+		u32 val = ((1UL<<i) & disabled_banks) ? 0 : ~0;
+		wrmsr(MSR_IA32_MC0_CTL+4*i, val, val);
 		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
+	}
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Machine check reporting enabled on CPU#%d.\n", smp_processor_id());
-
+	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
 	done=1;
 }
 
@@ -190,30 +349,22 @@
 
 void __init mcheck_init(struct cpuinfo_x86 *c)
 {
+	if (test_and_set_bit(smp_processor_id(), &mce_cpus))
+		return; 
 
 	if(mce_disabled==1)
 		return;
 
-	switch(c->x86_vendor)
-	{
+	switch(c->x86_vendor) {
 		case X86_VENDOR_AMD:
-			hammer_mcheck_init(c);
-#ifdef CONFIG_X86_MCE_NONFATAL
-			if (timerset == 0) {
-				/* Set the timer to check for non-fatal
-				   errors every MCE_RATE seconds */
-				init_timer (&mce_timer);
-				mce_timer.expires = jiffies + MCE_RATE;
-				mce_timer.data = 0;
-				mce_timer.function = &mce_timerfunc;
-				add_timer (&mce_timer);
-				timerset = 1;
-				printk(KERN_INFO "Machine check exception polling timer started.\n");
-			}
-#endif
+		if (c->x86 == 15 && !nok8) {
+			k8_mcheck_init(c); 
 			break;
-
+		}
+		/* FALL THROUGH */
 		default:
+	case X86_VENDOR_INTEL:
+		generic_mcheck_init(c);
 			break;
 	}
 }
@@ -224,16 +375,33 @@
 	return 0;
 }
 
+
+/* mce=off disable machine check
+   mce=nok8 disable k8 specific features
+   mce=disable<NUMBER> disable bank NUMBER
+   mce=enable<NUMBER> enable bank number
+   mce=device	Enable device driver test reporting in NB
+   mce=NUMBER mcheck timer interval number seconds. 
+   Can be also comma separated in a single mce= */
 static int __init mcheck_enable(char *str)
 {
-	mce_disabled = -1;
+	char *p;
+	while ((p = strsep(&str,",")) != NULL) { 
+		if (isdigit(*p))
+			mcheck_interval = simple_strtol(p,NULL,0) * HZ; 
+		else if (!strcmp(p,"off"))
+			mce_disabled = 1; 
+		else if (!strncmp(p,"enable",6))
+			disabled_banks &= ~(1UL<<simple_strtol(p+6,NULL,0));	/* clear only bank N's bit */
+		else if (!strncmp(p,"disable",7))
+			disabled_banks |= (1UL<<simple_strtol(p+7,NULL,0));	/* set only bank N's bit */
+		else if (!strcmp(p,"nok8"))
+			nok8 = 1;
+		else if (!strcmp(p,"device"))
+			k8_nb_flags = K8_DRIVER_DEBUG;
+	}
 	return 0;
 }
 
 __setup("nomce", mcheck_disable);
 __setup("mce", mcheck_enable);
-
-#else
-asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) {}
-void __init mcheck_init(struct cpuinfo_x86 *c) {}
-#endif
diff -Nru a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
--- a/arch/x86_64/kernel/entry.S	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/entry.S	Mon Mar 31 13:41:06 2003
@@ -359,8 +359,13 @@
 /* 0(%rsp): interrupt number */ 
 	.macro interrupt func
 	cld
+#ifdef CONFIG_X86_REMOTE_DEBUG
+	SAVE_ALL	
+	movq %rsp,%rdi
+#else		
 	SAVE_ARGS
 	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
+#endif	
 	testl $3,CS(%rdi)
 	je 1f
 	swapgs	
@@ -667,18 +672,6 @@
 	ret
 
 ENTRY(page_fault)
-#ifdef CONFIG_KDB
-        pushq %rcx
-        pushq %rdx
-        pushq %rax
-        movl  $473,%ecx
-        rdmsr
-        andl  $0xfffffffe,%eax          /* Disable last branch recording */
-        wrmsr
-        popq  %rax
-        popq  %rdx
-        popq  %rcx
-#endif 
 	errorentry do_page_fault
 
 ENTRY(coprocessor_error)
diff -Nru a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
--- a/arch/x86_64/kernel/head.S	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/head.S	Mon Mar 31 13:41:07 2003
@@ -194,7 +194,7 @@
 	jmp	1b
 
 .org 0xf00
-pGDT32:
+ENTRY(pGDT32)
 	.word	gdt32_end-gdt_table32
 	.long	gdt_table32-__START_KERNEL_map
 
@@ -307,6 +307,15 @@
 	.quad	0x0000000000105007		/* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
 
 .org 0xb000
+ENTRY(wakeup_level4_pgt)
+	.quad	0x0000000000102007		/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000010a007
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000103007		/* -> level3_kernel_pgt */
+
+.org 0xc000
 .data
 
 	.align 16
@@ -371,3 +380,4 @@
 	.quad   0
 	.quad 	0
 	.endr
+
diff -Nru a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
--- a/arch/x86_64/kernel/head64.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/head64.c	Mon Mar 31 13:41:07 2003
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/proto.h>
 #include <asm/smp.h>
+#include <asm/bootsetup.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
@@ -51,7 +52,7 @@
 		printk("old bootloader convention, maybe loadlin?\n");
 	}
 	command_line = (char *) ((u64)(new_data));
-	memcpy(saved_command_line, command_line, 2048);
+	memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
 	printk("Bootdata ok (command line is %s)\n", saved_command_line);	
 }
 
diff -Nru a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
--- a/arch/x86_64/kernel/i387.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/i387.c	Mon Mar 31 13:41:06 2003
@@ -42,7 +42,7 @@
 
 	/* clean state in init */
 	stts();
-	clear_thread_flag(TIF_USEDFPU);
+	current_thread_info()->status = 0;
 	current->used_math = 0;
 }
 
@@ -51,13 +51,12 @@
  * so initialize it and set the mxcsr to its default.
  * remeber the current task has used the FPU.
  */
-void init_fpu(void)
+void init_fpu(struct task_struct *child)
 {
-	struct task_struct *me = current;
-	memset(&me->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
-	me->thread.i387.fxsave.cwd = 0x37f;
-	me->thread.i387.fxsave.mxcsr = 0x1f80;
-	me->used_math = 1;
+	memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
+	child->thread.i387.fxsave.cwd = 0x37f;
+	child->thread.i387.fxsave.mxcsr = 0x1f80;
+	child->used_math = 1;
 }
 
 /*
@@ -81,7 +80,7 @@
 	if (!tsk->used_math) 
 		return 0;
 	tsk->used_math = 0; /* trigger finit */ 
-	if (test_thread_flag(TIF_USEDFPU)) { 
+	if (tsk->thread_info->status & TS_USEDFPU) {
 		err = save_i387_checking((struct i387_fxsave_struct *)buf);
 		if (err) return err;
 		stts();
@@ -99,7 +98,7 @@
 
 int get_fpregs(struct user_i387_struct *buf, struct task_struct *tsk)
 {
-	empty_fpu(tsk);
+	if (!tsk->used_math) init_fpu(tsk);	/* reset only when there is no saved state to report */
 	return __copy_to_user((void *)buf, &tsk->thread.i387.fxsave,
 			       sizeof(struct user_i387_struct)) ? -EFAULT : 0;
 }
diff -Nru a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
--- a/arch/x86_64/kernel/nmi.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/nmi.c	Mon Mar 31 13:41:07 2003
@@ -25,13 +25,15 @@
 #include <asm/mpspec.h>
 #include <asm/nmi.h>
 #include <asm/msr.h>
+#include <asm/proto.h>
+#include <asm/kdebug.h>
 
 extern void default_do_nmi(struct pt_regs *);
 
 unsigned int nmi_watchdog = NMI_LOCAL_APIC;
 static unsigned int nmi_hz = HZ;
 unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
-extern void show_registers(struct pt_regs *regs);
+int nmi_watchdog_disabled;
 
 #define K7_EVNTSEL_ENABLE	(1 << 22)
 #define K7_EVNTSEL_INT		(1 << 20)
@@ -251,15 +253,13 @@
 		alert_counter[i] = 0;
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
+	if (nmi_watchdog_disabled)
+		return;
+
+	int sum, cpu = safe_smp_processor_id();
 
-	/*
-	 * Since current_thread_info()-> is always on the stack, and we
-	 * always switch the stack NMI-atomically, it's safe to use
-	 * smp_processor_id().
-	 */
-	int sum, cpu = smp_processor_id();
 	sum = read_pda(apic_timer_irqs);
 
 	if (last_irq_sums[cpu] == sum) {
@@ -269,6 +269,10 @@
 		 */
 		alert_counter[cpu]++;
 		if (alert_counter[cpu] == 5*nmi_hz) {
+			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD) { 
+				alert_counter[cpu] = 0; 
+				return;
+			} 
 			spin_lock(&nmi_print_lock);
 			/*
 			 * We are in trouble anyway, lets at least try
diff -Nru a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
--- a/arch/x86_64/kernel/pci-gart.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/pci-gart.c	Mon Mar 31 13:41:07 2003
@@ -8,7 +8,7 @@
  * See Documentation/DMA-mapping.txt for the interface specification.
  * 
  * Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: pci-gart.c,v 1.12 2002/09/19 19:25:32 ak Exp $
+ * $Id: pci-gart.c,v 1.20 2003/03/12 08:23:29 ak Exp $
  */
 
 /* 
@@ -19,9 +19,12 @@
 
 possible future tuning: 
  fast path for sg streaming mappings 
- more intelligent flush strategy - flush only a single NB?
+ more intelligent flush strategy - flush only a single NB? flush only when
+ gart area fills up and alloc_iommu wraps. 
+ don't flush on allocation - need to unmap the gart area first to avoid prefetches
+ by the CPU
  move boundary between IOMMU and AGP in GART dynamically
- could use exact fit in the gart in alloc_consistent, not order of two.
+  
 */ 
 
 #include <linux/config.h>
@@ -49,7 +52,11 @@
 
 int no_iommu; 
 static int no_agp; 
+#ifdef CONFIG_IOMMU_DEBUG
 int force_mmu = 1;
+#else
+int force_mmu = 0;
+#endif
 
 extern int fallback_aper_order;
 extern int fallback_aper_force;
@@ -58,10 +65,9 @@
 static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
 static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
 
-#define GPTE_MASK 0xfffffff000
 #define GPTE_VALID    1
 #define GPTE_COHERENT 2
-#define GPTE_ENCODE(x,flag) (((x) & 0xfffffff0) | ((x) >> 28) | GPTE_VALID | (flag))
+#define GPTE_ENCODE(x) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
 #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
 
 #define for_all_nb(dev) \
@@ -72,7 +78,6 @@
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
 
 #ifdef CONFIG_AGP
-extern int agp_amdk8_init(void);
 extern int agp_init(void);
 #define AGPEXTERN extern
 #else
@@ -130,7 +135,7 @@
 {
 	void *memory;
 	int gfp = GFP_ATOMIC;
-	int order, i;
+	int i;
 	unsigned long iommu_page;
 
 	if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu)
@@ -140,15 +145,15 @@
 	 * First try to allocate continuous and use directly if already 
 	 * in lowmem. 
 	 */ 
-	order = get_order(size);
-	memory = (void *)__get_free_pages(gfp, order);
+	size = round_up(size, PAGE_SIZE); 
+	memory = (void *)__get_free_pages(gfp, get_order(size));
 	if (memory == NULL) {
 		return NULL; 
 	} else {
 		int high = (unsigned long)virt_to_bus(memory) + size
 			>= 0xffffffff;
 		int mmu = high;
-		if (force_mmu) 
+		if (force_mmu && !(gfp & GFP_DMA)) 
 			mmu = 1;
 		if (no_iommu) { 
 			if (high) goto error;
@@ -161,19 +166,21 @@
 		}
 	} 
 
-	iommu_page = alloc_iommu(1<<order);
+	/* size stays in bytes: the error path still needs it for get_order() */
+
+	iommu_page = alloc_iommu(size >> PAGE_SHIFT);
 	if (iommu_page == -1)
 		goto error; 
 
    	/* Fill in the GATT, allocating pages as needed. */
-	for (i = 0; i < 1<<order; i++) { 
+	for (i = 0; i < (size >> PAGE_SHIFT); i++) { 
 		unsigned long phys_mem; 
 		void *mem = memory + i*PAGE_SIZE;
 		if (i > 0) 
 			atomic_inc(&virt_to_page(mem)->count); 
 		phys_mem = virt_to_phys(mem); 
-		BUG_ON(phys_mem & ~PTE_MASK); 
-		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem,GPTE_COHERENT); 
+		BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK); 
+		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); 
 	} 
 
 	flush_gart();
@@ -181,7 +188,7 @@
 	return memory; 
 	
  error:
-	free_pages((unsigned long)memory, order); 
+	free_pages((unsigned long)memory, get_order(size)); 
 	return NULL; 
 }
 
@@ -193,30 +200,32 @@
 			 void *vaddr, dma_addr_t bus)
 {
 	u64 pte;
-	int order = get_order(size);
 	unsigned long iommu_page;
 	int i;
 
+	size = round_up(size, PAGE_SIZE); 
 	if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) { 
-		free_pages((unsigned long)vaddr, order); 		
+		free_pages((unsigned long)vaddr, get_order(size)); 		
 		return;
 	} 
+	size >>= PAGE_SHIFT;
 	iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
-	for (i = 0; i < 1<<order; i++) {
+	for (i = 0; i < size; i++) {
 		pte = iommu_gatt_base[iommu_page + i];
 		BUG_ON((pte & GPTE_VALID) == 0); 
 		iommu_gatt_base[iommu_page + i] = 0; 		
 		free_page((unsigned long) __va(GPTE_DECODE(pte)));
 	} 
 	flush_gart(); 
-	free_iommu(iommu_page, 1<<order);
+	free_iommu(iommu_page, size);
 }
 
 #ifdef CONFIG_IOMMU_LEAK
 /* Debugging aid for drivers that don't free their IOMMU tables */
 static void **iommu_leak_tab; 
 static int leak_trace;
-int iommu_leak_dumppages = 20; 
+int iommu_leak_pages = 20; 
+extern unsigned long printk_address(unsigned long);
 void dump_leak(void)
 {
 	int i;
@@ -224,10 +233,13 @@
 	if (dump || !iommu_leak_tab) return;
 	dump = 1;
 	show_stack(NULL);
-	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_dumppages); 
-	for (i = 0; i < iommu_leak_dumppages; i++) 
-		printk("[%lu: %lx] ",
-		       iommu_pages-i,(unsigned long) iommu_leak_tab[iommu_pages-i]); 
+	/* Very crude. dump some from the end of the table too */ 
+	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); 
+	for (i = 0; i < iommu_leak_pages; i++) {	/* step by one so the (i+1)%2 newline test below can fire */
+		printk("%lu: ", iommu_pages-i);
+		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
+		printk("%c", (i+1)%2 == 0 ? '\n' : ' '); 
+	} 
 	printk("\n");
 }
 #endif
@@ -275,7 +287,8 @@
 	return mmu; 
 }
 
-dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
+dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size,
+			    int dir, int flush)
 { 
 	unsigned long iommu_page;
 	unsigned long phys_mem, bus;
@@ -297,13 +310,18 @@
 
 	phys_mem &= PAGE_MASK;
 	for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
-		BUG_ON(phys_mem & ~PTE_MASK); 
+		BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK); 
 		
 		/* 
 		 * Set coherent mapping here to avoid needing to flush
 		 * the caches on mapping.
 		 */
-		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);
+		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
+
+#ifdef CONFIG_IOMMU_DEBUG
+		/* paranoia check */
+		BUG_ON(GPTE_DECODE(iommu_gatt_base[iommu_page+i]) != phys_mem); 
+#endif
 
 #ifdef CONFIG_IOMMU_LEAK
 		/* XXX need eventually caller of pci_map_sg */
@@ -311,6 +329,7 @@
 			iommu_leak_tab[iommu_page + i] = __builtin_return_address(0); 
 #endif
 	}
+	if (flush)
 	flush_gart(); 
 
 	bus = iommu_bus_base + iommu_page*PAGE_SIZE; 
@@ -341,7 +360,7 @@
 	free_iommu(iommu_page, npages);
 }
 
-EXPORT_SYMBOL(pci_map_single);
+EXPORT_SYMBOL(__pci_map_single);
 EXPORT_SYMBOL(pci_unmap_single);
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -438,8 +457,6 @@
 	}
 	flush_gart(); 
 	
-	global_flush_tlb();
-		
 	printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); 
 	return 0;
 
@@ -530,8 +547,10 @@
    off   don't use the IOMMU
    leak  turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
    memaper[=order] allocate an own aperture over RAM with size 32MB^order.  
+   noforce don't force IOMMU usage. Should be fastest.
+   force  Force IOMMU and turn on unmap debugging.
 */
-__init int iommu_setup(char *opt, char **end) 
+__init int iommu_setup(char *opt) 
 { 
     int arg;
     char *p = opt;
@@ -552,17 +571,21 @@
 			    fallback_aper_order = arg;
 	    } 
 #ifdef CONFIG_IOMMU_LEAK
-	    if (!memcmp(p,"leak", 4))
+	    if (!memcmp(p,"leak", 4)) { 
 		    leak_trace = 1;
+		    p += 4; 
+		    if (*p == '=') ++p;
+		    if (isdigit(*p) && get_option(&p, &arg))
+			    iommu_leak_pages = arg;
+	    } else
 #endif
 	    if (isdigit(*p) && get_option(&p, &arg)) 
 		    iommu_size = arg;
 	    do {
-		    if (*p == ' ' || *p == 0) { 
-			    *end = p; 
+		    if (*p == ' ' || *p == 0) 
 			    return 0; 
-		    }
 	    } while (*p++ != ','); 
     }
+    return 1;
 } 
 
diff -Nru a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
--- a/arch/x86_64/kernel/process.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/process.c	Mon Mar 31 13:41:07 2003
@@ -192,6 +192,8 @@
 	       fs,fsindex,gs,gsindex,shadowgs); 
 	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
 	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+
+	show_trace(&regs->rsp);
 }
 
 extern void load_gs_index(unsigned);
@@ -260,6 +262,14 @@
 		(((u32)desc->base2) << 24);
 }
 
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	unlazy_fpu(tsk);
+}
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
 		unsigned long unused,
@@ -294,9 +304,6 @@
 	asm("movl %%es,%0" : "=m" (p->thread.es));
 	asm("movl %%ds,%0" : "=m" (p->thread.ds));
 
-	unlazy_fpu(me);	
-	p->thread.i387 = me->thread.i387;
-
 	if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
 		p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) 
@@ -314,7 +321,7 @@
 			err = ia32_child_tls(p, childregs); 
 		else 			
 #endif	 
-			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r10); 
+			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
 		if (err) 
 			goto out;
 	}
diff -Nru a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
--- a/arch/x86_64/kernel/ptrace.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/ptrace.c	Mon Mar 31 13:41:06 2003
@@ -240,8 +240,8 @@
 		unsigned long tmp;
 
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
-		    addr > sizeof(struct user) - 3)
+		if ((addr & 7) || addr < 0 || 
+		    addr > sizeof(struct user) - 7)
 			break;
 
 		tmp = 0;  /* Default return condition */
@@ -250,7 +250,7 @@
 		if(addr >= (long) &dummy->u_debugreg[0] &&
 		   addr <= (long) &dummy->u_debugreg[7]){
 			addr -= (long) &dummy->u_debugreg[0];
-			addr = addr >> 2;
+			addr = addr >> 3;
 			tmp = child->thread.debugreg[addr];
 		}
 		ret = put_user(tmp,(unsigned long *) data);
@@ -268,8 +268,8 @@
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
-		    addr > sizeof(struct user) - 3)
+		if ((addr & 7) || addr < 0 || 
+		    addr > sizeof(struct user) - 7)
 			break;
 
 		if (addr < sizeof(struct user_regs_struct)) {
@@ -290,6 +290,11 @@
 			  if(addr < (long) &dummy->u_debugreg[4] &&
 			     ((unsigned long) data) >= TASK_SIZE-3) break;
 			  
+			  if (addr == (long) &dummy->u_debugreg[6]) {
+				  if (data >> 32)
+					  goto out_tsk;
+			  }
+
 			  if(addr == (long) &dummy->u_debugreg[7]) {
 				  data &= ~DR_CONTROL_RESERVED;
 				  for(i=0; i<4; i++)
@@ -298,7 +303,7 @@
 			  }
 
 			  addr -= (long) &dummy->u_debugreg;
-			  addr = addr >> 2;
+			  addr = addr >> 3;
 			  child->thread.debugreg[addr] = data;
 			  ret = 0;
 		  }
diff -Nru a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
--- a/arch/x86_64/kernel/reboot.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/kernel/reboot.c	Mon Mar 31 13:41:08 2003
@@ -101,7 +101,6 @@
 	 * Stop all CPUs and turn off local APICs and the IO-APIC, so
 	 * other OSs see a clean IRQ state.
 	 */
-	if (notify_die(DIE_STOP,"cpustop",0,0) != NOTIFY_BAD)
 	smp_send_stop();
 	disable_IO_APIC();
 #endif
diff -Nru a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
--- a/arch/x86_64/kernel/setup.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/setup.c	Mon Mar 31 13:41:06 2003
@@ -197,12 +197,15 @@
 		if (!memcmp(from, "acpi=off", 8))
 			acpi_disabled = 1;
 
+		if (!memcmp(from, "disableapic", 11))
+			disable_apic = 1;
+
 		if (!memcmp(from, "mem=", 4))
 			parse_memopt(from+4, &from); 
 
 #ifdef CONFIG_GART_IOMMU 
 		if (!memcmp(from,"iommu=",6)) { 
-			iommu_setup(from+6, &from); 
+			iommu_setup(from+6); 
 		}
 #endif
 
diff -Nru a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
--- a/arch/x86_64/kernel/setup64.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/setup64.c	Mon Mar 31 13:41:06 2003
@@ -40,7 +40,7 @@
 char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
 
 unsigned long __supported_pte_mask = ~0UL;
-static int do_not_nx = 0;
+static int do_not_nx = 1;
 
 static int __init nonx_setup(char *str)
 {
@@ -98,6 +98,8 @@
 	pda->cpudata_offset = 0;
 	pda->kernelstack = 
 		(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; 
+	pda->active_mm = &init_mm;
+	pda->mmu_state = 0;
 
 	if (cpu == 0) {
 		/* others are initialized in smpboot.c */
@@ -121,8 +123,6 @@
 	asm volatile("movq %0,%%cr3" :: "r" (__pa(level4))); 
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
-	pda->active_mm = &init_mm;
-	pda->mmu_state = 0;
 } 
 
 #define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */
diff -Nru a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
--- a/arch/x86_64/kernel/signal.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/signal.c	Mon Mar 31 13:41:07 2003
@@ -225,6 +225,7 @@
 	rsp = regs->rsp - 128;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
+	/* RED-PEN: redzone on that stack? */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (sas_ss_flags(rsp) == 0)
 			rsp = current->sas_ss_sp + current->sas_ss_size;
@@ -433,7 +434,8 @@
 		 * have been cleared if the watchpoint triggered
 		 * inside the kernel.
 		 */
-		__asm__("movq %0,%%db7"	: : "r" (current->thread.debugreg[7]));
+		if (current->thread.debugreg[7])
+			asm volatile("movq %0,%%db7"	: : "r" (current->thread.debugreg[7]));
 
 		/* Whee!  Actually deliver the signal.  */
 		handle_signal(signr, &info, oldset, regs);
diff -Nru a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
--- a/arch/x86_64/kernel/smp.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/smp.c	Mon Mar 31 13:41:06 2003
@@ -485,16 +485,16 @@
 	}
 }
 
-
 /* Slow. Should be only used for debugging. */
 int slow_smp_processor_id(void)
 { 
 	int stack_location;
 	unsigned long sp = (unsigned long)&stack_location; 
-	int cpu;
-	unsigned long mask;
+	int offset = 0, cpu;
+
+	for (offset = 0; (cpu_online_map >> offset); offset = cpu + 1) { 
+		cpu = offset + ffz(~(cpu_online_map >> offset)); /* ffz() is relative to offset */
 
-	for_each_cpu(cpu, mask) { 
 		if (sp >= (u64)cpu_pda[cpu].irqstackptr - IRQSTACKSIZE && 
 		    sp <= (u64)cpu_pda[cpu].irqstackptr)
 			return cpu;
diff -Nru a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
--- a/arch/x86_64/kernel/smpboot.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/kernel/smpboot.c	Mon Mar 31 13:41:08 2003
@@ -318,8 +318,6 @@
 	 */
  	smp_store_cpu_info(cpuid);
 
-	notify_die(DIE_CPUINIT, "cpuinit", NULL, 0);
-
 	local_irq_disable();
 
 	/*
@@ -898,6 +896,8 @@
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
+	else
+		nr_ioapics = 0;
 
 	setup_boot_APIC_clock();
 
diff -Nru a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
--- a/arch/x86_64/kernel/suspend.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/kernel/suspend.c	Mon Mar 31 13:41:07 2003
@@ -28,7 +28,7 @@
 #include <asm/io.h>
 #include <asm/proto.h>
 
-static struct saved_context saved_context;
+struct saved_context saved_context;
 
 unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
 unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
diff -Nru a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
--- a/arch/x86_64/kernel/traps.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/kernel/traps.c	Mon Mar 31 13:41:06 2003
@@ -110,7 +110,7 @@
 } 
 #endif
 
-static inline unsigned long *in_exception_stack(int cpu, unsigned long stack) 
+unsigned long *in_exception_stack(int cpu, unsigned long stack) 
 { 
 	int k;
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
@@ -249,7 +249,7 @@
 void dump_stack(void)
 {
 	unsigned long dummy;
-	show_stack(&dummy);
+	show_trace(&dummy);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -344,7 +344,6 @@
 	show_registers(regs);
 	bust_spinlocks(0);
 	spin_unlock_irq(&die_lock);
-	notify_die(DIE_OOPS, (char *)str, regs, err);
 	do_exit(SIGSEGV);
 }
 
@@ -419,6 +418,8 @@
 #define DO_ERROR(trapnr, signr, str, name) \
 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 { \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
+		return; \
 	do_trap(trapnr, signr, str, regs, error_code, NULL); \
 }
 
@@ -430,10 +431,13 @@
 	info.si_errno = 0; \
 	info.si_code = sicode; \
 	info.si_addr = (void *)siaddr; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
+		return; \
 	do_trap(trapnr, signr, str, regs, error_code, &info); \
 }
 
 DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_ERROR( 3, SIGTRAP, "int3", int3)
 DO_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
 DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
@@ -446,13 +450,6 @@
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
 DO_ERROR(18, SIGSEGV, "reserved", reserved)
 
-asmlinkage void do_int3(struct pt_regs * regs, long error_code)
-{
-	if (notify_die(DIE_INT3, "int3", regs, error_code) == NOTIFY_BAD)
-		return;
-	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
-}
-
 extern void dump_pagetable(unsigned long);
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
@@ -493,6 +490,8 @@
 			regs->rip = fixup->fixup;
 			return;
 		}
+		notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			   13, SIGSEGV); 
 		die("general protection fault", regs, error_code);
 	}
 }
@@ -537,14 +536,14 @@
 		 * so it must be the NMI watchdog.
 		 */
 		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+			nmi_watchdog_tick(regs,reason);
 			return;
 		}
 #endif
 		unknown_nmi_error(reason, regs);
 		return;
 	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason) == NOTIFY_BAD)
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_BAD)
 		return; 
 	if (reason & 0x80)
 		mem_parity_error(reason, regs);
@@ -569,6 +568,7 @@
 
 #ifdef CONFIG_CHECKING
        { 
+	       /* RED-PEN interaction with debugger - could destroy gs */
                unsigned long gs; 
                struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
                rdmsrl(MSR_GS_BASE, gs); 
@@ -583,9 +583,6 @@
 
 	conditional_sti(regs);
 
-	if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
-		return; 
-
 	/* Mask out spurious debug traps due to lazy DR7 setting */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 		if (!tsk->thread.debugreg[7]) { 
@@ -618,17 +615,22 @@
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code = TRAP_BRKPT;
-	info.si_addr = ((regs->cs & 3) == 0) ? (void *)tsk->thread.rip : 
-	                                        (void *)regs->rip;
+	if ((regs->cs & 3) == 0) 
+		goto clear_dr7; 
+
+	info.si_addr = (void *)regs->rip;
 	force_sig_info(SIGTRAP, &info, tsk);	
 clear_dr7:
-	asm("movq %0,%%db7"::"r"(0UL));
+	asm volatile("movq %0,%%db7"::"r"(0UL));
+	notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
 	return;
 
 clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 
 clear_TF:
+	/* RED-PEN could cause spurious errors */
+	if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
 	regs->eflags &= ~TF_MASK;
 	return;
 }
@@ -775,9 +777,9 @@
 	clts();			/* Allow maths ops (or we recurse) */
 
 	if (!me->used_math)
-		init_fpu();
+		init_fpu(me);
 	restore_fpu_checking(&me->thread.i387.fxsave);
-	set_thread_flag(TIF_USEDFPU); 
+	me->thread_info->status |= TS_USEDFPU;
 }
 
 asmlinkage void math_emulate(void)
@@ -787,7 +789,7 @@
 
 void do_call_debug(struct pt_regs *regs) 
 { 
-	notify_die(DIE_CALL, "debug call", regs, 0); 
+	notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); 
 }
 
 void __init trap_init(void)
@@ -819,8 +821,6 @@
        
 	set_intr_gate(KDB_VECTOR, call_debug);
        
-	notify_die(DIE_TRAPINIT, "traps initialized", 0, 0); 
-	
 	/*
 	 * Should be a barrier for any external CPU state.
 	 */
diff -Nru a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
--- a/arch/x86_64/mm/fault.c	Mon Mar 31 13:41:06 2003
+++ b/arch/x86_64/mm/fault.c	Mon Mar 31 13:41:06 2003
@@ -57,29 +57,41 @@
 	}
 }
 
+static int bad_address(void *p) 
+{ 
+	unsigned long dummy;
+	return __get_user(dummy, (unsigned long *)p);
+} 
+
 void dump_pagetable(unsigned long address)
 {
-	static char *name[] = { "PML4", "PGD", "PDE", "PTE" }; 
-	int i, shift;
-	unsigned long page;
+	pml4_t *pml4;
+	asm("movq %%cr3,%0" : "=r" (pml4));
+
+	pml4 = __va((unsigned long)pml4 & PHYSICAL_PAGE_MASK); 
+	pml4 += pml4_index(address);
+	if (bad_address(pml4)) goto bad;
+	printk("PML4 %lx ", pml4_val(*pml4));
+	if (!pml4_present(*pml4)) goto ret; 
+
+	pgd_t *pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address); 
+	if (bad_address(pgd)) goto bad;
+	printk("PGD %lx ", pgd_val(*pgd)); 
+	if (!pgd_present(*pgd))	goto ret;
+
+	pmd_t *pmd = pmd_offset(pgd, address); 
+	if (bad_address(pmd)) goto bad;
+	printk("PMD %lx ", pmd_val(*pmd));
+	if (!pmd_present(*pmd))	goto ret;	 
 
-	shift = 9+9+9+12;
-	address &= ~0xFFFF000000000000UL;
-	asm("movq %%cr3,%0" : "=r" (page)); 
-	for (i = 0; i < 4; i++) { 
-		unsigned long *padr = (unsigned long *) __va(page); 
-		padr += (address >> shift) & 0x1FFU;
-		if (__get_user(page, padr)) { 
-			printk("%s: bad %p\n", name[i], padr); 
-			break;
-		}
-		printk("%s: %016lx ", name[i], page); 
-		if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */
-			break;
-		page &= ~0xFFFUL;
-		shift -= (i == 0) ? 12 : 9;
-	} 
+	pte_t *pte = pte_offset_kernel(pmd, address);
+	if (bad_address(pte)) goto bad;
+	printk("PTE %lx", pte_val(*pte)); 
+ret:
 	printk("\n");
+	return;
+bad:
+	printk("BAD\n");
 }
 
 int page_fault_trace; 
diff -Nru a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
--- a/arch/x86_64/mm/ioremap.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/mm/ioremap.c	Mon Mar 31 13:41:08 2003
@@ -150,7 +150,7 @@
 	 */
 	offset = phys_addr & ~PAGE_MASK;
 	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr) - phys_addr;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
 
 	/*
 	 * Ok, go for it..
diff -Nru a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
--- a/arch/x86_64/mm/k8topology.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/mm/k8topology.c	Mon Mar 31 13:41:08 2003
@@ -21,7 +21,7 @@
 #include <asm/pci-direct.h>
 #include <asm/numa.h>
 
-static int find_northbridge(void)
+static __init int find_northbridge(void)
 {
 	int num; 
 
@@ -45,7 +45,8 @@
 { 
 	unsigned long prevbase;
 	struct node nodes[MAXNODE];
-	int nodeid, numnodes, maxnode, i, nb; 
+	int nodeid, i, nb; 
+	int found = 0;
 
 	nb = find_northbridge(); 
 	if (nb < 0) 
@@ -53,12 +54,13 @@
 
 	printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); 
 
-	numnodes = (read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3; 
+	numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3)); 
+
+	printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1); 
 
 	memset(&nodes,0,sizeof(nodes)); 
 	prevbase = 0;
-	maxnode = -1; 
-	for (i = 0; i < MAXNODE; i++) { 
+	for (i = 0; i < numnodes; i++) { 
 		unsigned long base,limit; 
 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
@@ -66,18 +68,16 @@
 
 		nodeid = limit & 3; 
 		if (!limit) { 
-			printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,			       base);
-			continue;
+			printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i,			       base);
+			return -1;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
 			printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", 
 			       nodeid, (base>>8)&3, (limit>>8) & 3); 
 			return -1; 
 		}	
-		if (nodeid > maxnode) 
-			maxnode = nodeid; 
 		if ((1UL << nodeid) & nodes_present) { 
-			printk("Node %d already present. Skipping\n", nodeid);
+			printk(KERN_INFO "Node %d already present. Skipping\n", nodeid);
 			continue;
 		}
 
@@ -98,17 +98,19 @@
 			base = start; 
 		if (limit > end) 
 			limit = end; 
-		if (limit == base) 
+		if (limit == base) { 
+			printk(KERN_ERR "Empty node %d\n", nodeid); 
 			continue; 
+		}
 		if (limit < base) { 
-			printk(KERN_INFO"Node %d bogus settings %lx-%lx. Ignored.\n",
+			printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
 			       nodeid, base, limit); 			       
-			continue; 
+			return -1;
 		} 
 		
 		/* Could sort here, but pun for now. Should not happen anyroads. */
 		if (prevbase > base) { 
-			printk(KERN_INFO "Node map not sorted %lx,%lx\n",
+			printk(KERN_ERR "Node map not sorted %lx,%lx\n",
 			       prevbase,base);
 			return -1;
 		}
@@ -116,23 +118,26 @@
 		printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", 
 		       nodeid, base, limit); 
 		
+		found++;
+		
 		nodes[nodeid].start = base; 
 		nodes[nodeid].end = limit;
 
 		prevbase = base;
 	} 
 
-	if (maxnode <= 0)
+	if (!found)
 		return -1; 
 
-	memnode_shift = compute_hash_shift(nodes,maxnode,end);
+	memnode_shift = compute_hash_shift(nodes);
 	if (memnode_shift < 0) { 
 		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 
 		return -1; 
 	} 
 	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); 
 
-	early_for_all_nodes(i) { 
+	for (i = 0; i < numnodes; i++) { 
+		if (nodes[i].start != nodes[i].end)
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end); 
 	} 
 
diff -Nru a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
--- a/arch/x86_64/mm/numa.c	Mon Mar 31 13:41:08 2003
+++ b/arch/x86_64/mm/numa.c	Mon Mar 31 13:41:08 2003
@@ -26,11 +26,10 @@
 static int numa_off __initdata; 
 
 unsigned long nodes_present; 
-int maxnode;
 
 static int emunodes __initdata;
 
-int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
+int __init compute_hash_shift(struct node *nodes)
 {
 	int i; 
 	int shift = 24;
@@ -39,12 +38,16 @@
 	/* When in doubt use brute force. */
 	while (shift < 48) { 
 		memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); 
-		early_for_all_nodes (i) { 
+		for (i = 0; i < numnodes; i++) { 
+			if (nodes[i].start == nodes[i].end) 
+				continue;
 			for (addr = nodes[i].start; 
 			     addr < nodes[i].end; 
 			     addr += (1UL << shift)) {
-				if (memnodemap[addr >> shift] != 0xff) { 
-					printk("node %d shift %d addr %Lx conflict %d\n", 
+				if (memnodemap[addr >> shift] != 0xff && 
+				    memnodemap[addr >> shift] != i) { 
+					printk(KERN_INFO 
+					    "node %d shift %d addr %Lx conflict %d\n", 
 					       i, shift, addr, memnodemap[addr>>shift]);
 					goto next; 
 				} 
@@ -101,9 +104,8 @@
 
 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
-
-	if (nodeid > maxnode) 
-		maxnode = nodeid;
+	if (nodeid + 1 > numnodes) 
+		numnodes = nodeid + 1;
 	nodes_present |= (1UL << nodeid); 
 } 
 
@@ -151,6 +153,7 @@
 		int i;
 		if (emunodes > MAXNODE) 
 			emunodes = MAXNODE; 
+		memset(&nodes, 0, sizeof(nodes)); 
 		printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20); 
 		for (i = 0; i < emunodes; i++) { 
 			unsigned long end = (i+1)*nodesize; 
@@ -160,7 +163,7 @@
 			nodes[i].end = end; 
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 		}
-		memnode_shift = compute_hash_shift(nodes, emunodes, nodes[i-1].end); 
+		memnode_shift = compute_hash_shift(nodes); 
 		return 0;
 	} 
 
diff -Nru a/arch/x86_64/pci/irq.c b/arch/x86_64/pci/irq.c
--- a/arch/x86_64/pci/irq.c	Mon Mar 31 13:41:07 2003
+++ b/arch/x86_64/pci/irq.c	Mon Mar 31 13:41:07 2003
@@ -618,11 +618,20 @@
 int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
+	extern int interrupt_line_quirk;
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+		/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
+		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+			return 0;
+		
 		printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.\n",
 		       'A' + pin - 1, dev->slot_name);
 	}
+	/* VIA bridges use interrupt line for apic/pci steering across
+	   the V-Link */
+	else if (interrupt_line_quirk)
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
 
 	return 0;
 }
diff -Nru a/crypto/Kconfig b/crypto/Kconfig
--- a/crypto/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/crypto/Kconfig	Mon Mar 31 13:41:08 2003
@@ -131,6 +131,16 @@
 
 	  See http://csrc.nist.gov/encryption/aes/ for more information.
 
+config CRYPTO_DEFLATE
+	tristate "Deflate compression algorithm"
+	depends on CRYPTO
+	default y if INET_IPCOMP=y || INET_IPCOMP=m
+	help
+	  This is the Deflate algorithm (RFC1951), specified for use in
+	  IPSec with the IPCOMP protocol (RFC3173, RFC2394).
+	  
+	  You will most probably want this if using IPSec.
+
 config CRYPTO_TEST
 	tristate "Testing module"
 	depends on CRYPTO
diff -Nru a/crypto/Makefile b/crypto/Makefile
--- a/crypto/Makefile	Mon Mar 31 13:41:08 2003
+++ b/crypto/Makefile	Mon Mar 31 13:41:08 2003
@@ -20,5 +20,6 @@
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
 obj-$(CONFIG_CRYPTO_AES) += aes.o
+obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
diff -Nru a/crypto/api.c b/crypto/api.c
--- a/crypto/api.c	Mon Mar 31 13:41:08 2003
+++ b/crypto/api.c	Mon Mar 31 13:41:08 2003
@@ -127,7 +127,7 @@
 	if (tfm == NULL)
 		goto out_put;
 
-	memset(tfm, 0, sizeof(*tfm));
+	memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize);
 	
 	tfm->__crt_alg = alg;
 	
diff -Nru a/crypto/compress.c b/crypto/compress.c
--- a/crypto/compress.c	Mon Mar 31 13:41:07 2003
+++ b/crypto/compress.c	Mon Mar 31 13:41:07 2003
@@ -18,29 +18,46 @@
 #include <linux/string.h>
 #include "internal.h"
 
-static void crypto_compress(struct crypto_tfm *tfm)
+static int crypto_compress(struct crypto_tfm *tfm,
+                            const u8 *src, unsigned int slen,
+                            u8 *dst, unsigned int *dlen)
 {
-	tfm->__crt_alg->cra_compress.coa_compress();
+	return tfm->__crt_alg->cra_compress.coa_compress(crypto_tfm_ctx(tfm),
+	                                                 src, slen, dst,
+	                                                 dlen);
 }
 
-static void crypto_decompress(struct crypto_tfm *tfm)
+static int crypto_decompress(struct crypto_tfm *tfm,
+                             const u8 *src, unsigned int slen,
+                             u8 *dst, unsigned int *dlen)
 {
-	tfm->__crt_alg->cra_compress.coa_decompress();
+	return tfm->__crt_alg->cra_compress.coa_decompress(crypto_tfm_ctx(tfm),
+	                                                   src, slen, dst,
+	                                                   dlen);
 }
 
 int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags)
 {
-	return crypto_cipher_flags(flags) ? -EINVAL : 0;
+	return flags ? -EINVAL : 0;
 }
 
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
+	int ret = 0;
 	struct compress_tfm *ops = &tfm->crt_compress;
 	
+	ret = tfm->__crt_alg->cra_compress.coa_init(crypto_tfm_ctx(tfm));
+	if (ret)
+		goto out;
+
 	ops->cot_compress = crypto_compress;
 	ops->cot_decompress = crypto_decompress;
-	return 0;
+	
+out:
+	return ret;
 }
 
 void crypto_exit_compress_ops(struct crypto_tfm *tfm)
-{ }
+{
+	tfm->__crt_alg->cra_compress.coa_exit(crypto_tfm_ctx(tfm));
+}
diff -Nru a/crypto/crypto_null.c b/crypto/crypto_null.c
--- a/crypto/crypto_null.c	Mon Mar 31 13:41:07 2003
+++ b/crypto/crypto_null.c	Mon Mar 31 13:41:07 2003
@@ -26,11 +26,13 @@
 #define NULL_BLOCK_SIZE		1
 #define NULL_DIGEST_SIZE	0
 
-static void null_compress(void)
-{ }
+static int null_compress(void *ctx, const u8 *src, unsigned int slen,
+                         u8 *dst, unsigned int *dlen)
+{ return 0; }
 
-static void null_decompress(void)
-{ }
+static int null_decompress(void *ctx, const u8 *src, unsigned int slen,
+                           u8 *dst, unsigned int *dlen)
+{ return 0; }
 
 static void null_init(void *ctx)
 { }
diff -Nru a/crypto/deflate.c b/crypto/deflate.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/crypto/deflate.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,224 @@
+/* 
+ * Cryptographic API.
+ *
+ * Deflate algorithm (RFC 1951), implemented here primarily for use
+ * by IPCOMP (RFC 3173 & RFC 2394).
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ * FIXME: deflate transforms will require up to a total of about 436k of kernel
+ * memory on i386 (390k for compression, the rest for decompression), as the
+ * current zlib kernel code uses a worst case pre-allocation system by default.
+ * This needs to be fixed so that the amount of memory required is properly
+ * related to the winbits and memlevel parameters.
+ *
+ * The default winbits of 11 should suit most packets, and it may be something
+ * to configure on a per-tfm basis in the future.
+ *
+ * Currently, compression history is not maintained between tfm calls, as
+ * it is not needed for IPCOMP and keeps the code simpler.  It can be
+ * implemented if someone wants it.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/zlib.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/net.h>
+
+#define DEFLATE_DEF_LEVEL		Z_DEFAULT_COMPRESSION
+#define DEFLATE_DEF_WINBITS		11
+#define DEFLATE_DEF_MEMLEVEL		MAX_MEM_LEVEL
+
+struct deflate_ctx {
+	int comp_initialized;
+	int decomp_initialized;
+	struct z_stream_s comp_stream;
+	struct z_stream_s decomp_stream;
+};
+
+static inline int deflate_gfp(void)
+{
+	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
+}
+
+static int deflate_init(void *ctx)
+{
+	return 0;
+}
+
+static void deflate_exit(void *ctx)
+{
+	struct deflate_ctx *dctx = ctx;
+
+	if (dctx->comp_initialized)
+		vfree(dctx->comp_stream.workspace);
+	if (dctx->decomp_initialized)
+		kfree(dctx->decomp_stream.workspace);
+}
+
+/*
+ * Lazy initialization to make interface simple without allocating
+ * un-needed workspaces.  Thus can be called in softirq context.
+ */
+static int deflate_comp_init(struct deflate_ctx *ctx)
+{
+	int ret = 0;
+	struct z_stream_s *stream = &ctx->comp_stream;
+
+	stream->workspace = __vmalloc(zlib_deflate_workspacesize(),
+	                              deflate_gfp()|__GFP_HIGHMEM,
+	                              PAGE_KERNEL);
+	if (!stream->workspace)
+		return -ENOMEM;
+	/* Zero the entire workspace; note that sizeof(stream->workspace)
+	 * would only be the size of the pointer, not of the buffer. */
+	memset(stream->workspace, 0, zlib_deflate_workspacesize());
+	ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+	                        -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+	                        Z_DEFAULT_STRATEGY);
+	if (ret != Z_OK) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+	ctx->comp_initialized = 1;
+out:
+	return ret;
+out_free:
+	vfree(stream->workspace);
+	goto out;
+}
+
+static int deflate_decomp_init(struct deflate_ctx *ctx)
+{
+	int ret = 0;
+	struct z_stream_s *stream = &ctx->decomp_stream;
+
+	stream->workspace = kmalloc(zlib_inflate_workspacesize(),
+	                            deflate_gfp());
+	if (!stream->workspace)
+		return -ENOMEM;
+	/* Zero the entire workspace; note that sizeof(stream->workspace)
+	 * would only be the size of the pointer, not of the buffer. */
+	memset(stream->workspace, 0, zlib_inflate_workspacesize());
+	ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
+	if (ret != Z_OK) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+	ctx->decomp_initialized = 1;
+out:
+	return ret;
+out_free:
+	kfree(stream->workspace);
+	goto out;
+}
+
+static int deflate_compress(void *ctx, const u8 *src, unsigned int slen,
+	                    u8 *dst, unsigned int *dlen)
+{
+	int ret = 0;
+	struct deflate_ctx *dctx = ctx;
+	struct z_stream_s *stream = &dctx->comp_stream;
+
+	if (!dctx->comp_initialized) {
+		ret = deflate_comp_init(dctx);
+		if (ret)
+			goto out;
+	}
+
+	ret = zlib_deflateReset(stream);
+	if (ret != Z_OK) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	stream->next_in = (u8 *)src;
+	stream->avail_in = slen;
+	stream->next_out = (u8 *)dst;
+	stream->avail_out = *dlen;
+
+	ret = zlib_deflate(stream, Z_FINISH);
+	if (ret != Z_STREAM_END) {
+		ret = -EINVAL;
+		goto out;
+	}
+	ret = 0;
+	*dlen = stream->total_out;
+out:
+	return ret;
+}
+ 
+static int deflate_decompress(void *ctx, const u8 *src, unsigned int slen,
+                              u8 *dst, unsigned int *dlen)
+{
+	
+	int ret = 0;
+	struct deflate_ctx *dctx = ctx;
+	struct z_stream_s *stream = &dctx->decomp_stream;
+
+	if (!dctx->decomp_initialized) {
+		ret = deflate_decomp_init(dctx);
+		if (ret)
+			goto out;
+	}
+
+	ret = zlib_inflateReset(stream);
+	if (ret != Z_OK) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	stream->next_in = (u8 *)src;
+	stream->avail_in = slen;
+	stream->next_out = (u8 *)dst;
+	stream->avail_out = *dlen;
+
+	ret = zlib_inflate(stream, Z_FINISH);
+	if (ret != Z_STREAM_END) {
+		ret = -EINVAL;
+		goto out;
+	}
+	ret = 0;
+	*dlen = stream->total_out;
+out:
+	return ret;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "deflate",
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct deflate_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			= { .compress = {
+	.coa_init		= deflate_init,
+	.coa_exit		= deflate_exit,
+	.coa_compress 		= deflate_compress,
+	.coa_decompress  	= deflate_decompress } }
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Deflate Compression Algorithm for IPCOMP");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+
diff -Nru a/crypto/digest.c b/crypto/digest.c
--- a/crypto/digest.c	Mon Mar 31 13:41:06 2003
+++ b/crypto/digest.c	Mon Mar 31 13:41:06 2003
@@ -61,7 +61,7 @@
 
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
 {
-	return crypto_cipher_flags(flags) ? -EINVAL : 0;
+	return flags ? -EINVAL : 0;
 }
 
 int crypto_init_digest_ops(struct crypto_tfm *tfm)
diff -Nru a/crypto/internal.h b/crypto/internal.h
--- a/crypto/internal.h	Mon Mar 31 13:41:06 2003
+++ b/crypto/internal.h	Mon Mar 31 13:41:06 2003
@@ -41,11 +41,6 @@
 		cond_resched();
 }
 
-static inline u32 crypto_cipher_flags(u32 flags)
-{
-	return flags & (CRYPTO_TFM_MODE_MASK|CRYPTO_TFM_REQ_WEAK_KEY);
-}
-
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
 	return (void *)&tfm[1];
diff -Nru a/crypto/proc.c b/crypto/proc.c
--- a/crypto/proc.c	Mon Mar 31 13:41:06 2003
+++ b/crypto/proc.c	Mon Mar 31 13:41:06 2003
@@ -54,10 +54,10 @@
 	
 	seq_printf(m, "name         : %s\n", alg->cra_name);
 	seq_printf(m, "module       : %s\n", module_name(alg->cra_module));
-	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 	
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_CIPHER:
+		seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 		seq_printf(m, "min keysize  : %u\n",
 					alg->cra_cipher.cia_min_keysize);
 		seq_printf(m, "max keysize  : %u\n",
@@ -67,6 +67,7 @@
 		break;
 		
 	case CRYPTO_ALG_TYPE_DIGEST:
+		seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 		seq_printf(m, "digestsize   : %u\n",
 		           alg->cra_digest.dia_digestsize);
 		break;
diff -Nru a/crypto/tcrypt.c b/crypto/tcrypt.c
--- a/crypto/tcrypt.c	Mon Mar 31 13:41:08 2003
+++ b/crypto/tcrypt.c	Mon Mar 31 13:41:08 2003
@@ -48,7 +48,7 @@
 
 static char *check[] = {
 	"des", "md5", "des3_ede", "rot13", "sha1", "sha256", "blowfish",
-	"twofish", "serpent", "sha384", "sha512", "md4", "aes",
+	"twofish", "serpent", "sha384", "sha512", "md4", "aes", "deflate",
 	 NULL
 };
 
@@ -2193,6 +2193,86 @@
 }
 
 static void
+test_deflate(void)
+{
+	unsigned int i;
+	char result[COMP_BUF_SIZE];
+	struct crypto_tfm *tfm;
+	struct comp_testvec *tv;
+	unsigned int tsize;
+
+	printk("\ntesting deflate compression\n");
+
+	tsize = sizeof (deflate_comp_tv_template);
+	if (tsize > TVMEMSIZE) {
+		printk("template (%u) too big for tvmem (%u)\n", tsize,
+		       TVMEMSIZE);
+		return;
+	}
+
+	memcpy(tvmem, deflate_comp_tv_template, tsize);
+	tv = (void *) tvmem;
+
+	tfm = crypto_alloc_tfm("deflate", 0);
+	if (tfm == NULL) {
+		printk("failed to load transform for deflate\n");
+		return;
+	}
+
+	for (i = 0; i < DEFLATE_COMP_TEST_VECTORS; i++) {
+		int ilen, ret, dlen = COMP_BUF_SIZE;
+		
+		printk("test %u:\n", i + 1);
+		memset(result, 0, sizeof (result));
+
+		ilen = tv[i].inlen;
+		ret = crypto_comp_compress(tfm, tv[i].input,
+		                           ilen, result, &dlen);
+		if (ret) {
+			printk("fail: ret=%d\n", ret);
+			continue;
+		}
+		hexdump(result, dlen);
+		printk("%s (ratio %d:%d)\n",
+		       memcmp(result, tv[i].output, dlen) ? "fail" : "pass",
+		       ilen, dlen);
+	}
+
+	printk("\ntesting deflate decompression\n");
+
+	tsize = sizeof (deflate_decomp_tv_template);
+	if (tsize > TVMEMSIZE) {
+		printk("template (%u) too big for tvmem (%u)\n", tsize,
+		       TVMEMSIZE);
+		goto out;
+	}
+
+	memcpy(tvmem, deflate_decomp_tv_template, tsize);
+	tv = (void *) tvmem;
+
+	for (i = 0; i < DEFLATE_DECOMP_TEST_VECTORS; i++) {
+		int ilen, ret, dlen = COMP_BUF_SIZE;
+		
+		printk("test %u:\n", i + 1);
+		memset(result, 0, sizeof (result));
+
+		ilen = tv[i].inlen;
+		ret = crypto_comp_decompress(tfm, tv[i].input,
+		                             ilen, result, &dlen);
+		if (ret) {
+			printk("fail: ret=%d\n", ret);
+			continue;
+		}
+		hexdump(result, dlen);
+		printk("%s (ratio %d:%d)\n",
+		       memcmp(result, tv[i].output, dlen) ? "fail" : "pass",
+		       ilen, dlen);
+	}
+out:
+	crypto_free_tfm(tfm);
+}
+
+static void
 test_available(void)
 {
 	char **name = check;
@@ -2223,6 +2303,7 @@
 		test_aes();
 		test_sha384();
 		test_sha512();
+		test_deflate();
 #ifdef CONFIG_CRYPTO_HMAC
 		test_hmac_md5();
 		test_hmac_sha1();
@@ -2276,6 +2357,10 @@
 		
 	case 12:
 		test_sha512();
+		break;
+
+	case 13:
+		test_deflate();
 		break;
 
 #ifdef CONFIG_CRYPTO_HMAC
diff -Nru a/crypto/tcrypt.h b/crypto/tcrypt.h
--- a/crypto/tcrypt.h	Mon Mar 31 13:41:08 2003
+++ b/crypto/tcrypt.h	Mon Mar 31 13:41:08 2003
@@ -1682,4 +1682,104 @@
 	},
 };
 
+/*
+ * Compression stuff.
+ */
+#define COMP_BUF_SIZE           512
+
+struct comp_testvec {
+	int inlen, outlen;
+	char input[COMP_BUF_SIZE];
+	char output[COMP_BUF_SIZE];
+};
+
+/*
+ * Deflate test vectors (null-terminated strings).
+ * Params: winbits=11, Z_DEFAULT_COMPRESSION, MAX_MEM_LEVEL.
+ */
+#define DEFLATE_COMP_TEST_VECTORS 2
+#define DEFLATE_DECOMP_TEST_VECTORS 2
+
+struct comp_testvec deflate_comp_tv_template[] = {
+	{
+	  70, 38,
+	  
+	  "Join us now and share the software "
+	  "Join us now and share the software ",
+
+          { 0xf3, 0xca, 0xcf, 0xcc, 0x53, 0x28, 0x2d, 0x56,
+            0xc8, 0xcb, 0x2f, 0x57, 0x48, 0xcc, 0x4b, 0x51,
+            0x28, 0xce, 0x48, 0x2c, 0x4a, 0x55, 0x28, 0xc9,
+            0x48, 0x55, 0x28, 0xce, 0x4f, 0x2b, 0x29, 0x07,
+            0x71, 0xbc, 0x08, 0x2b, 0x01, 0x00 
+          },
+	},
+	  
+	{
+	  191, 122,
+	  
+	  "This document describes a compression method based on the DEFLATE"
+	  "compression algorithm.  This document defines the application of "
+	  "the DEFLATE algorithm to the IP Payload Compression Protocol.",
+	  
+	  { 0x5d, 0x8d, 0x31, 0x0e, 0xc2, 0x30, 0x10, 0x04,
+	    0xbf, 0xb2, 0x2f, 0xc8, 0x1f, 0x10, 0x04, 0x09,
+	    0x89, 0xc2, 0x85, 0x3f, 0x70, 0xb1, 0x2f, 0xf8,
+	    0x24, 0xdb, 0x67, 0xd9, 0x47, 0xc1, 0xef, 0x49,
+	    0x68, 0x12, 0x51, 0xae, 0x76, 0x67, 0xd6, 0x27,
+	    0x19, 0x88, 0x1a, 0xde, 0x85, 0xab, 0x21, 0xf2,
+	    0x08, 0x5d, 0x16, 0x1e, 0x20, 0x04, 0x2d, 0xad,
+	    0xf3, 0x18, 0xa2, 0x15, 0x85, 0x2d, 0x69, 0xc4,
+	    0x42, 0x83, 0x23, 0xb6, 0x6c, 0x89, 0x71, 0x9b,
+	    0xef, 0xcf, 0x8b, 0x9f, 0xcf, 0x33, 0xca, 0x2f,
+	    0xed, 0x62, 0xa9, 0x4c, 0x80, 0xff, 0x13, 0xaf,
+	    0x52, 0x37, 0xed, 0x0e, 0x52, 0x6b, 0x59, 0x02,
+	    0xd9, 0x4e, 0xe8, 0x7a, 0x76, 0x1d, 0x02, 0x98,
+	    0xfe, 0x8a, 0x87, 0x83, 0xa3, 0x4f, 0x56, 0x8a,
+	    0xb8, 0x9e, 0x8e, 0x5c, 0x57, 0xd3, 0xa0, 0x79,
+	    0xfa, 0x02 },
+	},
+};
+
+struct comp_testvec deflate_decomp_tv_template[] = {
+	{
+	  122, 191,
+	  
+	  { 0x5d, 0x8d, 0x31, 0x0e, 0xc2, 0x30, 0x10, 0x04,
+	    0xbf, 0xb2, 0x2f, 0xc8, 0x1f, 0x10, 0x04, 0x09,
+	    0x89, 0xc2, 0x85, 0x3f, 0x70, 0xb1, 0x2f, 0xf8,
+	    0x24, 0xdb, 0x67, 0xd9, 0x47, 0xc1, 0xef, 0x49,
+	    0x68, 0x12, 0x51, 0xae, 0x76, 0x67, 0xd6, 0x27,
+	    0x19, 0x88, 0x1a, 0xde, 0x85, 0xab, 0x21, 0xf2,
+	    0x08, 0x5d, 0x16, 0x1e, 0x20, 0x04, 0x2d, 0xad,
+	    0xf3, 0x18, 0xa2, 0x15, 0x85, 0x2d, 0x69, 0xc4,
+	    0x42, 0x83, 0x23, 0xb6, 0x6c, 0x89, 0x71, 0x9b,
+	    0xef, 0xcf, 0x8b, 0x9f, 0xcf, 0x33, 0xca, 0x2f,
+	    0xed, 0x62, 0xa9, 0x4c, 0x80, 0xff, 0x13, 0xaf,
+	    0x52, 0x37, 0xed, 0x0e, 0x52, 0x6b, 0x59, 0x02,
+	    0xd9, 0x4e, 0xe8, 0x7a, 0x76, 0x1d, 0x02, 0x98,
+	    0xfe, 0x8a, 0x87, 0x83, 0xa3, 0x4f, 0x56, 0x8a,
+	    0xb8, 0x9e, 0x8e, 0x5c, 0x57, 0xd3, 0xa0, 0x79,
+	    0xfa, 0x02 },
+	    
+	    "This document describes a compression method based on the DEFLATE"
+	    "compression algorithm.  This document defines the application of "
+	    "the DEFLATE algorithm to the IP Payload Compression Protocol.",
+	},
+	
+	{
+	  38, 70,
+	  
+          { 0xf3, 0xca, 0xcf, 0xcc, 0x53, 0x28, 0x2d, 0x56,
+            0xc8, 0xcb, 0x2f, 0x57, 0x48, 0xcc, 0x4b, 0x51,
+            0x28, 0xce, 0x48, 0x2c, 0x4a, 0x55, 0x28, 0xc9,
+            0x48, 0x55, 0x28, 0xce, 0x4f, 0x2b, 0x29, 0x07,
+            0x71, 0xbc, 0x08, 0x2b, 0x01, 0x00
+          },
+          
+          "Join us now and share the software "
+	  "Join us now and share the software ",
+	},
+};
+
 #endif	/* _CRYPTO_TCRYPT_H */
diff -Nru a/drivers/acorn/char/i2c.c b/drivers/acorn/char/i2c.c
--- a/drivers/acorn/char/i2c.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/acorn/char/i2c.c	Mon Mar 31 13:41:08 2003
@@ -303,11 +303,13 @@
 }
 
 static struct i2c_adapter ioc_ops = {
-	.name			= "IOC/IOMD",
 	.id			= I2C_HW_B_IOC,
 	.algo_data		= &ioc_data,
 	.client_register	= ioc_client_reg,
 	.client_unregister	= ioc_client_unreg
+	.dev			= {
+		.name		= "IOC/IOMD",
+	},
 };
 
 static int __init i2c_ioc_init(void)
diff -Nru a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
--- a/drivers/acpi/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/acpi/Kconfig	Mon Mar 31 13:41:08 2003
@@ -132,7 +132,7 @@
 	  may be damaged without it.
 
 config ACPI_NUMA
-	bool "NUMA support" if NUMA && (IA64 && !IA64_HP_SIM || X86 && ACPI && !ACPI_HT_ONLY)
+	bool "NUMA support" if NUMA && (IA64 && !IA64_HP_SIM || X86 && ACPI && !ACPI_HT_ONLY && !X86_64)
 	default y if IA64 && IA64_SGI_SN
 
 config ACPI_TOSHIBA
diff -Nru a/drivers/atm/iphase.c b/drivers/atm/iphase.c
--- a/drivers/atm/iphase.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/atm/iphase.c	Mon Mar 31 13:41:08 2003
@@ -2893,7 +2893,7 @@
         struct tx_buf_desc *buf_desc_ptr;
         int desc;
         int comp_code;
-        int total_len, pad, last;
+        int total_len;
         struct cpcs_trailer *trailer;
         struct ia_vcc *iavcc;
 
@@ -2975,9 +2975,7 @@
 	/* Figure out the exact length of the packet and padding required to 
            make it  aligned on a 48 byte boundary.  */
 	total_len = skb->len + sizeof(struct cpcs_trailer);  
-	last = total_len - (total_len/48)*48;  
-	pad = 48 - last;  
-	total_len = pad + total_len;  
+	total_len = ((total_len + 47) / 48) * 48;
 	IF_TX(printk("ia packet len:%d padding:%d\n", total_len, pad);)  
  
 	/* Put the packet in a tx buffer */   
diff -Nru a/drivers/base/base.h b/drivers/base/base.h
--- a/drivers/base/base.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/base/base.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,3 @@
-#undef DEBUG
-
 extern struct semaphore device_sem;
 extern struct semaphore devclass_sem;
 
diff -Nru a/drivers/base/cpu.c b/drivers/base/cpu.c
--- a/drivers/base/cpu.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/base/cpu.c	Mon Mar 31 13:41:06 2003
@@ -48,6 +48,9 @@
 
 int __init cpu_dev_init(void)
 {
-	devclass_register(&cpu_devclass);
-	return driver_register(&cpu_driver);
+	int error;
+	if (!(error = devclass_register(&cpu_devclass)))
+		if ((error = driver_register(&cpu_driver)))
+			devclass_unregister(&cpu_devclass);
+	return error;
 }
diff -Nru a/drivers/base/driver.c b/drivers/base/driver.c
--- a/drivers/base/driver.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/base/driver.c	Mon Mar 31 13:41:08 2003
@@ -82,6 +82,7 @@
 int driver_register(struct device_driver * drv)
 {
 	INIT_LIST_HEAD(&drv->devices);
+	INIT_LIST_HEAD(&drv->class_list);
 	init_MUTEX_LOCKED(&drv->unload_sem);
 	return bus_add_driver(drv);
 }
diff -Nru a/drivers/base/memblk.c b/drivers/base/memblk.c
--- a/drivers/base/memblk.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/base/memblk.c	Mon Mar 31 13:41:08 2003
@@ -47,9 +47,12 @@
 }
 
 
-static int __init register_memblk_type(void)
+int __init register_memblk_type(void)
 {
-	int error = devclass_register(&memblk_devclass);
-	return error ? error : driver_register(&memblk_driver);
+	int error;
+	if (!(error = devclass_register(&memblk_devclass)))
+		if ((error = driver_register(&memblk_driver)))
+			devclass_unregister(&memblk_devclass);	/* undo the class registration when the driver fails */
+	return error;
 }
 postcore_initcall(register_memblk_type);
diff -Nru a/drivers/base/node.c b/drivers/base/node.c
--- a/drivers/base/node.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/base/node.c	Mon Mar 31 13:41:06 2003
@@ -89,9 +89,12 @@
 }
 
 
-static int __init register_node_type(void)
+int __init register_node_type(void)
 {
-	int error = devclass_register(&node_devclass);
-	return error ? error : driver_register(&node_driver);
+	int error;
+	if (!(error = devclass_register(&node_devclass)))
+		if ((error = driver_register(&node_driver)))
+			devclass_unregister(&node_devclass);	/* undo the class registration when the driver fails */
+	return error;
 }
 postcore_initcall(register_node_type);
diff -Nru a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig
--- a/drivers/char/drm/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/Kconfig	Mon Mar 31 13:41:08 2003
@@ -22,7 +22,13 @@
 	  Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
 	  graphics card.  If M is selected, the module will be called tdfx.
 
-#    tristate '  3dlabs GMX 2000' CONFIG_DRM_GAMMA
+config DRM_GAMMA
+	tristate "3dlabs GMX 2000"
+	depends on DRM && n
+	help
+	  This is the old gamma driver, disabled for now unless somebody
+	  tells me it actually might work.
+
 config DRM_R128
 	tristate "ATI Rage 128"
 	depends on DRM
@@ -60,4 +66,3 @@
 	  Choose this option if you have a Matrox G200, G400 or G450 graphics
 	  card.  If M is selected, the module will be called mga.  AGP
 	  support is required for this driver to work.
-
diff -Nru a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile
--- a/drivers/char/drm/Makefile	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/Makefile	Mon Mar 31 13:41:08 2003
@@ -7,7 +7,7 @@
 r128-objs   := r128_drv.o r128_cce.o r128_state.o r128_irq.o
 mga-objs    := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o
 i810-objs   := i810_drv.o i810_dma.o
-i830-objs   := i830_drv.o i830_dma.o
+i830-objs   := i830_drv.o i830_dma.o i830_irq.o
 radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o
 ffb-objs    := ffb_drv.o ffb_context.o
 
diff -Nru a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h
--- a/drivers/char/drm/drmP.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/drmP.h	Mon Mar 31 13:41:08 2003
@@ -254,6 +254,7 @@
  		}							\
 	}								\
 } while(0)
+#define DRM_DROP_MAP(_map)
 
 				/* Internal types and structures */
 #define DRM_ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
@@ -268,6 +269,17 @@
 	(_map) = (_dev)->context_sareas[_ctx];		\
 } while(0)
 
+#define LOCK_TEST_WITH_RETURN( dev, filp )				\
+do {									\
+	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
+	     dev->lock.filp != filp ) {				\
+		DRM_ERROR( "%s called without lock held\n",		\
+			   __FUNCTION__ );				\
+		return -EINVAL;						\
+	}								\
+} while (0)
+
+
 typedef int drm_ioctl_t( struct inode *inode, struct file *filp,
 			 unsigned int cmd, unsigned long arg );
 
@@ -316,7 +328,7 @@
 	__volatile__ int  waiting;     /* On kernel DMA queue		     */
 	__volatile__ int  pending;     /* On hardware DMA queue		     */
 	wait_queue_head_t dma_wait;    /* Processes waiting		     */
-	pid_t		  pid;	       /* PID of holding process	     */
+	struct file       *filp;       /* Pointer to holding file descr	     */
 	int		  context;     /* Kernel queue for this buffer	     */
 	int		  while_locked;/* Dispatch this buffer while locked  */
 	enum {
@@ -434,7 +446,7 @@
 
 typedef struct drm_lock_data {
 	drm_hw_lock_t	  *hw_lock;	/* Hardware lock		   */
-	pid_t		  pid;		/* PID of lock holder (0=kernel)   */
+	struct file       *filp;	/* File descr of lock holder (0=kernel)   */
 	wait_queue_head_t lock_queue;	/* Queue of blocked processes	   */
 	unsigned long	  lock_time;	/* Time of last lock in jiffies	   */
 } drm_lock_data_t;
@@ -516,6 +528,8 @@
 	drm_map_t		*map;
 } drm_map_list_t;
 
+typedef drm_map_t drm_local_map_t;
+
 #if __HAVE_VBL_IRQ
 
 typedef struct drm_vbl_sig {
@@ -591,6 +605,7 @@
    	atomic_t          vbl_received;
 	spinlock_t        vbl_lock;
 	drm_vbl_sig_t     vbl_sigs;
+	unsigned int      vbl_pending;
 #endif
 	cycles_t	  ctx_start;
 	cycles_t	  lck_start;
@@ -807,15 +822,15 @@
 extern int	     DRM(dma_setup)(drm_device_t *dev);
 extern void	     DRM(dma_takedown)(drm_device_t *dev);
 extern void	     DRM(free_buffer)(drm_device_t *dev, drm_buf_t *buf);
-extern void	     DRM(reclaim_buffers)(drm_device_t *dev, pid_t pid);
+extern void	     DRM(reclaim_buffers)( struct file *filp );
 #if __HAVE_OLD_DMA
 /* GH: This is a dirty hack for now...
  */
 extern void	     DRM(clear_next_buffer)(drm_device_t *dev);
 extern int	     DRM(select_queue)(drm_device_t *dev,
 				       void (*wrapper)(unsigned long));
-extern int	     DRM(dma_enqueue)(drm_device_t *dev, drm_dma_t *dma);
-extern int	     DRM(dma_get_buffers)(drm_device_t *dev, drm_dma_t *dma);
+extern int	     DRM(dma_enqueue)(struct file *filp, drm_dma_t *dma);
+extern int	     DRM(dma_get_buffers)(struct file *filp, drm_dma_t *dma);
 #endif
 #if __HAVE_DMA_IRQ
 extern int           DRM(control)( struct inode *inode, struct file *filp,
diff -Nru a/drivers/char/drm/drm_agpsupport.h b/drivers/char/drm/drm_agpsupport.h
--- a/drivers/char/drm/drm_agpsupport.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/drm_agpsupport.h	Mon Mar 31 13:41:06 2003
@@ -266,12 +266,12 @@
 		head->cant_use_aperture = head->agp_info.cant_use_aperture;
 		head->page_mask = head->agp_info.page_mask;
 #endif
-		
-		DRM_DEBUG("AGP %d.%d, aperture @ 0x%08lx %ZuMB\n",
-			  head->agp_info.version.major,
-			  head->agp_info.version.minor,
-			  head->agp_info.aper_base,
-			  head->agp_info.aper_size);
+
+		DRM_INFO("AGP %d.%d aperture @ 0x%08lx %ZuMB\n",
+			 head->agp_info.version.major,
+			 head->agp_info.version.minor,
+			 head->agp_info.aper_base,
+			 head->agp_info.aper_size);
 	}
 	return head;
 }
diff -Nru a/drivers/char/drm/drm_bufs.h b/drivers/char/drm/drm_bufs.h
--- a/drivers/char/drm/drm_bufs.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/drm_bufs.h	Mon Mar 31 13:41:06 2003
@@ -403,7 +403,7 @@
 		buf->waiting = 0;
 		buf->pending = 0;
 		init_waitqueue_head( &buf->dma_wait );
-		buf->pid     = 0;
+		buf->filp    = 0;
 
 		buf->dev_priv_size = sizeof(DRIVER_BUF_PRIV_T);
 		buf->dev_private = DRM(alloc)( sizeof(DRIVER_BUF_PRIV_T),
@@ -616,7 +616,7 @@
 			buf->waiting = 0;
 			buf->pending = 0;
 			init_waitqueue_head( &buf->dma_wait );
-			buf->pid     = 0;
+			buf->filp    = 0;
 #if __HAVE_DMA_HISTOGRAM
 			buf->time_queued     = 0;
 			buf->time_dispatched = 0;
@@ -773,7 +773,7 @@
 		buf->waiting = 0;
 		buf->pending = 0;
 		init_waitqueue_head( &buf->dma_wait );
-		buf->pid     = 0;
+		buf->filp    = 0;
 
 		buf->dev_priv_size = sizeof(DRIVER_BUF_PRIV_T);
 		buf->dev_private = DRM(alloc)( sizeof(DRIVER_BUF_PRIV_T),
@@ -1011,9 +1011,9 @@
 			return -EINVAL;
 		}
 		buf = dma->buflist[idx];
-		if ( buf->pid != current->pid ) {
-			DRM_ERROR( "Process %d freeing buffer owned by %d\n",
-				   current->pid, buf->pid );
+		if ( buf->filp != filp ) {
+			DRM_ERROR( "Process %d freeing buffer not owned\n",
+				   current->pid );
 			return -EINVAL;
 		}
 		DRM(free_buffer)( dev, buf );
diff -Nru a/drivers/char/drm/drm_dma.h b/drivers/char/drm/drm_dma.h
--- a/drivers/char/drm/drm_dma.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/drm_dma.h	Mon Mar 31 13:41:07 2003
@@ -188,7 +188,7 @@
 
 	buf->waiting  = 0;
 	buf->pending  = 0;
-	buf->pid      = 0;
+	buf->filp     = 0;
 	buf->used     = 0;
 #if __HAVE_DMA_HISTOGRAM
 	buf->time_completed = get_cycles();
@@ -210,14 +210,16 @@
 }
 
 #if !__HAVE_DMA_RECLAIM
-void DRM(reclaim_buffers)(drm_device_t *dev, pid_t pid)
+void DRM(reclaim_buffers)( struct file *filp )
 {
+	drm_file_t    *priv   = filp->private_data;
+	drm_device_t  *dev    = priv->dev;
 	drm_device_dma_t *dma = dev->dma;
 	int		 i;
 
 	if (!dma) return;
 	for (i = 0; i < dma->buf_count; i++) {
-		if (dma->buflist[i]->pid == pid) {
+		if (dma->buflist[i]->filp == filp) {
 			switch (dma->buflist[i]->list) {
 			case DRM_LIST_NONE:
 				DRM(free_buffer)(dev, dma->buflist[i]);
@@ -318,8 +320,10 @@
 }
 
 
-int DRM(dma_enqueue)(drm_device_t *dev, drm_dma_t *d)
+int DRM(dma_enqueue)(struct file *filp, drm_dma_t *d)
 {
+	drm_file_t    *priv   = filp->private_data;
+	drm_device_t  *dev    = priv->dev;
 	int		  i;
 	drm_queue_t	  *q;
 	drm_buf_t	  *buf;
@@ -381,10 +385,10 @@
 			return -EINVAL;
 		}
 		buf = dma->buflist[ idx ];
-		if (buf->pid != current->pid) {
+		if (buf->filp != filp) {
 			atomic_dec(&q->use_count);
-			DRM_ERROR("Process %d using buffer owned by %d\n",
-				  current->pid, buf->pid);
+			DRM_ERROR("Process %d using buffer not owned\n",
+				  current->pid);
 			return -EINVAL;
 		}
 		if (buf->list != DRM_LIST_NONE) {
@@ -426,9 +430,11 @@
 	return 0;
 }
 
-static int DRM(dma_get_buffers_of_order)(drm_device_t *dev, drm_dma_t *d,
+static int DRM(dma_get_buffers_of_order)(struct file *filp, drm_dma_t *d,
 					 int order)
 {
+	drm_file_t    *priv   = filp->private_data;
+	drm_device_t  *dev    = priv->dev;
 	int		  i;
 	drm_buf_t	  *buf;
 	drm_device_dma_t  *dma = dev->dma;
@@ -438,13 +444,13 @@
 					d->flags & _DRM_DMA_WAIT);
 		if (!buf) break;
 		if (buf->pending || buf->waiting) {
-			DRM_ERROR("Free buffer %d in use by %d (w%d, p%d)\n",
+			DRM_ERROR("Free buffer %d in use by %p (w%d, p%d)\n",
 				  buf->idx,
-				  buf->pid,
+				  buf->filp,
 				  buf->waiting,
 				  buf->pending);
 		}
-		buf->pid     = current->pid;
+		buf->filp     = filp;
 		if (copy_to_user(&d->request_indices[i],
 				 &buf->idx,
 				 sizeof(buf->idx)))
@@ -461,7 +467,7 @@
 }
 
 
-int DRM(dma_get_buffers)(drm_device_t *dev, drm_dma_t *dma)
+int DRM(dma_get_buffers)(struct file *filp, drm_dma_t *dma)
 {
 	int		  order;
 	int		  retcode = 0;
@@ -470,7 +476,7 @@
 	order = DRM(order)(dma->request_size);
 
 	dma->granted_count = 0;
-	retcode		   = DRM(dma_get_buffers_of_order)(dev, dma, order);
+	retcode		   = DRM(dma_get_buffers_of_order)(filp, dma, order);
 
 	if (dma->granted_count < dma->request_count
 	    && (dma->flags & _DRM_DMA_SMALLER_OK)) {
@@ -480,7 +486,7 @@
 			     && tmp_order >= DRM_MIN_ORDER;
 		     --tmp_order) {
 
-			retcode = DRM(dma_get_buffers_of_order)(dev, dma,
+			retcode = DRM(dma_get_buffers_of_order)(filp, dma,
 								tmp_order);
 		}
 	}
@@ -493,7 +499,7 @@
 			     && tmp_order <= DRM_MAX_ORDER;
 		     ++tmp_order) {
 
-			retcode = DRM(dma_get_buffers_of_order)(dev, dma,
+			retcode = DRM(dma_get_buffers_of_order)(filp, dma,
 								tmp_order);
 		}
 	}
@@ -540,6 +546,8 @@
 	spin_lock_init( &dev->vbl_lock );
 
 	INIT_LIST_HEAD( &dev->vbl_sigs.head );
+
+	dev->vbl_pending = 0;
 #endif
 
 				/* Before installing handler */
@@ -622,6 +630,7 @@
 	switch ( vblwait.request.type & ~_DRM_VBLANK_FLAGS_MASK ) {
 	case _DRM_VBLANK_RELATIVE:
 		vblwait.request.sequence += atomic_read( &dev->vbl_received );
+		vblwait.request.type &= ~_DRM_VBLANK_RELATIVE;
 	case _DRM_VBLANK_ABSOLUTE:
 		break;
 	default:
@@ -632,10 +641,38 @@
 	
 	if ( flags & _DRM_VBLANK_SIGNAL ) {
 		unsigned long irqflags;
-		drm_vbl_sig_t *vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) );
+		drm_vbl_sig_t *vbl_sig;
+		
+		vblwait.reply.sequence = atomic_read( &dev->vbl_received );
+
+		spin_lock_irqsave( &dev->vbl_lock, irqflags );
+
+		/* Check if this task has already scheduled the same signal
+		 * for the same vblank sequence number; nothing to be done in
+		 * that case
+		 */
+		list_for_each( ( (struct list_head *) vbl_sig ), &dev->vbl_sigs.head ) {
+			if (vbl_sig->sequence == vblwait.request.sequence
+			    && vbl_sig->info.si_signo == vblwait.request.signal
+			    && vbl_sig->task == current)
+			{
+				spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
+				goto done;
+			}
+		}
+		if ( dev->vbl_pending >= 100 ) {
+			spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
+			return -EBUSY;
+		}
+		dev->vbl_pending++;	/* reserve a slot before dropping the lock to allocate */
 
-		if ( !vbl_sig )
+		spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
+		if ( !( vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) ) ) ) {
+			spin_lock_irqsave( &dev->vbl_lock, irqflags );
+			dev->vbl_pending--;	/* allocation failed: release the slot reserved above */
+			spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
 			return -ENOMEM;
+		}
 
 		memset( (void *)vbl_sig, 0, sizeof(*vbl_sig) );
 
@@ -643,9 +680,6 @@
 		vbl_sig->info.si_signo = vblwait.request.signal;
 		vbl_sig->task = current;
 
-		vblwait.reply.sequence = atomic_read( &dev->vbl_received );
-
-		/* Hook signal entry into list */
 		spin_lock_irqsave( &dev->vbl_lock, irqflags );
 
 		list_add_tail( (struct list_head *) vbl_sig, &dev->vbl_sigs.head );
@@ -659,6 +693,7 @@
 		vblwait.reply.tval_usec = now.tv_usec;
 	}
 
+done:
 	DRM_COPY_TO_USER_IOCTL( (drm_wait_vblank_t *)data, vblwait,
 				sizeof(vblwait) );
 
@@ -667,25 +702,23 @@
 
 void DRM(vbl_send_signals)( drm_device_t *dev )
 {
-	struct list_head *entry, *tmp;
+	struct list_head *tmp;
 	drm_vbl_sig_t *vbl_sig;
 	unsigned int vbl_seq = atomic_read( &dev->vbl_received );
 	unsigned long flags;
 
 	spin_lock_irqsave( &dev->vbl_lock, flags );
 
-	list_for_each_safe( entry, tmp, &dev->vbl_sigs.head ) {
-
-		vbl_sig = (drm_vbl_sig_t *) entry;
-
+	list_for_each_safe( ( (struct list_head *) vbl_sig ), tmp, &dev->vbl_sigs.head ) {
 		if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) {
-
-			vbl_sig->info.si_code = atomic_read( &dev->vbl_received );
+			vbl_sig->info.si_code = vbl_seq;
 			send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task );
 
-			list_del( entry );
+			list_del( (struct list_head *) vbl_sig );
+
+			DRM_FREE( vbl_sig, sizeof(*vbl_sig) );
 
-			DRM_FREE( entry );
+			dev->vbl_pending--;
 		}
 	}
 
diff -Nru a/drivers/char/drm/drm_drv.h b/drivers/char/drm/drm_drv.h
--- a/drivers/char/drm/drm_drv.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/drm_drv.h	Mon Mar 31 13:41:08 2003
@@ -323,6 +323,8 @@
 	dev->last_context = 0;
 	dev->last_switch = 0;
 	dev->last_checked = 0;
+	init_timer( &dev->timer );
+	init_waitqueue_head( &dev->context_wait );
 
 	dev->ctx_start = 0;
 	dev->lck_start = 0;
@@ -494,7 +496,7 @@
 #endif
 	if ( dev->lock.hw_lock ) {
 		dev->sigdata.lock = dev->lock.hw_lock = NULL; /* SHM removed */
-		dev->lock.pid = 0;
+		dev->lock.filp = 0;
 		wake_up_interruptible( &dev->lock.lock_queue );
 	}
 	up( &dev->struct_sem );
@@ -545,9 +547,7 @@
 
 	drm_device_t *dev;
 	int i;
-#if __HAVE_CTX_BITMAP
 	int retcode;
-#endif
 	DRM_DEBUG( "\n" );
 
 #ifdef MODULE
@@ -578,13 +578,9 @@
 		memset( (void *)dev, 0, sizeof(*dev) );
 		dev->count_lock = SPIN_LOCK_UNLOCKED;
 		sema_init( &dev->struct_sem, 1 );
-		init_timer( &dev->timer );
-		init_waitqueue_head( &dev->context_wait );
 
-		if ((DRM(minor)[i] = DRM(stub_register)(DRIVER_NAME, &DRM(fops),dev)) < 0) {
-			retcode = -EPERM;
-			goto fail_reg;
-		}
+		if ((DRM(minor)[i] = DRM(stub_register)(DRIVER_NAME, &DRM(fops),dev)) < 0)
+			return -EPERM;
 		dev->device = MKDEV(DRM_MAJOR, DRM(minor)[i] );
 		dev->name   = DRIVER_NAME;
 
@@ -593,8 +589,9 @@
 #if __MUST_HAVE_AGP
 		if ( dev->agp == NULL ) {
 			DRM_ERROR( "Cannot initialize the agpgart module.\n" );
-			retcode = -ENOMEM;
-			goto fail;
+			DRM(stub_unregister)(DRM(minor)[i]);
+			DRM(takedown)( dev );
+			return -ENOMEM;
 		}
 #endif
 #if __REALLY_HAVE_MTRR
@@ -610,7 +607,9 @@
 		retcode = DRM(ctxbitmap_init)( dev );
 		if( retcode ) {
 			DRM_ERROR( "Cannot allocate memory for context bitmap.\n" );
-			goto fail;
+			DRM(stub_unregister)(DRM(minor)[i]);
+			DRM(takedown)( dev );
+			return retcode;
 		}
 #endif
 		DRM_INFO( "Initialized %s %d.%d.%d %s on minor %d\n",
@@ -625,15 +624,6 @@
 	DRIVER_POSTINIT();
 
 	return 0;
-
-fail:
-	DRM(stub_unregister)(DRM(minor)[i]);
-	DRM(takedown)( dev );
-
-fail_reg:
-	kfree (DRM(device));
-	kfree (DRM(minor));
-	return retcode;
 }
 
 /* drm_cleanup is called via cleanup_module at module unload time.
@@ -740,8 +730,6 @@
 		return -ENODEV;
 	}
 
-	DRM_DEBUG( "open_count = %d\n", dev->open_count );
-
 	retcode = DRM(open_helper)( inode, filp, dev );
 	if ( !retcode ) {
 		atomic_inc( &dev->counts[_DRM_STAT_OPENS] );
@@ -773,15 +761,15 @@
 	 * Begin inline drm_release
 	 */
 
-	DRM_DEBUG( "pid = %d, device = 0x%x, open_count = %d\n",
-		   current->pid, dev->device, dev->open_count );
+	DRM_DEBUG( "pid = %d, device = 0x%lx, open_count = %d\n",
+		   current->pid, (long)dev->device, dev->open_count );
 
 	if ( dev->lock.hw_lock &&
 	     _DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock) &&
-	     dev->lock.pid == current->pid ) {
-		DRM_DEBUG( "Process %d dead, freeing lock for context %d\n",
-			   current->pid,
-			   _DRM_LOCKING_CONTEXT(dev->lock.hw_lock->lock) );
+	     dev->lock.filp == filp ) {
+		DRM_DEBUG( "File %p released, freeing lock for context %d\n",
+			filp,
+			_DRM_LOCKING_CONTEXT(dev->lock.hw_lock->lock) );
 #if __HAVE_RELEASE
 		DRIVER_RELEASE();
 #endif
@@ -797,6 +785,7 @@
 	else if ( dev->lock.hw_lock ) {
 		/* The lock is required to reclaim buffers */
 		DECLARE_WAITQUEUE( entry, current );
+
 		add_wait_queue( &dev->lock.lock_queue, &entry );
 		for (;;) {
 			current->state = TASK_INTERRUPTIBLE;
@@ -807,7 +796,7 @@
 			}
 			if ( DRM(lock_take)( &dev->lock.hw_lock->lock,
 					     DRM_KERNEL_CONTEXT ) ) {
-				dev->lock.pid	    = priv->pid;
+				dev->lock.filp	    = filp;
 				dev->lock.lock_time = jiffies;
                                 atomic_inc( &dev->counts[_DRM_STAT_LOCKS] );
 				break;	/* Got lock */
@@ -831,7 +820,7 @@
 		}
 	}
 #elif __HAVE_DMA
-	DRM(reclaim_buffers)( dev, priv->pid );
+	DRM(reclaim_buffers)( filp );
 #endif
 
 	DRM(fasync)( -1, filp, 0 );
@@ -855,7 +844,7 @@
 		dev->file_last	 = priv->prev;
 	}
 	up( &dev->struct_sem );
-
+	
 	DRM(free)( priv, sizeof(*priv), DRM_MEM_FILES );
 
 	/* ========================================================
@@ -880,6 +869,7 @@
 	spin_unlock( &dev->count_lock );
 
 	unlock_kernel();
+
 	return retcode;
 }
 
@@ -899,8 +889,9 @@
 	atomic_inc( &dev->counts[_DRM_STAT_IOCTLS] );
 	++priv->ioctl_count;
 
-	DRM_DEBUG( "pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%x, auth=%d\n",
-		   current->pid, cmd, nr, dev->device, priv->authenticated );
+	DRM_DEBUG( "pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n",
+		   current->pid, cmd, nr, (long)dev->device, 
+		   priv->authenticated );
 
 	if ( nr >= DRIVER_IOCTL_COUNT ) {
 		retcode = -EINVAL;
@@ -976,7 +967,7 @@
                         }
                         if ( DRM(lock_take)( &dev->lock.hw_lock->lock,
 					     lock.context ) ) {
-                                dev->lock.pid       = current->pid;
+                                dev->lock.filp      = filp;
                                 dev->lock.lock_time = jiffies;
                                 atomic_inc( &dev->counts[_DRM_STAT_LOCKS] );
                                 break;  /* Got lock */
@@ -1058,7 +1049,7 @@
 	 * agent to request it then we should just be able to
 	 * take it immediately and not eat the ioctl.
 	 */
-	dev->lock.pid = 0;
+	dev->lock.filp = 0;
 	{
 		__volatile__ unsigned int *plock = &dev->lock.hw_lock->lock;
 		unsigned int old, new, prev, ctx;
diff -Nru a/drivers/char/drm/drm_fops.h b/drivers/char/drm/drm_fops.h
--- a/drivers/char/drm/drm_fops.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/drm_fops.h	Mon Mar 31 13:41:07 2003
@@ -94,8 +94,8 @@
 	drm_file_t    *priv   = filp->private_data;
 	drm_device_t  *dev    = priv->dev;
 
-	DRM_DEBUG("pid = %d, device = 0x%x, open_count = %d\n",
-		  current->pid, dev->device, dev->open_count);
+	DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
+		  current->pid, (long)dev->device, dev->open_count);
 	return 0;
 }
 
@@ -105,7 +105,7 @@
 	drm_device_t  *dev    = priv->dev;
 	int	      retcode;
 
-	DRM_DEBUG("fd = %d, device = 0x%x\n", fd, dev->device);
+	DRM_DEBUG("fd = %d, device = 0x%lx\n", fd, (long)dev->device);
 	retcode = fasync_helper(fd, filp, on, &dev->buf_async);
 	if (retcode < 0) return retcode;
 	return 0;
diff -Nru a/drivers/char/drm/drm_ioctl.h b/drivers/char/drm/drm_ioctl.h
--- a/drivers/char/drm/drm_ioctl.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/drm_ioctl.h	Mon Mar 31 13:41:06 2003
@@ -40,6 +40,28 @@
 
 	if (copy_from_user(&p, (drm_irq_busid_t *)arg, sizeof(p)))
 		return -EFAULT;
+#ifdef __alpha__
+	{
+		int domain = p.busnum >> 8;
+		p.busnum &= 0xff;
+
+		/*
+		 * Find the hose the device is on (the domain number is the
+		 * hose index) and offset the bus by the root bus of that
+		 * hose.
+		 */
+                for(dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,NULL);
+                    dev;
+                    dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,dev)) {
+			struct pci_controller *hose = dev->sysdata;
+			
+			if (hose->index == domain) {
+				p.busnum += hose->bus->number;
+				break;
+			}
+		}
+	}
+#endif
 	dev = pci_find_slot(p.busnum, PCI_DEVFN(p.devnum, p.funcnum));
 	if (!dev) {
 		DRM_ERROR("pci_find_slot failed for %d:%d:%d\n",
@@ -112,7 +134,7 @@
 
 	do {
 		struct pci_dev *pci_dev;
-                int b, d, f;
+                int domain, b, d, f;
                 char *p;
  
                 for(p = dev->unique; p && *p && *p != ':'; p++);
@@ -124,6 +146,27 @@
                 f = (int)simple_strtoul(p+1, &p, 10);
                 if (*p) break;
  
+		domain = b >> 8;
+		b &= 0xff;
+
+#ifdef __alpha__
+		/*
+		 * Find the hose the device is on (the domain number is the
+		 * hose index) and offset the bus by the root bus of that
+		 * hose.
+		 */
+                for(pci_dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,NULL);
+                    pci_dev;
+                    pci_dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,pci_dev)) {
+			struct pci_controller *hose = pci_dev->sysdata;
+			
+			if (hose->index == domain) {
+				b += hose->bus->number;
+				break;
+			}
+		}
+#endif
+
                 pci_dev = pci_find_slot(b, PCI_DEVFN(d,f));
                 if (pci_dev) {
 			dev->pdev = pci_dev;
diff -Nru a/drivers/char/drm/drm_lists.h b/drivers/char/drm/drm_lists.h
--- a/drivers/char/drm/drm_lists.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/drm_lists.h	Mon Mar 31 13:41:08 2003
@@ -72,8 +72,8 @@
 
 	left = DRM_LEFTCOUNT(bl);
 	if (!left) {
-		DRM_ERROR("Overflow while adding buffer %d from pid %d\n",
-			  buf->idx, buf->pid);
+		DRM_ERROR("Overflow while adding buffer %d from filp %p\n",
+			  buf->idx, buf->filp);
 		return -EINVAL;
 	}
 #if __HAVE_DMA_HISTOGRAM
diff -Nru a/drivers/char/drm/drm_lock.h b/drivers/char/drm/drm_lock.h
--- a/drivers/char/drm/drm_lock.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/drm_lock.h	Mon Mar 31 13:41:07 2003
@@ -78,7 +78,7 @@
 {
 	unsigned int old, new, prev;
 
-	dev->lock.pid = 0;
+	dev->lock.filp = 0;
 	do {
 		old  = *lock;
 		new  = context | _DRM_LOCK_HELD;
@@ -91,19 +91,17 @@
 		   __volatile__ unsigned int *lock, unsigned int context)
 {
 	unsigned int old, new, prev;
-	pid_t        pid = dev->lock.pid;
 
-	dev->lock.pid = 0;
+	dev->lock.filp = 0;
 	do {
 		old  = *lock;
 		new  = 0;
 		prev = cmpxchg(lock, old, new);
 	} while (prev != old);
 	if (_DRM_LOCK_IS_HELD(old) && _DRM_LOCKING_CONTEXT(old) != context) {
-		DRM_ERROR("%d freed heavyweight lock held by %d (pid %d)\n",
+		DRM_ERROR("%d freed heavyweight lock held by %d\n",
 			  context,
-			  _DRM_LOCKING_CONTEXT(old),
-			  pid);
+			  _DRM_LOCKING_CONTEXT(old));
 		return 1;
 	}
 	wake_up_interruptible(&dev->lock.lock_queue);
diff -Nru a/drivers/char/drm/drm_os_linux.h b/drivers/char/drm/drm_os_linux.h
--- a/drivers/char/drm/drm_os_linux.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/drm_os_linux.h	Mon Mar 31 13:41:08 2003
@@ -2,16 +2,17 @@
 #include <linux/interrupt.h>	/* For task queue support */
 #include <linux/delay.h>
 
+#define DRMFILE                         struct file *
 #define DRM_IOCTL_ARGS			struct inode *inode, struct file *filp, unsigned int cmd, unsigned long data
 #define DRM_ERR(d)			-(d)
 #define DRM_CURRENTPID			current->pid
 #define DRM_UDELAY(d)			udelay(d)
-#define DRM_READ8(addr)			readb(addr)
-#define DRM_READ32(addr)		readl(addr)
-#define DRM_WRITE8(addr, val)		writeb(val, addr)
-#define DRM_WRITE32(addr, val)		writel(val, addr)
-#define DRM_READMEMORYBARRIER()		mb()
-#define DRM_WRITEMEMORYBARRIER()	wmb()
+#define DRM_READ8(map, offset)		readb(((unsigned long)(map)->handle) + (offset))
+#define DRM_READ32(map, offset)		readl(((unsigned long)(map)->handle) + (offset))
+#define DRM_WRITE8(map, offset, val)	writeb(val, ((unsigned long)(map)->handle) + (offset))
+#define DRM_WRITE32(map, offset, val)	writel(val, ((unsigned long)(map)->handle) + (offset))
+#define DRM_READMEMORYBARRIER(map)	mb()
+#define DRM_WRITEMEMORYBARRIER(map)	wmb()
 #define DRM_DEVICE	drm_file_t	*priv	= filp->private_data; \
 			drm_device_t	*dev	= priv->dev
 
@@ -41,7 +42,7 @@
 
 /* malloc/free without the overhead of DRM(alloc) */
 #define DRM_MALLOC(x) kmalloc(x, GFP_KERNEL)
-#define DRM_FREE(x) kfree(x)
+#define DRM_FREE(x,size) kfree(x)
 
 #define DRM_GETSAREA()							 \
 do { 									 \
diff -Nru a/drivers/char/drm/drm_proc.h b/drivers/char/drm/drm_proc.h
--- a/drivers/char/drm/drm_proc.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/drm_proc.h	Mon Mar 31 13:41:06 2003
@@ -147,10 +147,10 @@
 	*eof   = 0;
 
 	if (dev->unique) {
-		DRM_PROC_PRINT("%s 0x%x %s\n",
-			       dev->name, dev->device, dev->unique);
+		DRM_PROC_PRINT("%s 0x%lx %s\n",
+			       dev->name, (long)dev->device, dev->unique);
 	} else {
-		DRM_PROC_PRINT("%s 0x%x\n", dev->name, dev->device);
+		DRM_PROC_PRINT("%s 0x%lx\n", dev->name, (long)dev->device);
 	}
 
 	if (len > request + offset) return request;
diff -Nru a/drivers/char/drm/gamma_dma.c b/drivers/char/drm/gamma_dma.c
--- a/drivers/char/drm/gamma_dma.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/gamma_dma.c	Mon Mar 31 13:41:08 2003
@@ -188,7 +188,7 @@
 		if (!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) {
 			DRM_ERROR("Dispatching buffer %d from pid %d"
 				  " \"while locked\", but no lock held\n",
-				  buf->idx, buf->pid);
+				  buf->idx, current->pid);
 		}
 	} else {
 		if (!locked && !gamma_lock_take(&dev->lock.hw_lock->lock,
@@ -340,7 +340,8 @@
 	return retcode;
 }
 
-static int gamma_dma_priority(drm_device_t *dev, drm_dma_t *d)
+static int gamma_dma_priority(struct file *filp, 
+			      drm_device_t *dev, drm_dma_t *d)
 {
 	unsigned long	  address;
 	unsigned long	  length;
@@ -378,15 +379,15 @@
 			continue;
 		}
 		buf = dma->buflist[ idx ];
-		if (buf->pid != current->pid) {
-			DRM_ERROR("Process %d using buffer owned by %d\n",
-				  current->pid, buf->pid);
+		if (buf->filp != filp) {
+			DRM_ERROR("Process %d using buffer not owned\n",
+				  current->pid);
 			retcode = -EINVAL;
 			goto cleanup;
 		}
 		if (buf->list != DRM_LIST_NONE) {
-			DRM_ERROR("Process %d using %d's buffer on list %d\n",
-				  current->pid, buf->pid, buf->list);
+			DRM_ERROR("Process %d using buffer on list %d\n",
+				  current->pid, buf->list);
 			retcode = -EINVAL;
 			goto cleanup;
 		}
@@ -478,7 +479,8 @@
 	return retcode;
 }
 
-static int gamma_dma_send_buffers(drm_device_t *dev, drm_dma_t *d)
+static int gamma_dma_send_buffers(struct file *filp,
+				  drm_device_t *dev, drm_dma_t *d)
 {
 	DECLARE_WAITQUEUE(entry, current);
 	drm_buf_t	  *last_buf = NULL;
@@ -490,7 +492,7 @@
 		add_wait_queue(&last_buf->dma_wait, &entry);
 	}
 
-	if ((retcode = gamma_dma_enqueue(dev, d))) {
+	if ((retcode = gamma_dma_enqueue(filp, d))) {
 		if (d->flags & _DRM_DMA_BLOCK)
 			remove_wait_queue(&last_buf->dma_wait, &entry);
 		return retcode;
@@ -520,14 +522,13 @@
 			}
 		}
 		if (retcode) {
-			DRM_ERROR("ctx%d w%d p%d c%d i%d l%d %d/%d\n",
+			DRM_ERROR("ctx%d w%d p%d c%ld i%d l%d pid:%d\n",
 				  d->context,
 				  last_buf->waiting,
 				  last_buf->pending,
-				  DRM_WAITCOUNT(dev, d->context),
+				  (long)DRM_WAITCOUNT(dev, d->context),
 				  last_buf->idx,
 				  last_buf->list,
-				  last_buf->pid,
 				  current->pid);
 		}
 	}
@@ -560,15 +561,15 @@
 
 	if (d.send_count) {
 		if (d.flags & _DRM_DMA_PRIORITY)
-			retcode = gamma_dma_priority(dev, &d);
+			retcode = gamma_dma_priority(filp, dev, &d);
 		else
-			retcode = gamma_dma_send_buffers(dev, &d);
+			retcode = gamma_dma_send_buffers(filp, dev, &d);
 	}
 
 	d.granted_count = 0;
 
 	if (!retcode && d.request_count) {
-		retcode = gamma_dma_get_buffers(dev, &d);
+		retcode = gamma_dma_get_buffers(filp, &d);
 	}
 
 	DRM_DEBUG("%d returning, granted = %d\n",
@@ -590,7 +591,7 @@
 	drm_buf_t	    *buf;
 	int i;
 	struct list_head    *list;
-	unsigned int	    *pgt;
+	unsigned long	    *pgt;
 
 	DRM_DEBUG( "%s\n", __FUNCTION__ );
 
@@ -643,7 +644,7 @@
 
  		for (i = 0; i < GLINT_DRI_BUF_COUNT; i++) {
 			buf = dma->buflist[i];
-			*pgt = (unsigned int)buf->address + 0x07;
+			*pgt = (unsigned long)buf->address + 0x07;
 			pgt++;
 		}
 
diff -Nru a/drivers/char/drm/gamma_drv.h b/drivers/char/drm/gamma_drv.h
--- a/drivers/char/drm/gamma_drv.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/gamma_drv.h	Mon Mar 31 13:41:07 2003
@@ -42,16 +42,6 @@
 	drm_map_t *mmio3;
 } drm_gamma_private_t;
 
-#define LOCK_TEST_WITH_RETURN( dev )					\
-do {									\
-	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
-	     dev->lock.pid != current->pid ) {				\
-		DRM_ERROR( "%s called without lock held\n",		\
-			   __FUNCTION__ );				\
-		return -EINVAL;						\
-	}								\
-} while (0)
-
 				/* gamma_dma.c */
 extern int gamma_dma_init( struct inode *inode, struct file *filp,
 			 unsigned int cmd, unsigned long arg );
diff -Nru a/drivers/char/drm/i810.h b/drivers/char/drm/i810.h
--- a/drivers/char/drm/i810.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/i810.h	Mon Mar 31 13:41:06 2003
@@ -86,7 +86,7 @@
  */
 #define __HAVE_RELEASE		1
 #define DRIVER_RELEASE() do {						\
-	i810_reclaim_buffers( dev, priv->pid );				\
+	i810_reclaim_buffers( filp );				\
 } while (0)
 
 /* DMA customization:
diff -Nru a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c
--- a/drivers/char/drm/i810_dma.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/i810_dma.c	Mon Mar 31 13:41:06 2003
@@ -46,30 +46,10 @@
 #define I810_BUF_UNMAPPED 0
 #define I810_BUF_MAPPED   1
 
-#define RING_LOCALS	unsigned int outring, ringmask; volatile char *virt;
-
-#define BEGIN_LP_RING(n) do {						\
-	if (0) DRM_DEBUG("BEGIN_LP_RING(%d) in %s\n", n, __FUNCTION__);	\
-	if (dev_priv->ring.space < n*4)					\
-		i810_wait_ring(dev, n*4);				\
-	dev_priv->ring.space -= n*4;					\
-	outring = dev_priv->ring.tail;					\
-	ringmask = dev_priv->ring.tail_mask;				\
-	virt = dev_priv->ring.virtual_start;				\
-} while (0)
-
-#define ADVANCE_LP_RING() do {				\
-	if (0) DRM_DEBUG("ADVANCE_LP_RING\n");			\
-	dev_priv->ring.tail = outring;			\
-	I810_WRITE(LP_RING + RING_TAIL, outring);	\
-} while(0)
-
-#define OUT_RING(n) do {				\
-	if (0) DRM_DEBUG("   OUT_RING %x\n", (int)(n));	\
-	*(volatile unsigned int *)(virt + outring) = n;	\
-	outring += 4;					\
-	outring &= ringmask;				\
-} while (0)
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,2)
+#define down_write down
+#define up_write up
+#endif
 
 static inline void i810_print_status_page(drm_device_t *dev)
 {
@@ -178,11 +158,7 @@
 
 	if(buf_priv->currently_mapped == I810_BUF_MAPPED) return -EINVAL;
 
-#if LINUX_VERSION_CODE <= 0x020402
-	down( &current->mm->mmap_sem );
-#else
 	down_write( &current->mm->mmap_sem );
-#endif
 	old_fops = filp->f_op;
 	filp->f_op = &i810_buffer_fops;
 	dev_priv->mmap_buffer = buf;
@@ -194,15 +170,12 @@
 	filp->f_op = old_fops;
 	if ((unsigned long)buf_priv->virtual > -1024UL) {
 		/* Real error */
-		DRM_DEBUG("mmap error\n");
+		DRM_ERROR("mmap error\n");
 		retcode = (signed int)buf_priv->virtual;
 		buf_priv->virtual = 0;
 	}
-#if LINUX_VERSION_CODE <= 0x020402
-	up( &current->mm->mmap_sem );
-#else
 	up_write( &current->mm->mmap_sem );
-#endif
+
 	return retcode;
 }
 
@@ -213,19 +186,13 @@
 
 	if(buf_priv->currently_mapped != I810_BUF_MAPPED)
 		return -EINVAL;
-#if LINUX_VERSION_CODE <= 0x020402
-	down( &current->mm->mmap_sem );
-#else
-	down_write( &current->mm->mmap_sem );
-#endif
+
+	down_write(&current->mm->mmap_sem);
 	retcode = do_munmap(current->mm,
 			    (unsigned long)buf_priv->virtual,
 			    (size_t) buf->total);
-#if LINUX_VERSION_CODE <= 0x020402
-	up( &current->mm->mmap_sem );
-#else
-	up_write( &current->mm->mmap_sem );
-#endif
+	up_write(&current->mm->mmap_sem);
+
    	buf_priv->currently_mapped = I810_BUF_UNMAPPED;
    	buf_priv->virtual = 0;
 
@@ -235,7 +202,6 @@
 static int i810_dma_get_buffer(drm_device_t *dev, drm_i810_dma_t *d,
 			       struct file *filp)
 {
-	drm_file_t	  *priv	  = filp->private_data;
 	drm_buf_t	  *buf;
 	drm_i810_buf_priv_t *buf_priv;
 	int retcode = 0;
@@ -250,10 +216,10 @@
 	retcode = i810_map_buffer(buf, filp);
 	if(retcode) {
 		i810_freelist_put(dev, buf);
-	   	DRM_DEBUG("mapbuf failed, retcode %d\n", retcode);
+	   	DRM_ERROR("mapbuf failed, retcode %d\n", retcode);
 		return retcode;
 	}
-	buf->pid     = priv->pid;
+	buf->filp = filp;
 	buf_priv = buf->dev_private;
 	d->granted = 1;
    	d->request_idx = buf->idx;
@@ -314,7 +280,7 @@
 		   end = jiffies + (HZ*3);
 
 	   	iters++;
-		if((signed)(end - jiffies) <= 0) {
+		if(time_before(end, jiffies)) {
 		   	DRM_ERROR("space: %d wanted %d\n", ring->space, n);
 		   	DRM_ERROR("lockup\n");
 		   	goto out_wait_ring;
@@ -882,8 +848,10 @@
 }
 
 /* Must be called with the lock held */
-void i810_reclaim_buffers(drm_device_t *dev, pid_t pid)
+void i810_reclaim_buffers(struct file *filp)
 {
+	drm_file_t    *priv   = filp->private_data;
+	drm_device_t  *dev    = priv->dev;
 	drm_device_dma_t *dma = dev->dma;
 	int		 i;
 
@@ -897,7 +865,7 @@
 	   	drm_buf_t *buf = dma->buflist[ i ];
 	   	drm_i810_buf_priv_t *buf_priv = buf->dev_private;
 
-		if (buf->pid == pid && buf_priv) {
+		if (buf->filp == filp && buf_priv) {
 			int used = cmpxchg(buf_priv->in_use, I810_BUF_CLIENT,
 					   I810_BUF_FREE);
 
diff -Nru a/drivers/char/drm/i810_drv.h b/drivers/char/drm/i810_drv.h
--- a/drivers/char/drm/i810_drv.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/i810_drv.h	Mon Mar 31 13:41:08 2003
@@ -88,7 +88,7 @@
 			  unsigned int cmd, unsigned long arg);
 extern int  i810_flush_ioctl(struct inode *inode, struct file *filp,
 			     unsigned int cmd, unsigned long arg);
-extern void i810_reclaim_buffers(drm_device_t *dev, pid_t pid);
+extern void i810_reclaim_buffers(struct file *filp);
 extern int  i810_getage(struct inode *inode, struct file *filp,
 			unsigned int cmd, unsigned long arg);
 extern int i810_mmap_buffers(struct file *filp, struct vm_area_struct *vma);
@@ -136,6 +136,33 @@
 #define I810_READ16(reg)	I810_DEREF16(reg)
 #define I810_WRITE16(reg,val)	do { I810_DEREF16(reg) = val; } while (0)
 
+#define I810_VERBOSE 0	/* non-zero: ring macros below trace via DRM_DEBUG */
+#define RING_LOCALS	unsigned int outring, ringmask; \
+                        volatile char *virt;
+/* Reserve n ring words (n*4 bytes), calling i810_wait_ring() if short; load ring locals. */
+#define BEGIN_LP_RING(n) do {						\
+	if (I810_VERBOSE)                                               \
+           DRM_DEBUG("BEGIN_LP_RING(%d) in %s\n", (n), __FUNCTION__);	\
+	if (dev_priv->ring.space < (n)*4)				\
+		i810_wait_ring(dev, (n)*4);				\
+	dev_priv->ring.space -= (n)*4;					\
+	outring = dev_priv->ring.tail;					\
+	ringmask = dev_priv->ring.tail_mask;				\
+	virt = dev_priv->ring.virtual_start;				\
+} while (0)
+/* Record outring as the new ring tail and write it to LP_RING + RING_TAIL. */
+#define ADVANCE_LP_RING() do {				        \
+	if (I810_VERBOSE) DRM_DEBUG("ADVANCE_LP_RING\n");    	\
+	dev_priv->ring.tail = outring;		        	\
+	I810_WRITE(LP_RING + RING_TAIL, outring);	        \
+} while(0)
+/* Store one unsigned int at virt + outring, then advance outring by 4 modulo ringmask. */
+#define OUT_RING(n) do {  				                \
+	if (I810_VERBOSE) DRM_DEBUG("   OUT_RING %x\n", (int)(n));	\
+	*(volatile unsigned int *)(virt + outring) = (n);               \
+	outring += 4;					                \
+	outring &= ringmask;			                        \
+} while (0)
 
 #define GFX_OP_USER_INTERRUPT 		((0<<29)|(2<<23))
 #define GFX_OP_BREAKPOINT_INTERRUPT	((0<<29)|(1<<23))
@@ -198,6 +225,7 @@
 
 #define CMD_OP_Z_BUFFER_INFO     ((0x0<<29)|(0x16<<23))
 #define CMD_OP_DESTBUFFER_INFO   ((0x0<<29)|(0x15<<23))
+#define CMD_OP_FRONTBUFFER_INFO  ((0x0<<29)|(0x14<<23))
 
 #define BR00_BITBLT_CLIENT   0x40000000
 #define BR00_OP_COLOR_BLT    0x10000000
diff -Nru a/drivers/char/drm/i830.h b/drivers/char/drm/i830.h
--- a/drivers/char/drm/i830.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/i830.h	Mon Mar 31 13:41:06 2003
@@ -45,22 +45,37 @@
 
 #define DRIVER_NAME		"i830"
 #define DRIVER_DESC		"Intel 830M"
-#define DRIVER_DATE		"20020828"
+#define DRIVER_DATE		"20021108"
 
+/* Interface history:
+ *
+ * 1.1: Original.
+ * 1.2: ?
+ * 1.3: New irq emit/wait ioctls.
+ *      New pageflip ioctl.
+ *      New getparam ioctl.
+ *      State for texunits 3&4 in sarea.
+ *      New (alternative) layout for texture state.
+ */
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		2
-#define DRIVER_PATCHLEVEL	1
+#define DRIVER_MINOR		3
+#define DRIVER_PATCHLEVEL	2
 
 #define DRIVER_IOCTLS							    \
 	[DRM_IOCTL_NR(DRM_IOCTL_I830_INIT)]   = { i830_dma_init,    1, 1 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_VERTEX)] = { i830_dma_vertex,  1, 0 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_CLEAR)]  = { i830_clear_bufs,  1, 0 }, \
-      	[DRM_IOCTL_NR(DRM_IOCTL_I830_FLUSH)]  = { i830_flush_ioctl, 1, 0 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_GETAGE)] = { i830_getage,      1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_VERTEX)] = { i830_dma_vertex,  1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_CLEAR)]  = { i830_clear_bufs,  1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_FLUSH)]  = { i830_flush_ioctl, 1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_GETAGE)] = { i830_getage,      1, 0 }, \
 	[DRM_IOCTL_NR(DRM_IOCTL_I830_GETBUF)] = { i830_getbuf,      1, 0 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_SWAP)]   = { i830_swap_bufs,   1, 0 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_COPY)]   = { i830_copybuf,     1, 0 }, \
-   	[DRM_IOCTL_NR(DRM_IOCTL_I830_DOCOPY)] = { i830_docopy,      1, 0 },
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_SWAP)]   = { i830_swap_bufs,   1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_COPY)]   = { i830_copybuf,     1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_DOCOPY)] = { i830_docopy,      1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_FLIP)]   = { i830_flip_bufs,   1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_IRQ_EMIT)] = { i830_irq_emit,  1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_IRQ_WAIT)] = { i830_irq_wait,  1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_GETPARAM)] = { i830_getparam,  1, 0 }, \
+	[DRM_IOCTL_NR(DRM_IOCTL_I830_SETPARAM)] = { i830_setparam,  1, 0 } 
 
 #define __HAVE_COUNTERS         4
 #define __HAVE_COUNTER6         _DRM_STAT_IRQ
@@ -72,7 +87,7 @@
  */
 #define __HAVE_RELEASE		1
 #define DRIVER_RELEASE() do {						\
-	i830_reclaim_buffers( dev, priv->pid );				\
+	i830_reclaim_buffers( filp );				\
 } while (0)
 
 /* DMA customization:
@@ -87,10 +102,49 @@
 	i830_dma_quiescent( dev );					\
 } while (0)
 
-/* Don't need an irq any more.  The template code will make sure that
- * a noop stub is generated for compatibility.
+
+/* Driver will work either way: IRQs save CPU time when waiting for
+ * the card, but are subject to subtle interactions between the BIOS,
+ * the hardware and the driver.
  */
-#define __HAVE_DMA_IRQ		0
+#define USE_IRQS 0
+
+
+#if USE_IRQS
+#define __HAVE_DMA_IRQ		1
+#define __HAVE_SHARED_IRQ	1
+
+#define DRIVER_PREINSTALL() do {			\
+	drm_i830_private_t *dev_priv =			\
+		(drm_i830_private_t *)dev->dev_private;	\
+							\
+   	I830_WRITE16( I830REG_HWSTAM, 0xffff );	\
+        I830_WRITE16( I830REG_INT_MASK_R, 0x0 );	\
+      	I830_WRITE16( I830REG_INT_ENABLE_R, 0x0 );	\
+} while (0)
+
+
+#define DRIVER_POSTINSTALL() do {				\
+	drm_i830_private_t *dev_priv =				\
+		(drm_i830_private_t *)dev->dev_private;		\
+   	I830_WRITE16( I830REG_INT_ENABLE_R, 0x2 );		\
+   	atomic_set(&dev_priv->irq_received, 0);			\
+   	atomic_set(&dev_priv->irq_emitted, 0);			\
+	init_waitqueue_head(&dev_priv->irq_queue);		\
+} while (0)
+
+
+/* This gets called too late to be useful: dev_priv has already been
+ * freed.
+ */
+#define DRIVER_UNINSTALL() do {					\
+} while (0)
+
+#else
+#define __HAVE_DMA_IRQ          0
+#endif
+
+
 
 /* Buffer customization:
  */
diff -Nru a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
--- a/drivers/char/drm/i830_dma.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/i830_dma.c	Mon Mar 31 13:41:06 2003
@@ -37,6 +37,7 @@
 #include "i830_drm.h"
 #include "i830_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
+#include <linux/pagemap.h>	/* For FASTCALL on unlock_page() */
 #include <linux/delay.h>
 
 #define I830_BUF_FREE		2
@@ -46,8 +47,6 @@
 #define I830_BUF_UNMAPPED 0
 #define I830_BUF_MAPPED   1
 
-#define RING_LOCALS	unsigned int outring, ringmask; volatile char *virt;
-
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,2)
 #define down_write down
 #define up_write up
@@ -60,32 +59,6 @@
 #define UnlockPage(page)	unlock_page(page)
 #endif
 
-#define I830_VERBOSE 0
-
-#define BEGIN_LP_RING(n) do {				\
-	if (I830_VERBOSE)				\
-		printk("BEGIN_LP_RING(%d) in %s\n",	\
-			  n, __FUNCTION__);		\
-	if (dev_priv->ring.space < n*4) 		\
-		i830_wait_ring(dev, n*4);		\
-	dev_priv->ring.space -= n*4;			\
-	outring = dev_priv->ring.tail;			\
-	ringmask = dev_priv->ring.tail_mask;		\
-	virt = dev_priv->ring.virtual_start;		\
-} while (0)
-
-#define ADVANCE_LP_RING() do {					\
-	if (I830_VERBOSE) printk("ADVANCE_LP_RING %x\n", outring);	\
-	dev_priv->ring.tail = outring;				\
-	I830_WRITE(LP_RING + RING_TAIL, outring);		\
-} while(0)
-
-#define OUT_RING(n) do {						\
-	if (I830_VERBOSE) printk("   OUT_RING %x\n", (int)(n));	\
-	*(volatile unsigned int *)(virt + outring) = n;			\
-	outring += 4;							\
-	outring &= ringmask;						\
-} while (0)
 
 static inline void i830_print_status_page(drm_device_t *dev)
 {
@@ -237,7 +210,6 @@
 static int i830_dma_get_buffer(drm_device_t *dev, drm_i830_dma_t *d, 
 			       struct file *filp)
 {
-	drm_file_t	  *priv	  = filp->private_data;
 	drm_buf_t	  *buf;
 	drm_i830_buf_priv_t *buf_priv;
 	int retcode = 0;
@@ -245,7 +217,7 @@
 	buf = i830_freelist_get(dev);
 	if (!buf) {
 		retcode = -ENOMEM;
-	   	DRM_ERROR("retcode=%d\n", retcode);
+	   	DRM_DEBUG("retcode=%d\n", retcode);
 		return retcode;
 	}
    
@@ -255,7 +227,7 @@
 	   	DRM_ERROR("mapbuf failed, retcode %d\n", retcode);
 		return retcode;
 	}
-	buf->pid     = priv->pid;
+	buf->filp = filp;
 	buf_priv = buf->dev_private;	
 	d->granted = 1;
    	d->request_idx = buf->idx;
@@ -279,12 +251,21 @@
 					 dev_priv->ring.Size);
 		}
 	   	if(dev_priv->hw_status_page != 0UL) {
-		   	pci_free_consistent(dev->pdev, PAGE_SIZE,
+			pci_free_consistent(dev->pdev, PAGE_SIZE,
 					    (void *)dev_priv->hw_status_page,
 					    dev_priv->dma_status_page);
 		   	/* Need to rewrite hardware status page */
 		   	I830_WRITE(0x02080, 0x1ffff000);
 		}
+
+		/* Disable interrupts here because after dev_private
+		 * is freed, it's too late.
+		 */
+		if (dev->irq) {
+			I830_WRITE16( I830REG_INT_MASK_R, 0xffff );
+			I830_WRITE16( I830REG_INT_ENABLE_R, 0x0 );
+		}
+
 	   	DRM(free)(dev->dev_private, sizeof(drm_i830_private_t), 
 			 DRM_MEM_DRIVER);
 	   	dev->dev_private = NULL;
@@ -298,7 +279,7 @@
    	return 0;
 }
 
-static int i830_wait_ring(drm_device_t *dev, int n)
+int i830_wait_ring(drm_device_t *dev, int n, const char *caller)
 {
    	drm_i830_private_t *dev_priv = dev->dev_private;
    	drm_i830_ring_buffer_t *ring = &(dev_priv->ring);
@@ -324,6 +305,7 @@
 		   	goto out_wait_ring;
 		}
 		udelay(1);
+		dev_priv->sarea_priv->perf_boxes |= I830_BOX_WAIT;
 	}
 
 out_wait_ring:   
@@ -339,6 +321,9 @@
      	ring->tail = I830_READ(LP_RING + RING_TAIL) & TAIL_ADDR;
      	ring->space = ring->head - (ring->tail+8);
      	if (ring->space < 0) ring->space += ring->Size;
+
+	if (ring->head == ring->tail)
+		dev_priv->sarea_priv->perf_boxes |= I830_BOX_RING_EMPTY;
 }
 
 static int i830_freelist_init(drm_device_t *dev, drm_i830_private_t *dev_priv)
@@ -453,6 +438,8 @@
 
 	dev_priv->back_pitch = init->back_pitch;
 	dev_priv->depth_pitch = init->depth_pitch;
+	dev_priv->do_boxes = 0;
+	dev_priv->use_mi_batchbuffer_start = 0;
 
    	/* Program Hardware Status Page */
    	dev_priv->hw_status_page =
@@ -467,7 +454,7 @@
    	memset((void *) dev_priv->hw_status_page, 0, PAGE_SIZE);
 	DRM_DEBUG("hw status page @ %lx\n", dev_priv->hw_status_page);
    
-	I830_WRITE(0x02080, dev_priv->dma_status_page);
+   	I830_WRITE(0x02080, virt_to_bus((void *)dev_priv->hw_status_page));
 	DRM_DEBUG("Enabled hardware status page\n");
    
    	/* Now we need to init our freelist */
@@ -528,11 +515,7 @@
 	unsigned int tmp;
 	RING_LOCALS;
 
-	BEGIN_LP_RING( I830_CTX_SETUP_SIZE + 2 );
-
-	OUT_RING( GFX_OP_STIPPLE );
-	OUT_RING( 0 );
-
+	BEGIN_LP_RING( I830_CTX_SETUP_SIZE + 4 );
 
 	for ( i = 0 ; i < I830_CTXREG_BLENDCOLR0 ; i++ ) {
 		tmp = code[i];
@@ -570,38 +553,44 @@
 	ADVANCE_LP_RING();
 }
 
-static void i830EmitTexVerified( drm_device_t *dev, 
-				 volatile unsigned int *code ) 
+static void i830EmitTexVerified( drm_device_t *dev, unsigned int *code ) 
 {
    	drm_i830_private_t *dev_priv = dev->dev_private;
 	int i, j = 0;
 	unsigned int tmp;
 	RING_LOCALS;
 
-	BEGIN_LP_RING( I830_TEX_SETUP_SIZE );
-
-	OUT_RING( GFX_OP_MAP_INFO );
-	OUT_RING( code[I830_TEXREG_MI1] );
-	OUT_RING( code[I830_TEXREG_MI2] );
-	OUT_RING( code[I830_TEXREG_MI3] );
-	OUT_RING( code[I830_TEXREG_MI4] );
-	OUT_RING( code[I830_TEXREG_MI5] );
-
-	for ( i = 6 ; i < I830_TEX_SETUP_SIZE ; i++ ) {
-		tmp = code[i];
-		OUT_RING( tmp ); 
-		j++;
-	} 
+	if (code[I830_TEXREG_MI0] == GFX_OP_MAP_INFO ||
+	    (code[I830_TEXREG_MI0] & ~(0xf*LOAD_TEXTURE_MAP0)) == 
+	    (STATE3D_LOAD_STATE_IMMEDIATE_2|4)) {
+
+		BEGIN_LP_RING( I830_TEX_SETUP_SIZE );
+
+		OUT_RING( code[I830_TEXREG_MI0] ); /* TM0LI */
+		OUT_RING( code[I830_TEXREG_MI1] ); /* TM0S0 */
+		OUT_RING( code[I830_TEXREG_MI2] ); /* TM0S1 */
+		OUT_RING( code[I830_TEXREG_MI3] ); /* TM0S2 */
+		OUT_RING( code[I830_TEXREG_MI4] ); /* TM0S3 */
+		OUT_RING( code[I830_TEXREG_MI5] ); /* TM0S4 */
+		
+		for ( i = 6 ; i < I830_TEX_SETUP_SIZE ; i++ ) {
+			tmp = code[i];
+			OUT_RING( tmp ); 
+			j++;
+		} 
 
-	if (j & 1) 
-		OUT_RING( 0 ); 
+		if (j & 1) 
+			OUT_RING( 0 ); 
 
-	ADVANCE_LP_RING();
+		ADVANCE_LP_RING();
+	}
+	else
+		printk("rejected packet %x\n", code[0]);
 }
 
 static void i830EmitTexBlendVerified( drm_device_t *dev, 
-				     volatile unsigned int *code,
-				     volatile unsigned int num)
+				      unsigned int *code,
+				      unsigned int num)
 {
    	drm_i830_private_t *dev_priv = dev->dev_private;
 	int i, j = 0;
@@ -611,7 +600,7 @@
 	if (!num)
 		return;
 
-	BEGIN_LP_RING( num );
+	BEGIN_LP_RING( num + 1 );
 
 	for ( i = 0 ; i < num ; i++ ) {
 		tmp = code[i];
@@ -634,6 +623,8 @@
 	int i;
 	RING_LOCALS;
 
+	return;
+
 	BEGIN_LP_RING( 258 );
 
 	if(is_shared == 1) {
@@ -647,42 +638,41 @@
 		OUT_RING(palette[i]);
 	}
 	OUT_RING(0);
+	/* KW:  WHERE IS THE ADVANCE_LP_RING?  This is effectively a noop! 
+	 */
 }
 
 /* Need to do some additional checking when setting the dest buffer.
  */
 static void i830EmitDestVerified( drm_device_t *dev, 
-				  volatile unsigned int *code ) 
+				  unsigned int *code ) 
 {	
    	drm_i830_private_t *dev_priv = dev->dev_private;
 	unsigned int tmp;
 	RING_LOCALS;
 
-	BEGIN_LP_RING( I830_DEST_SETUP_SIZE + 6 );
+	BEGIN_LP_RING( I830_DEST_SETUP_SIZE + 10 );
+
 
 	tmp = code[I830_DESTREG_CBUFADDR];
-	if (tmp == dev_priv->front_di1) {
-		/* Don't use fence when front buffer rendering */
-		OUT_RING( CMD_OP_DESTBUFFER_INFO );
-		OUT_RING( BUF_3D_ID_COLOR_BACK | 
-			  BUF_3D_PITCH(dev_priv->back_pitch * dev_priv->cpp) );
-		OUT_RING( tmp );
+	if (tmp == dev_priv->front_di1 || tmp == dev_priv->back_di1) {
+		if (((int)outring) & 8) {
+			OUT_RING(0);
+			OUT_RING(0);
+		}
 
 		OUT_RING( CMD_OP_DESTBUFFER_INFO );
-		OUT_RING( BUF_3D_ID_DEPTH |
-			  BUF_3D_PITCH(dev_priv->depth_pitch * dev_priv->cpp));
-		OUT_RING( dev_priv->zi1 );
-	} else if(tmp == dev_priv->back_di1) {
-		OUT_RING( CMD_OP_DESTBUFFER_INFO );
 		OUT_RING( BUF_3D_ID_COLOR_BACK | 
 			  BUF_3D_PITCH(dev_priv->back_pitch * dev_priv->cpp) |
 			  BUF_3D_USE_FENCE);
 		OUT_RING( tmp );
+		OUT_RING( 0 );
 
 		OUT_RING( CMD_OP_DESTBUFFER_INFO );
 		OUT_RING( BUF_3D_ID_DEPTH | BUF_3D_USE_FENCE | 
 			  BUF_3D_PITCH(dev_priv->depth_pitch * dev_priv->cpp));
 		OUT_RING( dev_priv->zi1 );
+		OUT_RING( 0 );
 	} else {
 		DRM_ERROR("bad di1 %x (allow %x or %x)\n",
 			  tmp, dev_priv->front_di1, dev_priv->back_di1);
@@ -710,21 +700,35 @@
 		OUT_RING( 0 );
 	}
 
-	OUT_RING( code[I830_DESTREG_SENABLE] );
-
 	OUT_RING( GFX_OP_SCISSOR_RECT );
 	OUT_RING( code[I830_DESTREG_SR1] );
 	OUT_RING( code[I830_DESTREG_SR2] );
+	OUT_RING( 0 );
 
 	ADVANCE_LP_RING();
 }
 
+static void i830EmitStippleVerified( drm_device_t *dev, 
+				     unsigned int *code ) 
+{
+   	drm_i830_private_t *dev_priv = dev->dev_private;
+	RING_LOCALS;
+
+	BEGIN_LP_RING( 2 );
+	OUT_RING( GFX_OP_STIPPLE );
+	OUT_RING( code[1] );
+	ADVANCE_LP_RING();	
+}
+
+
 static void i830EmitState( drm_device_t *dev )
 {
    	drm_i830_private_t *dev_priv = dev->dev_private;
       	drm_i830_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	unsigned int dirty = sarea_priv->dirty;
 
+	DRM_DEBUG("%s %x\n", __FUNCTION__, dirty);
+
 	if (dirty & I830_UPLOAD_BUFFERS) {
 		i830EmitDestVerified( dev, sarea_priv->BufferState );
 		sarea_priv->dirty &= ~I830_UPLOAD_BUFFERS;
@@ -758,17 +762,154 @@
 	}
 
 	if (dirty & I830_UPLOAD_TEX_PALETTE_SHARED) {
-	   i830EmitTexPalette(dev, sarea_priv->Palette[0], 0, 1);
+		i830EmitTexPalette(dev, sarea_priv->Palette[0], 0, 1);
+	} else {
+		if (dirty & I830_UPLOAD_TEX_PALETTE_N(0)) {
+			i830EmitTexPalette(dev, sarea_priv->Palette[0], 0, 0);
+			sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(0);
+		}
+		if (dirty & I830_UPLOAD_TEX_PALETTE_N(1)) {
+			i830EmitTexPalette(dev, sarea_priv->Palette[1], 1, 0);
+			sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(1);
+		}
+
+		/* 1.3:
+		 */
+#if 0 /* NOTE(review): N(2)/N(3) equal the PALETTE_SHARED/STIPPLE dirty bits - presumably why disabled */
+		if (dirty & I830_UPLOAD_TEX_PALETTE_N(2)) {
+			i830EmitTexPalette(dev, sarea_priv->Palette2[0], 0, 0);
+			sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(2);
+		}
+		if (dirty & I830_UPLOAD_TEX_PALETTE_N(3)) {
+			i830EmitTexPalette(dev, sarea_priv->Palette2[1], 1, 0);
+			sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(3);
+		}
+#endif
+	}
+
+	/* 1.3:
+	 */
+	if (dirty & I830_UPLOAD_STIPPLE) {
+		i830EmitStippleVerified( dev, 
+					 sarea_priv->StippleState);
+		sarea_priv->dirty &= ~I830_UPLOAD_STIPPLE;
+	}
+
+	if (dirty & I830_UPLOAD_TEX2) {
+		i830EmitTexVerified( dev, sarea_priv->TexState2 );
+		sarea_priv->dirty &= ~I830_UPLOAD_TEX2;
+	}
+
+	if (dirty & I830_UPLOAD_TEX3) {
+		i830EmitTexVerified( dev, sarea_priv->TexState3 );
+		sarea_priv->dirty &= ~I830_UPLOAD_TEX3;
+	}
+
+
+	if (dirty & I830_UPLOAD_TEXBLEND2) {
+		i830EmitTexBlendVerified( 
+			dev, 
+			sarea_priv->TexBlendState2,
+			sarea_priv->TexBlendStateWordsUsed2);
+
+		sarea_priv->dirty &= ~I830_UPLOAD_TEXBLEND2;
+	}
+
+	if (dirty & I830_UPLOAD_TEXBLEND3) {
+		i830EmitTexBlendVerified( 
+			dev, 
+			sarea_priv->TexBlendState3,
+			sarea_priv->TexBlendStateWordsUsed3);
+		sarea_priv->dirty &= ~I830_UPLOAD_TEXBLEND3;
+	}
+}
+
+/* ================================================================
+ * Performance monitoring functions
+ */
+
+static void i830_fill_box( drm_device_t *dev,
+			   int x, int y, int w, int h,
+			   int r, int g, int b )
+{
+   	drm_i830_private_t *dev_priv = dev->dev_private;
+	u32 color;
+	unsigned int BR13, CMD;
+	RING_LOCALS;
+
+	BR13 = (0xF0 << 16) | (dev_priv->pitch * dev_priv->cpp) | (1<<24);
+	CMD = XY_COLOR_BLT_CMD;
+	x += dev_priv->sarea_priv->boxes[0].x1;
+	y += dev_priv->sarea_priv->boxes[0].y1;
+
+	if (dev_priv->cpp == 4) {
+		BR13 |= (1<<25);
+		CMD |= (XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+		color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);	
 	} else {
-	   if (dirty & I830_UPLOAD_TEX_PALETTE_N(0)) {
-	      i830EmitTexPalette(dev, sarea_priv->Palette[0], 0, 0);
-	      sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(0);
-	   }
-	   if (dirty & I830_UPLOAD_TEX_PALETTE_N(1)) {
-	      i830EmitTexPalette(dev, sarea_priv->Palette[1], 1, 0);
-	      sarea_priv->dirty &= ~I830_UPLOAD_TEX_PALETTE_N(1);
-	   }
+		color = (((r & 0xf8) << 8) |
+			 ((g & 0xfc) << 3) |
+			 ((b & 0xf8) >> 3));
 	}
+
+	BEGIN_LP_RING( 6 );	    
+	OUT_RING( CMD );
+	OUT_RING( BR13 );
+	OUT_RING( (y << 16) | x );
+	OUT_RING( ((y+h) << 16) | (x+w) );
+
+ 	if ( dev_priv->current_page == 1 ) { 
+		OUT_RING( dev_priv->front_offset );
+ 	} else {	 
+		OUT_RING( dev_priv->back_offset );
+ 	} 
+
+	OUT_RING( color );
+	ADVANCE_LP_RING();
+}
+
+static void i830_cp_performance_boxes( drm_device_t *dev )
+{
+   	drm_i830_private_t *dev_priv = dev->dev_private;
+
+	/* Purple box for page flipping
+	 */
+	if ( dev_priv->sarea_priv->perf_boxes & I830_BOX_FLIP ) 
+		i830_fill_box( dev, 4, 4, 8, 8, 255, 0, 255 );
+
+	/* Red box if we have to wait for idle at any point
+	 */
+	if ( dev_priv->sarea_priv->perf_boxes & I830_BOX_WAIT ) 
+		i830_fill_box( dev, 16, 4, 8, 8, 255, 0, 0 );
+
+	/* Blue box: lost context?
+	 */
+	if ( dev_priv->sarea_priv->perf_boxes & I830_BOX_LOST_CONTEXT ) 
+		i830_fill_box( dev, 28, 4, 8, 8, 0, 0, 255 );
+
+	/* Yellow box for texture swaps
+	 */
+	if ( dev_priv->sarea_priv->perf_boxes & I830_BOX_TEXTURE_LOAD ) 
+		i830_fill_box( dev, 40, 4, 8, 8, 255, 255, 0 );
+
+	/* Green box if hardware never idles (as far as we can tell)
+	 */
+	if ( !(dev_priv->sarea_priv->perf_boxes & I830_BOX_RING_EMPTY) ) 
+		i830_fill_box( dev, 64, 4, 8, 8, 0, 255, 0 );
+
+
+	/* Draw bars indicating number of buffers allocated 
+	 * (not a great measure, easily confused)
+	 */
+	if (dev_priv->dma_used) {
+		int bar = dev_priv->dma_used / 10240;
+		if (bar > 100) bar = 100;
+		if (bar < 1) bar = 1;
+		i830_fill_box( dev, 4, 16, bar, 4, 196, 128, 128 );
+		dev_priv->dma_used = 0;
+	}
+
+	dev_priv->sarea_priv->perf_boxes = 0;
 }
 
 static void i830_dma_dispatch_clear( drm_device_t *dev, int flags, 
@@ -786,6 +927,15 @@
 	unsigned int BR13, CMD, D_CMD;
 	RING_LOCALS;
 
+
+	if ( dev_priv->current_page == 1 ) {
+		unsigned int tmp = flags;
+
+		flags &= ~(I830_FRONT | I830_BACK);
+		if ( tmp & I830_FRONT ) flags |= I830_BACK;
+		if ( tmp & I830_BACK )  flags |= I830_FRONT;
+	}
+
   	i830_kernel_lost_context(dev);
 
 	switch(cpp) {
@@ -865,13 +1015,17 @@
 	drm_clip_rect_t *pbox = sarea_priv->boxes;
 	int pitch = dev_priv->pitch;
 	int cpp = dev_priv->cpp;
-	int ofs = dev_priv->back_offset;
 	int i;
 	unsigned int CMD, BR13;
 	RING_LOCALS;
 
 	DRM_DEBUG("swapbuffers\n");
 
+  	i830_kernel_lost_context(dev);
+
+	if (dev_priv->do_boxes)
+		i830_cp_performance_boxes( dev );
+
 	switch(cpp) {
 	case 2: 
 		BR13 = (pitch * cpp) | (0xCC << 16) | (1<<24);
@@ -888,7 +1042,6 @@
 		break;
 	}
 
-  	i830_kernel_lost_context(dev);
 
       	if (nbox > I830_NR_SAREA_CLIPRECTS)
      		nbox = I830_NR_SAREA_CLIPRECTS;
@@ -908,23 +1061,72 @@
 		BEGIN_LP_RING( 8 );
 		OUT_RING( CMD );
 		OUT_RING( BR13 );
+		OUT_RING( (pbox->y1 << 16) | pbox->x1 );
+		OUT_RING( (pbox->y2 << 16) | pbox->x2 );
 
-		OUT_RING( (pbox->y1 << 16) |
-			  pbox->x1 );
-		OUT_RING( (pbox->y2 << 16) |
-			  pbox->x2 );
-
-		OUT_RING( dev_priv->front_offset );
-		OUT_RING( (pbox->y1 << 16) |
-			  pbox->x1 );
+		if (dev_priv->current_page == 0) 
+			OUT_RING( dev_priv->front_offset );
+		else
+			OUT_RING( dev_priv->back_offset );			
 
+		OUT_RING( (pbox->y1 << 16) | pbox->x1 );
 		OUT_RING( BR13 & 0xffff );
-		OUT_RING( ofs );
+
+		if (dev_priv->current_page == 0) 
+			OUT_RING( dev_priv->back_offset );			
+		else
+			OUT_RING( dev_priv->front_offset );
 
 		ADVANCE_LP_RING();
 	}
 }
 
+static void i830_dma_dispatch_flip( drm_device_t *dev )
+{
+   	drm_i830_private_t *dev_priv = dev->dev_private;
+	RING_LOCALS;
+
+	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
+		   __FUNCTION__, 
+		   dev_priv->current_page,
+		   dev_priv->sarea_priv->pf_current_page);
+
+  	i830_kernel_lost_context(dev);
+
+	if (dev_priv->do_boxes) {
+		dev_priv->sarea_priv->perf_boxes |= I830_BOX_FLIP;
+		i830_cp_performance_boxes( dev );
+	}
+
+
+	BEGIN_LP_RING( 2 );
+    	OUT_RING( INST_PARSER_CLIENT | INST_OP_FLUSH | INST_FLUSH_MAP_CACHE ); 
+	OUT_RING( 0 );
+	ADVANCE_LP_RING();
+
+	BEGIN_LP_RING( 6 );
+	OUT_RING( CMD_OP_DISPLAYBUFFER_INFO | ASYNC_FLIP );	
+	OUT_RING( 0 );
+	if ( dev_priv->current_page == 0 ) {
+		OUT_RING( dev_priv->back_offset );
+		dev_priv->current_page = 1;
+	} else {
+		OUT_RING( dev_priv->front_offset );
+		dev_priv->current_page = 0;
+	}
+	OUT_RING(0);
+	ADVANCE_LP_RING();
+
+
+	BEGIN_LP_RING( 2 );
+	OUT_RING( MI_WAIT_FOR_EVENT |
+		  MI_WAIT_FOR_PLANE_A_FLIP );
+	OUT_RING( 0 );
+	ADVANCE_LP_RING();
+	
+
+	dev_priv->sarea_priv->pf_current_page = dev_priv->current_page;
+}
 
 static void i830_dma_dispatch_vertex(drm_device_t *dev, 
 				     drm_buf_t *buf,
@@ -977,8 +1179,10 @@
 			 sarea_priv->vertex_prim |
 			 ((used/4)-2));
 
-		vp[used/4] = MI_BATCH_BUFFER_END;
-		used += 4;
+		if (dev_priv->use_mi_batchbuffer_start) {
+			vp[used/4] = MI_BATCH_BUFFER_END; 
+			used += 4; 
+		}
 		
 		if (used & 4) {
 			vp[used/4] = 0;
@@ -1001,11 +1205,21 @@
 				ADVANCE_LP_RING();
 			}
 
-			BEGIN_LP_RING(2);
-			OUT_RING( MI_BATCH_BUFFER_START | (2<<6) );
-			OUT_RING( start | MI_BATCH_NON_SECURE );
-			ADVANCE_LP_RING();
-			
+			if (dev_priv->use_mi_batchbuffer_start) {
+				BEGIN_LP_RING(2);
+				OUT_RING( MI_BATCH_BUFFER_START | (2<<6) );
+				OUT_RING( start | MI_BATCH_NON_SECURE );
+				ADVANCE_LP_RING();
+			} 
+			else {
+				BEGIN_LP_RING(4);
+				OUT_RING( MI_BATCH_BUFFER );
+				OUT_RING( start | MI_BATCH_NON_SECURE );
+				OUT_RING( start + used - 4 );
+				OUT_RING( 0 );
+				ADVANCE_LP_RING();
+			}
+
 		} while (++i < nbox);
 	}
 
@@ -1043,7 +1257,7 @@
       	OUT_RING( 0 );
    	ADVANCE_LP_RING();
 
-	i830_wait_ring( dev, dev_priv->ring.Size - 8 );
+	i830_wait_ring( dev, dev_priv->ring.Size - 8, __FUNCTION__ );
 }
 
 static int i830_flush_queue(drm_device_t *dev)
@@ -1060,7 +1274,7 @@
       	OUT_RING( 0 );
       	ADVANCE_LP_RING();
 
-	i830_wait_ring( dev, dev_priv->ring.Size - 8 );
+	i830_wait_ring( dev, dev_priv->ring.Size - 8, __FUNCTION__ );
 
    	for (i = 0; i < dma->buf_count; i++) {
 	   	drm_buf_t *buf = dma->buflist[ i ];
@@ -1079,8 +1293,10 @@
 }
 
 /* Must be called with the lock held */
-void i830_reclaim_buffers(drm_device_t *dev, pid_t pid)
+void i830_reclaim_buffers( struct file *filp )
 {
+	drm_file_t    *priv   = filp->private_data;
+	drm_device_t  *dev    = priv->dev;
 	drm_device_dma_t *dma = dev->dma;
 	int		 i;
 
@@ -1094,7 +1310,7 @@
 	   	drm_buf_t *buf = dma->buflist[ i ];
 	   	drm_i830_buf_priv_t *buf_priv = buf->dev_private;
 	   
-		if (buf->pid == pid && buf_priv) {
+		if (buf->filp == filp && buf_priv) {
 			int used = cmpxchg(buf_priv->in_use, I830_BUF_CLIENT, 
 					   I830_BUF_FREE);
 
@@ -1200,6 +1416,53 @@
    	return 0;
 }
 
+
+
+/* Not sure why this isn't set all the time:
+ */ 
+static void i830_do_init_pageflip( drm_device_t *dev )
+{
+	drm_i830_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	dev_priv->page_flipping = 1;
+	dev_priv->current_page = 0;
+	dev_priv->sarea_priv->pf_current_page = dev_priv->current_page;
+}
+
+int i830_do_cleanup_pageflip( drm_device_t *dev )
+{
+	drm_i830_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	if (dev_priv->current_page != 0)
+		i830_dma_dispatch_flip( dev );
+
+	dev_priv->page_flipping = 0;
+	return 0;
+}
+
+/* FLIP ioctl: queue a page flip; -EINVAL if uninitialized or hw lock not held. */
+int i830_flip_bufs(struct inode *inode, struct file *filp,
+		   unsigned int cmd, unsigned long arg)
+{
+	drm_file_t *priv = filp->private_data;
+	drm_device_t *dev = priv->dev;
+	drm_i830_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	if (!dev_priv)	/* dev_private is NULL before init / after cleanup */
+		return -EINVAL;
+   	if(!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) {
+		DRM_ERROR("i830_flip_buf called without lock held\n");
+		return -EINVAL;
+	}
+	if (!dev_priv->page_flipping) 
+		i830_do_init_pageflip( dev );
+	i830_dma_dispatch_flip( dev );
+   	return 0;
+}
+
 int i830_getage(struct inode *inode, struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -1261,5 +1524,68 @@
 int i830_docopy(struct inode *inode, struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
+	return 0;
+}
+
+
+
+int i830_getparam( struct inode *inode, struct file *filp, unsigned int cmd,
+		      unsigned long arg )
+{
+	drm_file_t	  *priv	    = filp->private_data;
+	drm_device_t	  *dev	    = priv->dev;
+	drm_i830_private_t *dev_priv = dev->dev_private;
+	drm_i830_getparam_t param;
+	int value;
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&param, (drm_i830_getparam_t *)arg, sizeof(param) ))
+		return -EFAULT;
+
+	switch( param.param ) {
+	case I830_PARAM_IRQ_ACTIVE:
+		value = dev->irq ? 1 : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ( copy_to_user( param.value, &value, sizeof(int) ) ) {
+		DRM_ERROR( "copy_to_user\n" );
+		return -EFAULT;
+	}
+	
+	return 0;
+}
+
+
+int i830_setparam( struct inode *inode, struct file *filp, unsigned int cmd,
+		   unsigned long arg )
+{
+	drm_file_t	  *priv	    = filp->private_data;
+	drm_device_t	  *dev	    = priv->dev;
+	drm_i830_private_t *dev_priv = dev->dev_private;
+	drm_i830_setparam_t param;
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&param, (drm_i830_setparam_t *)arg, sizeof(param) ))
+		return -EFAULT;
+
+	switch( param.param ) {
+	case I830_SETPARAM_USE_MI_BATCHBUFFER_START:
+		dev_priv->use_mi_batchbuffer_start = param.value;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	return 0;
 }
diff -Nru a/drivers/char/drm/i830_drm.h b/drivers/char/drm/i830_drm.h
--- a/drivers/char/drm/i830_drm.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/i830_drm.h	Mon Mar 31 13:41:07 2003
@@ -3,6 +3,9 @@
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
+ *
+ * KW: Actually, you can't ever change them because doing so would
+ * break backwards compatibility.
  */
 
 #ifndef _I830_DEFINES_
@@ -18,14 +21,12 @@
 #define I830_NR_TEX_REGIONS 64
 #define I830_LOG_MIN_TEX_REGION_SIZE 16
 
-/* if defining I830_ENABLE_4_TEXTURES, do it in i830_3d_reg.h, too */
-#if !defined(I830_ENABLE_4_TEXTURES)
+/* KW: These aren't correct but someone set them to two and then
+ * released the module.  Now we can't change them as doing so would
+ * break backwards compatibility.
+ */
 #define I830_TEXTURE_COUNT	2
-#define I830_TEXBLEND_COUNT	2	/* always same as TEXTURE_COUNT? */
-#else /* defined(I830_ENABLE_4_TEXTURES) */
-#define I830_TEXTURE_COUNT	4
-#define I830_TEXBLEND_COUNT	4	/* always same as TEXTURE_COUNT? */
-#endif /* I830_ENABLE_4_TEXTURES */
+#define I830_TEXBLEND_COUNT	I830_TEXTURE_COUNT
 
 #define I830_TEXBLEND_SIZE	12	/* (4 args + op) * 2 + COLOR_FACTOR */
 
@@ -57,6 +58,7 @@
 #define I830_UPLOAD_TEXBLEND_MASK	0xf00000
 #define I830_UPLOAD_TEX_PALETTE_N(n)    (0x1000000 << (n))
 #define I830_UPLOAD_TEX_PALETTE_SHARED	0x4000000
+#define I830_UPLOAD_STIPPLE         	0x8000000
 
 /* Indices into buf.Setup where various bits of state are mirrored per
  * context and per buffer.  These can be fired at the card as a unit,
@@ -73,7 +75,6 @@
  */
 
 #define I830_DESTREG_CBUFADDR 0
-/* Invarient */
 #define I830_DESTREG_DBUFADDR 1
 #define I830_DESTREG_DV0 2
 #define I830_DESTREG_DV1 3
@@ -109,6 +110,13 @@
 #define I830_CTXREG_MCSB1		16
 #define I830_CTX_SETUP_SIZE		17
 
+/* 1.3: Stipple state
+ */ 
+#define I830_STPREG_ST0 0
+#define I830_STPREG_ST1 1
+#define I830_STP_SETUP_SIZE 2
+
+
 /* Texture state (per tex unit)
  */
 
@@ -124,6 +132,18 @@
 #define I830_TEXREG_MCS	9	/* GFX_OP_MAP_COORD_SETS */
 #define I830_TEX_SETUP_SIZE 10
 
+#define I830_TEXREG_TM0LI      0 /* load immediate 2 texture map n */
+#define I830_TEXREG_TM0S0      1
+#define I830_TEXREG_TM0S1      2
+#define I830_TEXREG_TM0S2      3
+#define I830_TEXREG_TM0S3      4
+#define I830_TEXREG_TM0S4      5
+#define I830_TEXREG_NOP0       6       /* noop */
+#define I830_TEXREG_NOP1       7       /* noop */
+#define I830_TEXREG_NOP2       8       /* noop */
+#define __I830_TEXREG_MCS      9       /* GFX_OP_MAP_COORD_SETS -- shared */
+#define __I830_TEX_SETUP_SIZE   10
+
 #define I830_FRONT   0x1
 #define I830_BACK    0x2
 #define I830_DEPTH   0x4
@@ -199,8 +219,35 @@
 	int ctxOwner;		/* last context to upload state */
 
 	int vertex_prim;
+
+        int pf_enabled;               /* is pageflipping allowed? */
+        int pf_active;               
+        int pf_current_page;	    /* which buffer is being displayed? */
+
+        int perf_boxes;             /* performance boxes to be displayed */
+   
+        /* Here's the state for texunits 2,3:
+	 */
+	unsigned int TexState2[I830_TEX_SETUP_SIZE];
+	unsigned int TexBlendState2[I830_TEXBLEND_SIZE];
+	unsigned int TexBlendStateWordsUsed2;
+
+	unsigned int TexState3[I830_TEX_SETUP_SIZE];
+	unsigned int TexBlendState3[I830_TEXBLEND_SIZE];
+	unsigned int TexBlendStateWordsUsed3;
+
+	unsigned int StippleState[I830_STP_SETUP_SIZE];
 } drm_i830_sarea_t;
 
+/* Flags for perf_boxes
+ */
+#define I830_BOX_RING_EMPTY    0x1 /* populated by kernel */
+#define I830_BOX_FLIP          0x2 /* populated by kernel */
+#define I830_BOX_WAIT          0x4 /* populated by kernel & client */
+#define I830_BOX_TEXTURE_LOAD  0x8 /* populated by kernel */
+#define I830_BOX_LOST_CONTEXT  0x10 /* populated by client */
+
+
 /* I830 specific ioctls
  * The device specific ioctl range is 0x40 to 0x79.
  */
@@ -213,6 +260,11 @@
 #define DRM_IOCTL_I830_SWAP		DRM_IO ( 0x46)
 #define DRM_IOCTL_I830_COPY		DRM_IOW( 0x47, drm_i830_copy_t)
 #define DRM_IOCTL_I830_DOCOPY		DRM_IO ( 0x48)
+#define DRM_IOCTL_I830_FLIP		DRM_IO ( 0x49)
+#define DRM_IOCTL_I830_IRQ_EMIT         DRM_IOWR(0x4a, drm_i830_irq_emit_t)
+#define DRM_IOCTL_I830_IRQ_WAIT         DRM_IOW( 0x4b, drm_i830_irq_wait_t)
+#define DRM_IOCTL_I830_GETPARAM         DRM_IOWR(0x4c, drm_i830_getparam_t)
+#define DRM_IOCTL_I830_SETPARAM         DRM_IOWR(0x4d, drm_i830_setparam_t)
 
 typedef struct _drm_i830_clear {
 	int clear_color;
@@ -247,5 +299,37 @@
 	int request_size;
 	int granted;
 } drm_i830_dma_t;
+
+
+/* 1.3: Userspace can request & wait on irq's:
+ */
+typedef struct drm_i830_irq_emit {
+	int *irq_seq;	/* user pointer; i830_irq_emit() copies the emitted sequence number here */
+} drm_i830_irq_emit_t;
+
+typedef struct drm_i830_irq_wait {
+	int irq_seq;	/* sequence number that i830_irq_wait() passes to i830_wait_irq() */
+} drm_i830_irq_wait_t;
+
+
+/* 1.3: New ioctl to query kernel params:
+ */
+#define I830_PARAM_IRQ_ACTIVE            1
+
+typedef struct drm_i830_getparam {
+	int param;	/* parameter id, e.g. I830_PARAM_IRQ_ACTIVE */
+	int *value;	/* user pointer; i830_getparam() copies one int result here */
+} drm_i830_getparam_t;
+
+
+/* 1.3: New ioctl to set kernel params:
+ */
+#define I830_SETPARAM_USE_MI_BATCHBUFFER_START            1
+
+typedef struct drm_i830_setparam {
+	int param;	/* parameter id, e.g. I830_SETPARAM_USE_MI_BATCHBUFFER_START */
+	int value;	/* new value; i830_setparam() stores it without validation */
+} drm_i830_setparam_t;
+
 
 #endif /* _I830_DRM_H_ */
diff -Nru a/drivers/char/drm/i830_drv.h b/drivers/char/drm/i830_drv.h
--- a/drivers/char/drm/i830_drv.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/i830_drv.h	Mon Mar 31 13:41:07 2003
@@ -78,6 +78,19 @@
 	int back_pitch;
 	int depth_pitch;
 	unsigned int cpp;
+
+	int do_boxes;
+	int dma_used;
+
+	int current_page;
+	int page_flipping;
+
+	wait_queue_head_t irq_queue;
+   	atomic_t irq_received;
+   	atomic_t irq_emitted;
+
+	int use_mi_batchbuffer_start;
+
 } drm_i830_private_t;
 
 				/* i830_dma.c */
@@ -88,7 +101,7 @@
 			  unsigned int cmd, unsigned long arg);
 extern int  i830_flush_ioctl(struct inode *inode, struct file *filp,
 			     unsigned int cmd, unsigned long arg);
-extern void i830_reclaim_buffers(drm_device_t *dev, pid_t pid);
+extern void i830_reclaim_buffers(struct file *filp);
 extern int  i830_getage(struct inode *inode, struct file *filp, unsigned int cmd,
 			unsigned long arg);
 extern int i830_mmap_buffers(struct file *filp, struct vm_area_struct *vma);
@@ -108,6 +121,23 @@
 extern int i830_clear_bufs(struct inode *inode, struct file *filp,
 			  unsigned int cmd, unsigned long arg);
 
+extern int i830_flip_bufs(struct inode *inode, struct file *filp,
+			 unsigned int cmd, unsigned long arg);
+
+extern int i830_getparam( struct inode *inode, struct file *filp,
+			  unsigned int cmd, unsigned long arg );
+
+extern int i830_setparam( struct inode *inode, struct file *filp,
+			  unsigned int cmd, unsigned long arg );
+
+/* i830_irq.c */
+extern int i830_irq_emit( struct inode *inode, struct file *filp, 
+			  unsigned int cmd, unsigned long arg );
+extern int i830_irq_wait( struct inode *inode, struct file *filp,
+			  unsigned int cmd, unsigned long arg );
+extern int i830_wait_irq(drm_device_t *dev, int irq_nr);
+extern int i830_emit_irq(drm_device_t *dev);
+
 
 #define I830_BASE(reg)		((unsigned long) \
 				dev_priv->mmio_map->handle)
@@ -119,12 +149,53 @@
 #define I830_READ16(reg) 	I830_DEREF16(reg)
 #define I830_WRITE16(reg,val)	do { I830_DEREF16(reg) = val; } while (0)
 
+
+
+#define I830_VERBOSE 0
+/* Locals that BEGIN_LP_RING / OUT_RING / ADVANCE_LP_RING read and write in the caller. */
+#define RING_LOCALS	unsigned int outring, ringmask, outcount; \
+                        volatile char *virt;
+/* Reserve n dwords: wait if ring.space < n*4 bytes, then load the ring locals. n is parenthesized but expanded more than once. */
+#define BEGIN_LP_RING(n) do {				\
+	if (I830_VERBOSE)				\
+		printk("BEGIN_LP_RING(%d) in %s\n",	\
+			  (n), __FUNCTION__);		\
+	if (dev_priv->ring.space < (n)*4)		\
+		i830_wait_ring(dev, (n)*4, __FUNCTION__);	\
+	outcount = 0;					\
+	outring = dev_priv->ring.tail;			\
+	ringmask = dev_priv->ring.tail_mask;		\
+	virt = dev_priv->ring.virtual_start;		\
+} while (0)
+
+/* Store one dword at virt + outring, count it, and advance outring by 4 (wrapped with ringmask). */
+#define OUT_RING(n) do {					\
+	if (I830_VERBOSE) printk("   OUT_RING %x\n", (int)(n));	\
+	*(volatile unsigned int *)(virt + outring) = n;		\
+        outcount++;						\
+	outring += 4;						\
+	outring &= ringmask;					\
+} while (0)
+/* Commit: save outring as ring.tail, subtract outcount*4 bytes from ring.space, write RING_TAIL. */
+#define ADVANCE_LP_RING() do {						\
+	if (I830_VERBOSE) printk("ADVANCE_LP_RING %x\n", outring);	\
+	dev_priv->ring.tail = outring;					\
+	dev_priv->ring.space -= outcount * 4;				\
+	I830_WRITE(LP_RING + RING_TAIL, outring);			\
+} while(0)
+
+extern int i830_wait_ring(drm_device_t *dev, int n, const char *caller);
+
+
 #define GFX_OP_USER_INTERRUPT 		((0<<29)|(2<<23))
 #define GFX_OP_BREAKPOINT_INTERRUPT	((0<<29)|(1<<23))
 #define CMD_REPORT_HEAD			(7<<23)
 #define CMD_STORE_DWORD_IDX		((0x21<<23) | 0x1)
 #define CMD_OP_BATCH_BUFFER  ((0x0<<29)|(0x30<<23)|0x1)
 
+#define STATE3D_LOAD_STATE_IMMEDIATE_2      ((0x3<<29)|(0x1d<<24)|(0x03<<16))
+#define LOAD_TEXTURE_MAP0                   (1<<11)
+
 #define INST_PARSER_CLIENT   0x00000000
 #define INST_OP_FLUSH        0x02000000
 #define INST_FLUSH_MAP_CACHE 0x00000001
@@ -140,6 +211,9 @@
 #define I830REG_INT_MASK_R 	0x020a8
 #define I830REG_INT_ENABLE_R	0x020a0
 
+#define I830_IRQ_RESERVED ((1<<13)|(3<<2))
+
+
 #define LP_RING     		0x2030
 #define HP_RING     		0x2040
 #define RING_TAIL      		0x00
@@ -182,6 +256,9 @@
 
 #define CMD_OP_DESTBUFFER_INFO	 ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
 
+#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
+#define ASYNC_FLIP                (1<<22)
+
 #define CMD_3D                          (0x3<<29)
 #define STATE3D_CONST_BLEND_COLOR_CMD   (CMD_3D|(0x1d<<24)|(0x88<<16))
 #define STATE3D_MAP_COORD_SETBIND_CMD   (CMD_3D|(0x1d<<24)|(0x02<<16))
@@ -213,6 +290,11 @@
 #define MI_BATCH_BUFFER_END 	(0xA<<23)
 #define MI_BATCH_NON_SECURE	(1)
 
+#define MI_WAIT_FOR_EVENT       ((0x3<<23))
+#define MI_WAIT_FOR_PLANE_A_FLIP      (1<<2) 
+#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) 
+
+#define MI_LOAD_SCAN_LINES_INCL  ((0x12<<23))
 
 #endif
 
diff -Nru a/drivers/char/drm/i830_irq.c b/drivers/char/drm/i830_irq.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/char/drm/i830_irq.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,177 @@
+/* i830_irq.c -- IRQ support for the I830 -*- linux-c -*-
+ *
+ * Copyright 2002 Tungsten Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "i830.h"
+#include "drmP.h"
+#include "drm.h"
+#include "i830_drm.h"
+#include "i830_drv.h"
+#include <linux/interrupt.h>	/* For task queue support */
+#include <linux/delay.h>
+
+/* Interrupt handler: read the identity register, write the set bits back, wake irq waiters. */
+void DRM(dma_service)(int irq, void *device, struct pt_regs *regs)
+{
+	drm_device_t *dev = (drm_device_t *)device;
+	drm_i830_private_t *dev_priv = (drm_i830_private_t *)dev->dev_private;
+	u16 temp;
+	/* Debug-level trace only: an unconditional printk here would log on every interrupt. */
+	temp = I830_READ16(I830REG_INT_IDENTITY_R);
+	DRM_DEBUG("%s: %x\n", __FUNCTION__, temp);
+
+	if (temp == 0)
+		return;
+	/* Write the set bits back -- presumably this acknowledges them; confirm against chip docs. */
+	I830_WRITE16(I830REG_INT_IDENTITY_R, temp);
+	/* Bit 1 feeds the counter and wait queue that i830_wait_irq() sleeps on. */
+	if (temp & 2) {
+		atomic_inc(&dev_priv->irq_received);
+		wake_up_interruptible(&dev_priv->irq_queue);
+	}
+}
+
+/* Bump irq_emitted, queue a GFX_OP_USER_INTERRUPT on the LP ring, return the new count. */
+int i830_emit_irq(drm_device_t *dev)
+{
+	drm_i830_private_t *dev_priv = dev->dev_private;
+	RING_LOCALS;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+
+	atomic_inc(&dev_priv->irq_emitted);
+
+   	BEGIN_LP_RING(2);
+      	OUT_RING( 0 );
+      	OUT_RING( GFX_OP_USER_INTERRUPT );
+      	ADVANCE_LP_RING();
+
+	return atomic_read(&dev_priv->irq_emitted);	/* re-read after the inc; not one atomic step */
+}
+
+/* Sleep until irq_received >= irq_nr. Returns 0, -EBUSY once the 3s deadline passes, or -EINTR on a signal. */
+int i830_wait_irq(drm_device_t *dev, int irq_nr)
+{
+	drm_i830_private_t *dev_priv =
+		(drm_i830_private_t *)dev->dev_private;
+	DECLARE_WAITQUEUE(entry, current);
+	unsigned long end = jiffies + HZ*3;
+	int ret = 0;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	/* Fast path: the requested interrupt has already been counted. */
+	if (atomic_read(&dev_priv->irq_received) >= irq_nr)
+		return 0;
+
+	dev_priv->sarea_priv->perf_boxes |= I830_BOX_WAIT;
+	add_wait_queue(&dev_priv->irq_queue, &entry);
+
+	/* set_current_state() issues a barrier after the store, so the counter
+	 * check below cannot be reordered ahead of it and miss a wake-up. */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (atomic_read(&dev_priv->irq_received) >= irq_nr)
+			break;
+		if ((signed)(end - jiffies) <= 0) {
+			DRM_ERROR("timeout iir %x imr %x ier %x hwstam %x\n",
+				  I830_READ16( I830REG_INT_IDENTITY_R ),
+				  I830_READ16( I830REG_INT_MASK_R ),
+				  I830_READ16( I830REG_INT_ENABLE_R ),
+				  I830_READ16( I830REG_HWSTAM ));
+			ret = -EBUSY;	/* Lockup?  Missed irq? */
+			break;
+		}
+		schedule_timeout(HZ*3);
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&dev_priv->irq_queue, &entry);
+	return ret;
+}
+
+
+/* ioctl: emit a user interrupt and copy its sequence number to emit.irq_seq.
+ * Needs the lock as it touches the ring. */
+int i830_irq_emit( struct inode *inode, struct file *filp, unsigned int cmd,
+		   unsigned long arg )
+{
+	drm_file_t	  *priv	    = filp->private_data;
+	drm_device_t	  *dev	    = priv->dev;
+	drm_i830_private_t *dev_priv = dev->dev_private;
+	drm_i830_irq_emit_t emit;
+	int result;
+
+   	if(!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) {
+		DRM_ERROR("i830_irq_emit called without lock held\n");
+		return -EINVAL;
+	}
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if (copy_from_user( &emit, (drm_i830_irq_emit_t *)arg, sizeof(emit) ))
+		return -EFAULT;
+
+	result = i830_emit_irq( dev );
+
+	if ( copy_to_user( emit.irq_seq, &result, sizeof(int) ) ) {	/* interrupt is already queued at this point */
+		DRM_ERROR( "copy_to_user\n" );
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+/* ioctl: copy a drm_i830_irq_wait_t from 'arg' and return i830_wait_irq()'s result
+ * for its irq_seq.  Doesn't need the hardware lock. */
+int i830_irq_wait( struct inode *inode, struct file *filp, unsigned int cmd,
+		   unsigned long arg )
+{
+	drm_file_t	  *priv	    = filp->private_data;
+	drm_device_t	  *dev	    = priv->dev;
+	drm_i830_private_t *dev_priv = dev->dev_private;
+	drm_i830_irq_wait_t irqwait;
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if (copy_from_user( &irqwait, (drm_i830_irq_wait_t *)arg, 
+			    sizeof(irqwait) ))
+		return -EFAULT;
+
+	return i830_wait_irq( dev, irqwait.irq_seq );
+}
+
diff -Nru a/drivers/char/drm/mga_dma.c b/drivers/char/drm/mga_dma.c
--- a/drivers/char/drm/mga_dma.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/mga_dma.c	Mon Mar 31 13:41:08 2003
@@ -686,7 +686,7 @@
 	drm_mga_private_t *dev_priv = (drm_mga_private_t *)dev->dev_private;
 	drm_lock_t lock;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( lock, (drm_lock_t *)data, sizeof(lock) );
 
@@ -720,7 +720,7 @@
 	DRM_DEVICE;
 	drm_mga_private_t *dev_priv = (drm_mga_private_t *)dev->dev_private;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	return mga_do_dma_reset( dev_priv );
 }
@@ -730,7 +730,8 @@
  * DMA buffer management
  */
 
-static int mga_dma_get_buffers( drm_device_t *dev, drm_dma_t *d )
+static int mga_dma_get_buffers( DRMFILE filp,
+				drm_device_t *dev, drm_dma_t *d )
 {
 	drm_buf_t *buf;
 	int i;
@@ -739,7 +740,7 @@
 		buf = mga_freelist_get( dev );
 		if ( !buf ) return DRM_ERR(EAGAIN);
 
-		buf->pid = DRM_CURRENTPID;
+		buf->filp = filp;
 
 		if ( DRM_COPY_TO_USER( &d->request_indices[i],
 				   &buf->idx, sizeof(buf->idx) ) )
@@ -761,7 +762,7 @@
 	drm_dma_t d;
 	int ret = 0;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( d, (drm_dma_t *)data, sizeof(d) );
 
@@ -786,7 +787,7 @@
 	d.granted_count = 0;
 
 	if ( d.request_count ) {
-		ret = mga_dma_get_buffers( dev, &d );
+		ret = mga_dma_get_buffers( filp, dev, &d );
 	}
 
 	DRM_COPY_TO_USER_IOCTL( (drm_dma_t *)data, d, sizeof(d) );
diff -Nru a/drivers/char/drm/mga_drv.h b/drivers/char/drm/mga_drv.h
--- a/drivers/char/drm/mga_drv.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/mga_drv.h	Mon Mar 31 13:41:08 2003
@@ -90,14 +90,14 @@
 	unsigned int texture_offset;
 	unsigned int texture_size;
 
-	drm_map_t *sarea;
-	drm_map_t *fb;
-	drm_map_t *mmio;
-	drm_map_t *status;
-	drm_map_t *warp;
-	drm_map_t *primary;
-	drm_map_t *buffers;
-	drm_map_t *agp_textures;
+	drm_local_map_t *sarea;
+	drm_local_map_t *fb;
+	drm_local_map_t *mmio;
+	drm_local_map_t *status;
+	drm_local_map_t *warp;
+	drm_local_map_t *primary;
+	drm_local_map_t *buffers;
+	drm_local_map_t *agp_textures;
 } drm_mga_private_t;
 
 				/* mga_dma.c */
@@ -131,32 +131,30 @@
 extern int mga_warp_install_microcode( drm_mga_private_t *dev_priv );
 extern int mga_warp_init( drm_mga_private_t *dev_priv );
 
-#define mga_flush_write_combine()	DRM_WRITEMEMORYBARRIER()
-
+#define mga_flush_write_combine()	DRM_WRITEMEMORYBARRIER(dev_priv->primary)
 
+#if defined(__linux__) && defined(__alpha__)
 #define MGA_BASE( reg )		((unsigned long)(dev_priv->mmio->handle))
 #define MGA_ADDR( reg )		(MGA_BASE(reg) + reg)
 
 #define MGA_DEREF( reg )	*(volatile u32 *)MGA_ADDR( reg )
 #define MGA_DEREF8( reg )	*(volatile u8 *)MGA_ADDR( reg )
 
-#ifdef __alpha__
 #define MGA_READ( reg )		(_MGA_READ((u32 *)MGA_ADDR(reg)))
 #define MGA_READ8( reg )	(_MGA_READ((u8 *)MGA_ADDR(reg)))
-#define MGA_WRITE( reg, val )	do { DRM_WRITEMEMORYBARRIER(); MGA_DEREF( reg ) = val; } while (0)
-#define MGA_WRITE8( reg, val )  do { DRM_WRITEMEMORYBARRIER(); MGA_DEREF8( reg ) = val; } while (0)
+#define MGA_WRITE( reg, val )	do { DRM_WRITEMEMORYBARRIER(dev_priv->mmio); MGA_DEREF( reg ) = val; } while (0)
+#define MGA_WRITE8( reg, val )  do { DRM_WRITEMEMORYBARRIER(dev_priv->mmio); MGA_DEREF8( reg ) = val; } while (0)
 
 static inline u32 _MGA_READ(u32 *addr)
 {
-	DRM_READMEMORYBARRIER();
+	DRM_READMEMORYBARRIER(dev_priv->mmio);	/* NOTE(review): dev_priv is neither a parameter nor a local of this inline -- confirm this builds */
 	return *(volatile u32 *)addr;
 }
-
 #else
-#define MGA_READ( reg )		MGA_DEREF( reg )
-#define MGA_READ8( reg )	MGA_DEREF8( reg )
-#define MGA_WRITE( reg, val )	do { MGA_DEREF( reg ) = val; } while (0)
-#define MGA_WRITE8( reg, val )  do { MGA_DEREF8( reg ) = val; } while (0)
+#define MGA_READ8( reg )	DRM_READ8(dev_priv->mmio, (reg))
+#define MGA_READ( reg )		DRM_READ32(dev_priv->mmio, (reg))
+#define MGA_WRITE8( reg, val )  DRM_WRITE8(dev_priv->mmio, (reg), (val))
+#define MGA_WRITE( reg, val )	DRM_WRITE32(dev_priv->mmio, (reg), (val))
 #endif
 
 #define DWGREG0 	0x1c00
@@ -183,16 +181,6 @@
 		} else {						\
 			mga_g200_emit_state( dev_priv );		\
 		}							\
-	}								\
-} while (0)
-
-#define LOCK_TEST_WITH_RETURN( dev )					\
-do {									\
-	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
-	     dev->lock.pid != DRM_CURRENTPID ) {				\
-		DRM_ERROR( "%s called without lock held\n",		\
-			   __FUNCTION__ );					\
-		return DRM_ERR(EINVAL);				\
 	}								\
 } while (0)
 
diff -Nru a/drivers/char/drm/mga_state.c b/drivers/char/drm/mga_state.c
--- a/drivers/char/drm/mga_state.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/mga_state.c	Mon Mar 31 13:41:07 2003
@@ -887,7 +887,7 @@
 	drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	drm_mga_clear_t clear;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( clear, (drm_mga_clear_t *)data, sizeof(clear) );
 
@@ -911,7 +911,7 @@
 	drm_mga_private_t *dev_priv = dev->dev_private;
 	drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( sarea_priv->nbox > MGA_NR_SAREA_CLIPRECTS )
 		sarea_priv->nbox = MGA_NR_SAREA_CLIPRECTS;
@@ -936,7 +936,7 @@
 	drm_mga_buf_priv_t *buf_priv;
 	drm_mga_vertex_t vertex;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( vertex,
 			     (drm_mga_vertex_t *)data,
@@ -975,7 +975,7 @@
 	drm_mga_buf_priv_t *buf_priv;
 	drm_mga_indices_t indices;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( indices,
 			     (drm_mga_indices_t *)data,
@@ -1015,7 +1015,7 @@
 	drm_mga_iload_t iload;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( iload, (drm_mga_iload_t *)data, sizeof(iload) );
 
@@ -1055,7 +1055,7 @@
 	drm_mga_blit_t blit;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( blit, (drm_mga_blit_t *)data, sizeof(blit) );
 
diff -Nru a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c
--- a/drivers/char/drm/r128_cce.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/r128_cce.c	Mon Mar 31 13:41:06 2003
@@ -579,6 +579,7 @@
 		(dev_priv->ring.size / sizeof(u32)) - 1;
 
 	dev_priv->ring.high_mark = 128;
+	dev_priv->ring.ring_rptr = dev_priv->ring_rptr;
 
 	dev_priv->sarea_priv->last_frame = 0;
 	R128_WRITE( R128_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame );
@@ -663,7 +664,7 @@
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( dev_priv->cce_running || dev_priv->cce_mode == R128_PM4_NONPM4 ) {
 		DRM_DEBUG( "%s while CCE running\n", __FUNCTION__ );
@@ -686,7 +687,7 @@
 	int ret;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL(stop, (drm_r128_cce_stop_t *)data, sizeof(stop) );
 
@@ -725,7 +726,7 @@
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_DEBUG( "%s called before init done\n", __FUNCTION__ );
@@ -746,7 +747,7 @@
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( dev_priv->cce_running ) {
 		r128_do_cce_flush( dev_priv );
@@ -760,7 +761,7 @@
 	DRM_DEVICE;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	return r128_do_engine_reset( dev );
 }
@@ -807,7 +808,7 @@
 	DRM_DEVICE;
 	drm_r128_fullscreen_t fs;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( fs, (drm_r128_fullscreen_t *)data, sizeof(fs) );
 
@@ -889,7 +890,7 @@
 	for ( i = 0 ; i < dma->buf_count ; i++ ) {
 		buf = dma->buflist[i];
 		buf_priv = buf->dev_private;
-		if ( buf->pid == 0 )
+		if ( buf->filp == 0 )
 			return buf;
 	}
 
@@ -948,7 +949,7 @@
 	return DRM_ERR(EBUSY);
 }
 
-static int r128_cce_get_buffers( drm_device_t *dev, drm_dma_t *d )
+static int r128_cce_get_buffers( DRMFILE filp, drm_device_t *dev, drm_dma_t *d )
 {
 	int i;
 	drm_buf_t *buf;
@@ -957,7 +958,7 @@
 		buf = r128_freelist_get( dev );
 		if ( !buf ) return DRM_ERR(EAGAIN);
 
-		buf->pid = DRM_CURRENTPID;
+		buf->filp = filp;
 
 		if ( DRM_COPY_TO_USER( &d->request_indices[i], &buf->idx,
 				   sizeof(buf->idx) ) )
@@ -978,7 +979,7 @@
 	int ret = 0;
 	drm_dma_t d;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( d, (drm_dma_t *) data, sizeof(d) );
 
@@ -1001,7 +1002,7 @@
 	d.granted_count = 0;
 
 	if ( d.request_count ) {
-		ret = r128_cce_get_buffers( dev, &d );
+		ret = r128_cce_get_buffers( filp, dev, &d );
 	}
 
 	DRM_COPY_TO_USER_IOCTL((drm_dma_t *) data, d, sizeof(d) );
diff -Nru a/drivers/char/drm/r128_drv.h b/drivers/char/drm/r128_drv.h
--- a/drivers/char/drm/r128_drv.h	Mon Mar 31 13:41:09 2003
+++ b/drivers/char/drm/r128_drv.h	Mon Mar 31 13:41:09 2003
@@ -34,8 +34,8 @@
 #ifndef __R128_DRV_H__
 #define __R128_DRV_H__
 
-#define GET_RING_HEAD(ring)		DRM_READ32(  (volatile u32 *) (ring)->head )
-#define SET_RING_HEAD(ring,val)		DRM_WRITE32( (volatile u32 *) (ring)->head, (val) )
+#define GET_RING_HEAD(ring)		DRM_READ32(  (ring)->ring_rptr, 0 ) /* (ring)->head */
+#define SET_RING_HEAD(ring,val)		DRM_WRITE32( (ring)->ring_rptr, 0, (val) ) /* (ring)->head */
 
 typedef struct drm_r128_freelist {
    	unsigned int age;
@@ -56,6 +56,7 @@
 	int space;
 
 	int high_mark;
+	drm_local_map_t *ring_rptr;
 } drm_r128_ring_buffer_t;
 
 typedef struct drm_r128_private {
@@ -98,13 +99,13 @@
 	u32 depth_pitch_offset_c;
 	u32 span_pitch_offset_c;
 
-	drm_map_t *sarea;
-	drm_map_t *fb;
-	drm_map_t *mmio;
-	drm_map_t *cce_ring;
-	drm_map_t *ring_rptr;
-	drm_map_t *buffers;
-	drm_map_t *agp_textures;
+	drm_local_map_t *sarea;
+	drm_local_map_t *fb;
+	drm_local_map_t *mmio;
+	drm_local_map_t *cce_ring;
+	drm_local_map_t *ring_rptr;
+	drm_local_map_t *buffers;
+	drm_local_map_t *agp_textures;
 } drm_r128_private_t;
 
 typedef struct drm_r128_buf_priv {
@@ -370,15 +371,10 @@
 
 #define R128_PERFORMANCE_BOXES		0
 
-
-#define R128_BASE(reg)		((unsigned long)(dev_priv->mmio->handle))
-#define R128_ADDR(reg)		(R128_BASE( reg ) + reg)
-
-#define R128_READ(reg)		DRM_READ32(  (volatile u32 *) R128_ADDR(reg) )
-#define R128_WRITE(reg,val)	DRM_WRITE32( (volatile u32 *) R128_ADDR(reg), (val) )
-
-#define R128_READ8(reg)		DRM_READ8(  (volatile u8 *) R128_ADDR(reg) )
-#define R128_WRITE8(reg,val)	DRM_WRITE8( (volatile u8 *) R128_ADDR(reg), (val) )
+#define R128_READ(reg)		DRM_READ32(  dev_priv->mmio, (reg) )
+#define R128_WRITE(reg,val)	DRM_WRITE32( dev_priv->mmio, (reg), (val) )
+#define R128_READ8(reg)		DRM_READ8(   dev_priv->mmio, (reg) )
+#define R128_WRITE8(reg,val)	DRM_WRITE8(  dev_priv->mmio, (reg), (val) )
 
 #define R128_WRITE_PLL(addr,val)					\
 do {									\
@@ -403,15 +399,6 @@
  * Misc helper macros
  */
 
-#define LOCK_TEST_WITH_RETURN( dev )					\
-do {									\
-	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
-	     dev->lock.pid != DRM_CURRENTPID ) {			\
-		DRM_ERROR( "%s called without lock held\n", __FUNCTION__ );	\
-		return DRM_ERR(EINVAL);				\
-	}								\
-} while (0)
-
 #define RING_SPACE_TEST_WITH_RETURN( dev_priv )				\
 do {									\
 	drm_r128_ring_buffer_t *ring = &dev_priv->ring; int i;		\
@@ -453,7 +440,7 @@
 #if defined(__powerpc__)
 #define r128_flush_write_combine()	(void) GET_RING_HEAD( &dev_priv->ring )
 #else
-#define r128_flush_write_combine()	DRM_WRITEMEMORYBARRIER()
+#define r128_flush_write_combine()	DRM_WRITEMEMORYBARRIER(dev_priv->ring_rptr)
 #endif
 
 
diff -Nru a/drivers/char/drm/r128_state.c b/drivers/char/drm/r128_state.c
--- a/drivers/char/drm/r128_state.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/r128_state.c	Mon Mar 31 13:41:06 2003
@@ -778,7 +778,8 @@
 	sarea_priv->nbox = 0;
 }
 
-static int r128_cce_dispatch_blit( drm_device_t *dev,
+static int r128_cce_dispatch_blit( DRMFILE filp,
+				   drm_device_t *dev,
 				   drm_r128_blit_t *blit )
 {
 	drm_r128_private_t *dev_priv = dev->dev_private;
@@ -829,9 +830,9 @@
 	buf = dma->buflist[blit->idx];
 	buf_priv = buf->dev_private;
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -896,7 +897,7 @@
 	int count, x, y;
 	u32 *buffer;
 	u8 *mask;
-	int i;
+	int i, buffer_size, mask_size;
 	RING_LOCALS;
 	DRM_DEBUG( "\n" );
 
@@ -908,25 +909,25 @@
 		return DRM_ERR(EFAULT);
 	}
 
-	buffer = DRM_MALLOC( depth->n * sizeof(u32) );
+	buffer_size = depth->n * sizeof(u32);
+	buffer = DRM_MALLOC( buffer_size );
 	if ( buffer == NULL )
 		return DRM_ERR(ENOMEM);
-	if ( DRM_COPY_FROM_USER( buffer, depth->buffer,
-			     depth->n * sizeof(u32) ) ) {
-		DRM_FREE( buffer );
+	if ( DRM_COPY_FROM_USER( buffer, depth->buffer, buffer_size ) ) {
+		DRM_FREE( buffer, buffer_size);
 		return DRM_ERR(EFAULT);
 	}
 
+	mask_size = depth->n * sizeof(u8);
 	if ( depth->mask ) {
-		mask = DRM_MALLOC( depth->n * sizeof(u8) );
+		mask = DRM_MALLOC( mask_size );
 		if ( mask == NULL ) {
-			DRM_FREE( buffer );
+			DRM_FREE( buffer, buffer_size );
 			return DRM_ERR(ENOMEM);
 		}
-		if ( DRM_COPY_FROM_USER( mask, depth->mask,
-				     depth->n * sizeof(u8) ) ) {
-			DRM_FREE( buffer );
-			DRM_FREE( mask );
+		if ( DRM_COPY_FROM_USER( mask, depth->mask, mask_size ) ) {
+			DRM_FREE( buffer, buffer_size );
+			DRM_FREE( mask, mask_size );
 			return DRM_ERR(EFAULT);
 		}
 
@@ -953,7 +954,7 @@
 			}
 		}
 
-		DRM_FREE( mask );
+		DRM_FREE( mask, mask_size );
 	} else {
 		for ( i = 0 ; i < count ; i++, x++ ) {
 			BEGIN_RING( 6 );
@@ -977,7 +978,7 @@
 		}
 	}
 
-	DRM_FREE( buffer );
+	DRM_FREE( buffer, buffer_size );
 
 	return 0;
 }
@@ -989,60 +990,62 @@
 	int count, *x, *y;
 	u32 *buffer;
 	u8 *mask;
-	int i;
+	int i, xbuf_size, ybuf_size, buffer_size, mask_size;
 	RING_LOCALS;
 	DRM_DEBUG( "\n" );
 
 	count = depth->n;
 
-	x = DRM_MALLOC( count * sizeof(*x) );
+	xbuf_size = count * sizeof(*x);
+	ybuf_size = count * sizeof(*y);
+	x = DRM_MALLOC( xbuf_size );
 	if ( x == NULL ) {
 		return DRM_ERR(ENOMEM);
 	}
-	y = DRM_MALLOC( count * sizeof(*y) );
+	y = DRM_MALLOC( ybuf_size );
 	if ( y == NULL ) {
-		DRM_FREE( x );
+		DRM_FREE( x, xbuf_size );
 		return DRM_ERR(ENOMEM);
 	}
-	if ( DRM_COPY_FROM_USER( x, depth->x, count * sizeof(int) ) ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
+	if ( DRM_COPY_FROM_USER( x, depth->x, xbuf_size ) ) {
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
 		return DRM_ERR(EFAULT);
 	}
-	if ( DRM_COPY_FROM_USER( y, depth->y, count * sizeof(int) ) ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
+	if ( DRM_COPY_FROM_USER( y, depth->y, ybuf_size ) ) {	/* y was allocated with ybuf_size */
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
 		return DRM_ERR(EFAULT);
 	}
 
-	buffer = DRM_MALLOC( depth->n * sizeof(u32) );
+	buffer_size = depth->n * sizeof(u32);
+	buffer = DRM_MALLOC( buffer_size );
 	if ( buffer == NULL ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
 		return DRM_ERR(ENOMEM);
 	}
-	if ( DRM_COPY_FROM_USER( buffer, depth->buffer,
-			     depth->n * sizeof(u32) ) ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
-		DRM_FREE( buffer );
+	if ( DRM_COPY_FROM_USER( buffer, depth->buffer, buffer_size ) ) {
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
+		DRM_FREE( buffer, buffer_size );
 		return DRM_ERR(EFAULT);
 	}
 
 	if ( depth->mask ) {
-		mask = DRM_MALLOC( depth->n * sizeof(u8) );
+		mask_size = depth->n * sizeof(u8);
+		mask = DRM_MALLOC( mask_size );
 		if ( mask == NULL ) {
-			DRM_FREE( x );
-			DRM_FREE( y );
-			DRM_FREE( buffer );
+			DRM_FREE( x, xbuf_size );
+			DRM_FREE( y, ybuf_size );
+			DRM_FREE( buffer, buffer_size );
 			return DRM_ERR(ENOMEM);
 		}
-		if ( DRM_COPY_FROM_USER( mask, depth->mask,
-				     depth->n * sizeof(u8) ) ) {
-			DRM_FREE( x );
-			DRM_FREE( y );
-			DRM_FREE( buffer );
-			DRM_FREE( mask );
+		if ( DRM_COPY_FROM_USER( mask, depth->mask, mask_size ) ) {
+			DRM_FREE( x, xbuf_size );
+			DRM_FREE( y, ybuf_size );
+			DRM_FREE( buffer, buffer_size );
+			DRM_FREE( mask, mask_size );
 			return DRM_ERR(EFAULT);
 		}
 
@@ -1069,7 +1072,7 @@
 			}
 		}
 
-		DRM_FREE( mask );
+		DRM_FREE( mask, mask_size );
 	} else {
 		for ( i = 0 ; i < count ; i++ ) {
 			BEGIN_RING( 6 );
@@ -1093,9 +1096,9 @@
 		}
 	}
 
-	DRM_FREE( x );
-	DRM_FREE( y );
-	DRM_FREE( buffer );
+	DRM_FREE( x, xbuf_size );
+	DRM_FREE( y, ybuf_size );
+	DRM_FREE( buffer, buffer_size );
 
 	return 0;
 }
@@ -1146,7 +1149,7 @@
 {
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	int count, *x, *y;
-	int i;
+	int i, xbuf_size, ybuf_size;
 	RING_LOCALS;
 	DRM_DEBUG( "%s\n", __FUNCTION__ );
 
@@ -1155,23 +1158,25 @@
 		count = dev_priv->depth_pitch;
 	}
 
-	x = DRM_MALLOC( count * sizeof(*x) );
+	xbuf_size = count * sizeof(*x);
+	ybuf_size = count * sizeof(*y);
+	x = DRM_MALLOC( xbuf_size );
 	if ( x == NULL ) {
 		return DRM_ERR(ENOMEM);
 	}
-	y = DRM_MALLOC( count * sizeof(*y) );
+	y = DRM_MALLOC( ybuf_size );
 	if ( y == NULL ) {
-		DRM_FREE( x );
+		DRM_FREE( x, xbuf_size );
 		return DRM_ERR(ENOMEM);
 	}
-	if ( DRM_COPY_FROM_USER( x, depth->x, count * sizeof(int) ) ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
+	if ( DRM_COPY_FROM_USER( x, depth->x, xbuf_size ) ) {
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
 		return DRM_ERR(EFAULT);
 	}
-	if ( DRM_COPY_FROM_USER( y, depth->y, count * sizeof(int) ) ) {
-		DRM_FREE( x );
-		DRM_FREE( y );
+	if ( DRM_COPY_FROM_USER( y, depth->y, ybuf_size ) ) {
+		DRM_FREE( x, xbuf_size );
+		DRM_FREE( y, ybuf_size );
 		return DRM_ERR(EFAULT);
 	}
 
@@ -1199,8 +1204,8 @@
 		ADVANCE_RING();
 	}
 
-	DRM_FREE( x );
-	DRM_FREE( y );
+	DRM_FREE( x, xbuf_size );
+	DRM_FREE( y, ybuf_size );
 
 	return 0;
 }
@@ -1240,7 +1245,7 @@
 	drm_r128_clear_t clear;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( clear, (drm_r128_clear_t *) data,
 			     sizeof(clear) );
@@ -1266,7 +1271,7 @@
 	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	DRM_DEBUG( "%s\n", __FUNCTION__ );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 
@@ -1293,7 +1298,7 @@
 	drm_r128_buf_priv_t *buf_priv;
 	drm_r128_vertex_t vertex;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1324,9 +1329,9 @@
 	buf = dma->buflist[vertex.idx];
 	buf_priv = buf->dev_private;
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -1353,7 +1358,7 @@
 	drm_r128_indices_t elts;
 	int count;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1383,9 +1388,9 @@
 	buf = dma->buflist[elts.idx];
 	buf_priv = buf->dev_private;
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -1421,7 +1426,7 @@
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	drm_r128_blit_t blit;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( blit, (drm_r128_blit_t *) data,
 			     sizeof(blit) );
@@ -1437,7 +1442,7 @@
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
-	return r128_cce_dispatch_blit( dev, &blit );
+	return r128_cce_dispatch_blit( filp, dev, &blit );
 }
 
 int r128_cce_depth( DRM_IOCTL_ARGS )
@@ -1446,7 +1451,7 @@
 	drm_r128_private_t *dev_priv = dev->dev_private;
 	drm_r128_depth_t depth;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( depth, (drm_r128_depth_t *) data,
 			     sizeof(depth) );
@@ -1474,7 +1479,7 @@
 	drm_r128_stipple_t stipple;
 	u32 mask[32];
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_r128_stipple_t *) data,
 			     sizeof(stipple) );
@@ -1502,7 +1507,7 @@
 	RING_LOCALS;
 #endif
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1525,9 +1530,9 @@
 	buf = dma->buflist[indirect.idx];
 	buf_priv = buf->dev_private;
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
diff -Nru a/drivers/char/drm/radeon.h b/drivers/char/drm/radeon.h
--- a/drivers/char/drm/radeon.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/radeon.h	Mon Mar 31 13:41:08 2003
@@ -51,7 +51,7 @@
 #define DRIVER_DATE		"20020828"
 
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		7
+#define DRIVER_MINOR		8
 #define DRIVER_PATCHLEVEL	0
 
 /* Interface history:
@@ -77,6 +77,7 @@
  *       and R200_PP_CUBIC_OFFSET_F1_[0..5].
  *       Added packets R200_EMIT_PP_CUBIC_FACES_[0..5] and
  *       R200_EMIT_PP_CUBIC_OFFSETS_[0..5].  (brian)
+ * 1.8 - Remove need to call cleanup ioctls on last client exit (keith)
  */
 #define DRIVER_IOCTLS							     \
  [DRM_IOCTL_NR(DRM_IOCTL_DMA)]               = { radeon_cp_buffers,  1, 0 }, \
@@ -105,11 +106,6 @@
  [DRM_IOCTL_NR(DRM_IOCTL_RADEON_IRQ_WAIT)]   = { radeon_irq_wait, 1, 0 },
 
 
-#define USE_IRQS 1
-#if USE_IRQS
-#define __HAVE_DMA_IRQ		1
-#define __HAVE_VBL_IRQ		1
-#define __HAVE_SHARED_IRQ       1
 
 /* When a client dies:
  *    - Check for and clean up flipped page state
@@ -117,35 +113,34 @@
  *
  * DRM infrastructure takes care of reclaiming dma buffers.
  */
-#define DRIVER_PRERELEASE() do {					\
+#define DRIVER_PRERELEASE() 						\
+do {									\
 	if ( dev->dev_private ) {					\
 		drm_radeon_private_t *dev_priv = dev->dev_private;	\
 		if ( dev_priv->page_flipping ) {			\
 			radeon_do_cleanup_pageflip( dev );		\
 		}							\
-		radeon_mem_release( dev_priv->agp_heap );		\
+                radeon_mem_release( filp, dev_priv->agp_heap );		\
+                radeon_mem_release( filp, dev_priv->fb_heap );		\
 	}								\
 } while (0)
 
-/* On unloading the module:
- *    - Free memory heap structure
- *    - Remove mappings made at startup and free dev_private.
+/* When the last client dies, shut down the CP and free dev->dev_private.
  */
-#define DRIVER_PRETAKEDOWN() do {					\
-	if ( dev->dev_private ) {					\
-		drm_radeon_private_t *dev_priv = dev->dev_private;	\
-		radeon_mem_takedown( &(dev_priv->agp_heap) );		\
-		radeon_do_cleanup_cp( dev );				\
-	}								\
+/* #define __HAVE_RELEASE 1 */
+#define DRIVER_PRETAKEDOWN()			\
+do {						\
+    radeon_do_release( dev );			\
 } while (0)
 
-#else
-#define __HAVE_DMA_IRQ 0
-#endif
+
 
 /* DMA customization:
  */
 #define __HAVE_DMA		1
+#define __HAVE_DMA_IRQ		1
+#define __HAVE_VBL_IRQ		1
+#define __HAVE_SHARED_IRQ       1
 
 
 /* Buffer customization:
diff -Nru a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c
--- a/drivers/char/drm/radeon_cp.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/drm/radeon_cp.c	Mon Mar 31 13:41:06 2003
@@ -926,11 +926,11 @@
 	RADEON_WRITE( RADEON_SCRATCH_UMSK, 0x7 );
 
 	/* Writeback doesn't seem to work everywhere, test it first */
-	DRM_WRITE32( &dev_priv->scratch[1], 0 );
+	DRM_WRITE32( dev_priv->ring_rptr, RADEON_SCRATCHOFF(1), 0 );
 	RADEON_WRITE( RADEON_SCRATCH_REG1, 0xdeadbeef );
 
 	for ( tmp = 0 ; tmp < dev_priv->usec_timeout ; tmp++ ) {
-		if ( DRM_READ32( &dev_priv->scratch[1] ) == 0xdeadbeef )
+		if ( DRM_READ32( dev_priv->ring_rptr, RADEON_SCRATCHOFF(1) ) == 0xdeadbeef )
 			break;
 		DRM_UDELAY( 1 );
 	}
@@ -1217,6 +1217,7 @@
 		(dev_priv->ring.size / sizeof(u32)) - 1;
 
 	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
+	dev_priv->ring.ring_rptr = dev_priv->ring_rptr;
 
 #if __REALLY_HAVE_SG
 	if ( dev_priv->is_pci ) {
@@ -1322,7 +1323,7 @@
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( dev_priv->cp_running ) {
 		DRM_DEBUG( "%s while CP running\n", __FUNCTION__ );
@@ -1350,10 +1351,13 @@
 	int ret;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( stop, (drm_radeon_cp_stop_t *)data, sizeof(stop) );
 
+	if (!dev_priv->cp_running)
+		return 0;
+
 	/* Flush any pending CP commands.  This ensures any outstanding
 	 * commands are exectuted by the engine before we turn it off.
 	 */
@@ -1381,6 +1385,39 @@
 	return 0;
 }
 
+
+void radeon_do_release( drm_device_t *dev )
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int ret;
+
+	if (dev_priv) {
+		if (dev_priv->cp_running) {
+			/* Stop the cp */
+			while ((ret = radeon_do_cp_idle( dev_priv )) != 0) {
+				DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
+#ifdef __linux__
+				schedule();
+#else
+				tsleep(&ret, PZERO, "rdnrel", 1);
+#endif
+			}
+			radeon_do_cp_stop( dev_priv );
+			radeon_do_engine_reset( dev );
+		}
+
+		/* Disable *all* interrupts */
+		RADEON_WRITE( RADEON_GEN_INT_CNTL, 0 );
+
+		/* Free memory heap structures */
+		radeon_mem_takedown( &(dev_priv->agp_heap) );
+		radeon_mem_takedown( &(dev_priv->fb_heap) );
+
+		/* deallocate kernel resources */
+		radeon_do_cleanup_cp( dev );
+	}
+}
+
 /* Just reset the CP ring.  Called as part of an X Server engine reset.
  */
 int radeon_cp_reset( DRM_IOCTL_ARGS )
@@ -1389,7 +1426,7 @@
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_DEBUG( "%s called before init done\n", __FUNCTION__ );
@@ -1410,10 +1447,7 @@
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
-
-/* 	if (dev->irq)  */
-/* 		radeon_emit_and_wait_irq( dev ); */
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	return radeon_do_cp_idle( dev_priv );
 }
@@ -1423,7 +1457,7 @@
 	DRM_DEVICE;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	return radeon_do_engine_reset( dev );
 }
@@ -1482,7 +1516,7 @@
 		for ( i = start ; i < dma->buf_count ; i++ ) {
 			buf = dma->buflist[i];
 			buf_priv = buf->dev_private;
-			if ( buf->pid == 0 || (buf->pending && 
+			if ( buf->filp == 0 || (buf->pending && 
 					       buf_priv->age <= done_age) ) {
 				dev_priv->stats.requested_bufs++;
 				buf->pending = 0;
@@ -1509,7 +1543,7 @@
 	drm_buf_t *buf;
 	int i, t;
 	int start;
-	u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
+	u32 done_age = DRM_READ32(dev_priv->ring_rptr, RADEON_SCRATCHOFF(1));
 
 	if ( ++dev_priv->last_buf >= dma->buf_count )
 		dev_priv->last_buf = 0;
@@ -1521,7 +1555,7 @@
 		for ( i = start ; i < dma->buf_count ; i++ ) {
 			buf = dma->buflist[i];
 			buf_priv = buf->dev_private;
-			if ( buf->pid == 0 || (buf->pending && 
+			if ( buf->filp == 0 || (buf->pending && 
 					       buf_priv->age <= done_age) ) {
 				dev_priv->stats.requested_bufs++;
 				buf->pending = 0;
@@ -1586,7 +1620,7 @@
 	return DRM_ERR(EBUSY);
 }
 
-static int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d )
+static int radeon_cp_get_buffers( DRMFILE filp, drm_device_t *dev, drm_dma_t *d )
 {
 	int i;
 	drm_buf_t *buf;
@@ -1595,7 +1629,7 @@
 		buf = radeon_freelist_get( dev );
 		if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */
 
-		buf->pid = DRM_CURRENTPID;
+		buf->filp = filp;
 
 		if ( DRM_COPY_TO_USER( &d->request_indices[i], &buf->idx,
 				   sizeof(buf->idx) ) )
@@ -1616,7 +1650,7 @@
 	int ret = 0;
 	drm_dma_t d;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( d, (drm_dma_t *)data, sizeof(d) );
 
@@ -1639,7 +1673,7 @@
 	d.granted_count = 0;
 
 	if ( d.request_count ) {
-		ret = radeon_cp_get_buffers( dev, &d );
+		ret = radeon_cp_get_buffers( filp, dev, &d );
 	}
 
 	DRM_COPY_TO_USER_IOCTL( (drm_dma_t *)data, d, sizeof(d) );
diff -Nru a/drivers/char/drm/radeon_drm.h b/drivers/char/drm/radeon_drm.h
--- a/drivers/char/drm/radeon_drm.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/radeon_drm.h	Mon Mar 31 13:41:07 2003
@@ -382,7 +382,7 @@
 #define DRM_IOCTL_RADEON_STIPPLE    DRM_IOW( 0x4c, drm_radeon_stipple_t)
 #define DRM_IOCTL_RADEON_INDIRECT   DRM_IOWR(0x4d, drm_radeon_indirect_t)
 #define DRM_IOCTL_RADEON_TEXTURE    DRM_IOWR(0x4e, drm_radeon_texture_t)
-#define DRM_IOCTL_RADEON_VERTEX2    DRM_IOW( 0x4f, drm_radeon_vertex_t)
+#define DRM_IOCTL_RADEON_VERTEX2    DRM_IOW( 0x4f, drm_radeon_vertex2_t)
 #define DRM_IOCTL_RADEON_CMDBUF     DRM_IOW( 0x50, drm_radeon_cmd_buffer_t)
 #define DRM_IOCTL_RADEON_GETPARAM   DRM_IOWR(0x51, drm_radeon_getparam_t)
 #define DRM_IOCTL_RADEON_FLIP	    DRM_IO(  0x52)
@@ -396,7 +396,7 @@
 	enum {
 		RADEON_INIT_CP    = 0x01,
 		RADEON_CLEANUP_CP = 0x02,
-		RADEON_INIT_R200_CP = 0x03,	
+		RADEON_INIT_R200_CP = 0x03
 	} func;
 	unsigned long sarea_priv_offset;
 	int is_pci;
diff -Nru a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h
--- a/drivers/char/drm/radeon_drv.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/radeon_drv.h	Mon Mar 31 13:41:07 2003
@@ -31,8 +31,8 @@
 #ifndef __RADEON_DRV_H__
 #define __RADEON_DRV_H__
 
-#define GET_RING_HEAD(ring)		DRM_READ32(  (volatile u32 *) (ring)->head )
-#define SET_RING_HEAD(ring,val)		DRM_WRITE32( (volatile u32 *) (ring)->head , (val))
+#define GET_RING_HEAD(ring)		DRM_READ32(  (ring)->ring_rptr, 0 ) /* (ring)->head */
+#define SET_RING_HEAD(ring,val)		DRM_WRITE32( (ring)->ring_rptr, 0, (val) ) /* (ring)->head */
 
 typedef struct drm_radeon_freelist {
    	unsigned int age;
@@ -53,6 +53,7 @@
 	int space;
 
 	int high_mark;
+	drm_local_map_t *ring_rptr;
 } drm_radeon_ring_buffer_t;
 
 typedef struct drm_radeon_depth_clear_t {
@@ -67,7 +68,7 @@
 	struct mem_block *prev;
 	int start;
 	int size;
-	int pid;		/* 0: free, -1: heap, other: real pids */
+	DRMFILE filp;		/* 0: free, -1: heap, other: real files */
 };
 
 typedef struct drm_radeon_private {
@@ -126,13 +127,13 @@
 
 	drm_radeon_depth_clear_t depth_clear;
 
-	drm_map_t *sarea;
-	drm_map_t *fb;
-	drm_map_t *mmio;
-	drm_map_t *cp_ring;
-	drm_map_t *ring_rptr;
-	drm_map_t *buffers;
-	drm_map_t *agp_textures;
+	drm_local_map_t *sarea;
+	drm_local_map_t *fb;
+	drm_local_map_t *mmio;
+	drm_local_map_t *cp_ring;
+	drm_local_map_t *ring_rptr;
+	drm_local_map_t *buffers;
+	drm_local_map_t *agp_textures;
 
 	struct mem_block *agp_heap;
 	struct mem_block *fb_heap;
@@ -183,7 +184,7 @@
 extern int radeon_mem_free( DRM_IOCTL_ARGS );
 extern int radeon_mem_init_heap( DRM_IOCTL_ARGS );
 extern void radeon_mem_takedown( struct mem_block **heap );
-extern void radeon_mem_release( struct mem_block *heap );
+extern void radeon_mem_release( DRMFILE filp, struct mem_block *heap );
 
 				/* radeon_irq.c */
 extern int radeon_irq_emit( DRM_IOCTL_ARGS );
@@ -193,6 +194,7 @@
 extern int radeon_wait_irq(drm_device_t *dev, int swi_nr);
 extern int radeon_emit_irq(drm_device_t *dev);
 
+extern void radeon_do_release(drm_device_t *dev);
 
 /* Flags for stats.boxes
  */
@@ -266,8 +268,10 @@
 #define RADEON_SCRATCH_UMSK		0x0770
 #define RADEON_SCRATCH_ADDR		0x0774
 
+#define RADEON_SCRATCHOFF( x )		(RADEON_SCRATCH_REG_OFFSET + 4*(x))
+
 #define GET_SCRATCH( x )	(dev_priv->writeback_works			\
-				? DRM_READ32( &dev_priv->scratch[(x)] )		\
+				? DRM_READ32( dev_priv->ring_rptr, RADEON_SCRATCHOFF(x) ) \
 				: RADEON_READ( RADEON_SCRATCH_REG0 + 4*(x) ) )
 
 
@@ -686,15 +690,10 @@
 
 #define RADEON_RING_HIGH_MARK		128
 
-
-#define RADEON_BASE(reg)	((unsigned long)(dev_priv->mmio->handle))
-#define RADEON_ADDR(reg)	(RADEON_BASE( reg ) + reg)
-
-#define RADEON_READ(reg)	DRM_READ32(  (volatile u32 *) RADEON_ADDR(reg) )
-#define RADEON_WRITE(reg,val)	DRM_WRITE32( (volatile u32 *) RADEON_ADDR(reg), (val) )
-
-#define RADEON_READ8(reg)	DRM_READ8(  (volatile u8 *) RADEON_ADDR(reg) )
-#define RADEON_WRITE8(reg,val)	DRM_WRITE8( (volatile u8 *) RADEON_ADDR(reg), (val) )
+#define RADEON_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
+#define RADEON_WRITE(reg,val)	DRM_WRITE32( dev_priv->mmio, (reg), (val) )
+#define RADEON_READ8(reg)	DRM_READ8(  dev_priv->mmio, (reg) )
+#define RADEON_WRITE8(reg,val)	DRM_WRITE8( dev_priv->mmio, (reg), (val) )
 
 #define RADEON_WRITE_PLL( addr, val )					\
 do {									\
@@ -771,16 +770,6 @@
  * Misc helper macros
  */
 
-#define LOCK_TEST_WITH_RETURN( dev )					\
-do {									\
-	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
-	     dev->lock.pid != DRM_CURRENTPID ) {			\
-		DRM_ERROR( "%s called without lock held\n", __FUNCTION__ );	\
-		return DRM_ERR(EINVAL);				\
-	}								\
-} while (0)
-
-
 /* Perfbox functionality only.  
  */
 #define RING_SPACE_TEST_WITH_RETURN( dev_priv )				\
@@ -823,13 +812,6 @@
  * Ring control
  */
 
-#if defined(__powerpc__)
-#define radeon_flush_write_combine()	(void) GET_RING_HEAD( &dev_priv->ring )
-#else
-#define radeon_flush_write_combine()	DRM_WRITEMEMORYBARRIER()
-#endif
-
-
 #define RADEON_VERBOSE	0
 
 #define RING_LOCALS	int write, _nr; unsigned int mask; u32 *ring;
@@ -863,8 +845,13 @@
 		dev_priv->ring.tail = write;				\
 } while (0)
 
-#define COMMIT_RING() do {					    \
-	RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		    \
+#define COMMIT_RING() do {						\
+	/* Flush writes to ring */					\
+	DRM_READMEMORYBARRIER(dev_priv->mmio);					\
+	GET_RING_HEAD( &dev_priv->ring );				\
+	RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		\
+	/* read from PCI bus to ensure correct posting */		\
+	RADEON_READ( RADEON_CP_RB_RPTR );				\
 } while (0)
 
 #define OUT_RING( x ) do {						\
diff -Nru a/drivers/char/drm/radeon_irq.c b/drivers/char/drm/radeon_irq.c
--- a/drivers/char/drm/radeon_irq.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/radeon_irq.c	Mon Mar 31 13:41:08 2003
@@ -61,7 +61,11 @@
 	   (drm_radeon_private_t *)dev->dev_private;
    	u32 stat;
 
-	stat = RADEON_READ(RADEON_GEN_INT_STATUS);
+	/* Only consider the bits we're interested in - others could be used
+	 * outside the DRM
+	 */
+	stat = RADEON_READ(RADEON_GEN_INT_STATUS)
+	     & (RADEON_SW_INT_TEST | RADEON_CRTC_VBLANK_STAT);
 	if (!stat)
 		return;
 
@@ -77,15 +81,14 @@
 		DRM(vbl_send_signals)( dev );
 	}
 
-	/* Acknowledge all the bits in GEN_INT_STATUS -- seem to get
-	 * more than we asked for...
-	 */
+	/* Acknowledge interrupts we handle */
 	RADEON_WRITE(RADEON_GEN_INT_STATUS, stat);
 }
 
 static __inline__ void radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv)
 {
-	u32 tmp = RADEON_READ( RADEON_GEN_INT_STATUS );
+	u32 tmp = RADEON_READ( RADEON_GEN_INT_STATUS )
+		& (RADEON_SW_INT_TEST_ACK | RADEON_CRTC_VBLANK_STAT);
 	if (tmp)
 		RADEON_WRITE( RADEON_GEN_INT_STATUS, tmp );
 }
@@ -176,7 +179,7 @@
 	drm_radeon_irq_emit_t emit;
 	int result;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
diff -Nru a/drivers/char/drm/radeon_mem.c b/drivers/char/drm/radeon_mem.c
--- a/drivers/char/drm/radeon_mem.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/radeon_mem.c	Mon Mar 31 13:41:08 2003
@@ -40,7 +40,7 @@
  */
 
 static struct mem_block *split_block(struct mem_block *p, int start, int size,
-				     int pid )
+				     DRMFILE filp )
 {
 	/* Maybe cut off the start of an existing block */
 	if (start > p->start) {
@@ -49,7 +49,7 @@
 			goto out;
 		newblock->start = start;
 		newblock->size = p->size - (start - p->start);
-		newblock->pid = 0;
+		newblock->filp = 0;
 		newblock->next = p->next;
 		newblock->prev = p;
 		p->next->prev = newblock;
@@ -65,7 +65,7 @@
 			goto out;
 		newblock->start = start + size;
 		newblock->size = p->size - size;
-		newblock->pid = 0;
+		newblock->filp = 0;
 		newblock->next = p->next;
 		newblock->prev = p;
 		p->next->prev = newblock;
@@ -75,20 +75,20 @@
 
  out:
 	/* Our block is in the middle */
-	p->pid = pid;
+	p->filp = filp;
 	return p;
 }
 
 static struct mem_block *alloc_block( struct mem_block *heap, int size, 
-				      int align2, int pid )
+				      int align2, DRMFILE filp )
 {
 	struct mem_block *p;
 	int mask = (1 << align2)-1;
 
 	for (p = heap->next ; p != heap ; p = p->next) {
 		int start = (p->start + mask) & ~mask;
-		if (p->pid == 0 && start + size <= p->start + p->size)
-			return split_block( p, start, size, pid );
+		if (p->filp == 0 && start + size <= p->start + p->size)
+			return split_block( p, start, size, filp );
 	}
 
 	return NULL;
@@ -108,25 +108,25 @@
 
 static void free_block( struct mem_block *p )
 {
-	p->pid = 0;
+	p->filp = 0;
 
-	/* Assumes a single contiguous range.  Needs a special pid in
+	/* Assumes a single contiguous range.  Needs a special filp in
 	 * 'heap' to stop it being subsumed.
 	 */
-	if (p->next->pid == 0) {
+	if (p->next->filp == 0) {
 		struct mem_block *q = p->next;
 		p->size += q->size;
 		p->next = q->next;
 		p->next->prev = p;
-		DRM_FREE(q);
+		DRM_FREE(q, sizeof(*q));
 	}
 
-	if (p->prev->pid == 0) {
+	if (p->prev->filp == 0) {
 		struct mem_block *q = p->prev;
 		q->size += p->size;
 		q->next = p->next;
 		q->next->prev = q;
-		DRM_FREE(p);
+		DRM_FREE(p, sizeof(*q));
 	}
 }
 
@@ -141,47 +141,46 @@
 	
 	*heap = DRM_MALLOC(sizeof(**heap));
 	if (!*heap) {
-		DRM_FREE( blocks );
+		DRM_FREE( blocks, sizeof(*blocks) );
 		return -ENOMEM;
 	}
 
 	blocks->start = start;
 	blocks->size = size;
-	blocks->pid = 0;
+	blocks->filp = 0;
 	blocks->next = blocks->prev = *heap;
 
 	memset( *heap, 0, sizeof(**heap) );
-	(*heap)->pid = -1;
+	(*heap)->filp = (DRMFILE) -1;
 	(*heap)->next = (*heap)->prev = blocks;
 	return 0;
 }
 
 
-/* Free all blocks associated with the releasing pid.
+/* Free all blocks associated with the releasing file.
  */
-void radeon_mem_release( struct mem_block *heap )
+void radeon_mem_release( DRMFILE filp, struct mem_block *heap )
 {
-	int pid = DRM_CURRENTPID;
 	struct mem_block *p;
 
 	if (!heap || !heap->next)
 		return;
 
 	for (p = heap->next ; p != heap ; p = p->next) {
-		if (p->pid == pid) 
-			p->pid = 0;
+		if (p->filp == filp) 
+			p->filp = 0;
 	}
 
-	/* Assumes a single contiguous range.  Needs a special pid in
+	/* Assumes a single contiguous range.  Needs a special filp in
 	 * 'heap' to stop it being subsumed.
 	 */
 	for (p = heap->next ; p != heap ; p = p->next) {
-		while (p->pid == 0 && p->next->pid == 0) {
+		while (p->filp == 0 && p->next->filp == 0) {
 			struct mem_block *q = p->next;
 			p->size += q->size;
 			p->next = q->next;
 			p->next->prev = p;
-			DRM_FREE(q);
+			DRM_FREE(q, sizeof(*q));
 		}
 	}
 }
@@ -198,10 +197,10 @@
 	for (p = (*heap)->next ; p != *heap ; ) {
 		struct mem_block *q = p;
 		p = p->next;
-		DRM_FREE(q);
+		DRM_FREE(q, sizeof(*q));
 	}
 
-	DRM_FREE( *heap );
+	DRM_FREE( *heap, sizeof(**heap) );
 	*heap = 0;
 }
 
@@ -248,7 +247,7 @@
 		alloc.alignment = 12;
 
 	block = alloc_block( *heap, alloc.size, alloc.alignment,
-			     DRM_CURRENTPID );
+			     filp );
 
 	if (!block) 
 		return DRM_ERR(ENOMEM);
@@ -287,7 +286,7 @@
 	if (!block)
 		return DRM_ERR(EFAULT);
 
-	if (block->pid != DRM_CURRENTPID)
+	if (block->filp != filp)
 		return DRM_ERR(EPERM);
 
 	free_block( block );	
diff -Nru a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c
--- a/drivers/char/drm/radeon_state.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/radeon_state.c	Mon Mar 31 13:41:08 2003
@@ -1063,7 +1063,8 @@
 
 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
 
-static int radeon_cp_dispatch_texture( drm_device_t *dev,
+static int radeon_cp_dispatch_texture( DRMFILE filp,
+				       drm_device_t *dev,
 				       drm_radeon_texture_t *tex,
 				       drm_radeon_tex_image_t *image )
 {
@@ -1073,7 +1074,7 @@
 	u32 *buffer;
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
-	u32 y, height;
+	u32 height;
 	int i;
 	RING_LOCALS;
 
@@ -1138,10 +1139,9 @@
 			   tex->offset >> 10, tex->pitch, tex->format,
 			   image->x, image->y, image->width, image->height );
 
-		/* Make a copy of the parameters in case we have to
+		/* Make a copy of some parameters in case we have to
 		 * update them for a multi-pass texture blit.
 		 */
-		y = image->y;
 		height = image->height;
 		data = (const u8 *)image->data;
 		
@@ -1156,11 +1156,6 @@
 			return 0;
 		}
 
-		/* Update the input parameters for next time */
-		image->y += height;
-		image->height -= height;
-		image->data += size;
-
 		buf = radeon_freelist_get( dev );
 		if ( 0 && !buf ) {
 			radeon_do_cp_idle( dev_priv );
@@ -1190,7 +1185,7 @@
 		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
 		buffer[3] = 0xffffffff;
 		buffer[4] = 0xffffffff;
-		buffer[5] = (y << 16) | image->x;
+		buffer[5] = (image->y << 16) | image->x;
 		buffer[6] = (height << 16) | image->width;
 		buffer[7] = dwords;
 		buffer += 8;
@@ -1222,11 +1217,15 @@
 			}
 		}
 
-		buf->pid = DRM_CURRENTPID;
+		buf->filp = filp;
 		buf->used = (dwords + 8) * sizeof(u32);
 		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
 		radeon_cp_discard_buffer( dev, buf );
 
+		/* Update the input parameters for next time */
+		image->y += height;
+		image->height -= height;
+		(const u8 *)image->data += size;
 	} while (image->height > 0);
 
 	/* Flush the pixel cache after the blit completes.  This ensures
@@ -1275,7 +1274,7 @@
 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t *)data,
 			     sizeof(clear) );
@@ -1344,7 +1343,7 @@
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 
@@ -1364,7 +1363,7 @@
 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	DRM_DEBUG( "\n" );
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 
@@ -1388,7 +1387,7 @@
 	drm_radeon_vertex_t vertex;
 	drm_radeon_tcl_prim_t prim;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1418,9 +1417,9 @@
 
 	buf = dma->buflist[vertex.idx];
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -1475,7 +1474,7 @@
 	drm_radeon_tcl_prim_t prim;
 	int count;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1505,9 +1504,9 @@
 
 	buf = dma->buflist[elts.idx];
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -1570,7 +1569,7 @@
 	drm_radeon_tex_image_t image;
 	int ret;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t *)data, sizeof(tex) );
 
@@ -1587,7 +1586,7 @@
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
-	ret = radeon_cp_dispatch_texture( dev, &tex, &image );
+	ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
 
 	COMMIT_RING();
 	return ret;
@@ -1600,7 +1599,7 @@
 	drm_radeon_stipple_t stipple;
 	u32 mask[32];
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t *)data,
 			     sizeof(stipple) );
@@ -1625,7 +1624,7 @@
 	drm_radeon_indirect_t indirect;
 	RING_LOCALS;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1647,9 +1646,9 @@
 
 	buf = dma->buflist[indirect.idx];
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 	if ( buf->pending ) {
@@ -1702,7 +1701,7 @@
 	int i;
 	unsigned char laststate;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -1727,9 +1726,9 @@
 
 	buf = dma->buflist[vertex.idx];
 
-	if ( buf->pid != DRM_CURRENTPID ) {
-		DRM_ERROR( "process %d using buffer owned by %d\n",
-			   DRM_CURRENTPID, buf->pid );
+	if ( buf->filp != filp ) {
+		DRM_ERROR( "process %d using buffer owned by %p\n",
+			   DRM_CURRENTPID, buf->filp );
 		return DRM_ERR(EINVAL);
 	}
 
@@ -2029,7 +2028,7 @@
 	drm_radeon_cmd_header_t header;
 	int orig_nbox;
 
-	LOCK_TEST_WITH_RETURN( dev );
+	LOCK_TEST_WITH_RETURN( dev, filp );
 
 	if ( !dev_priv ) {
 		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -2098,8 +2097,9 @@
 			}
 
 			buf = dma->buflist[idx];
-			if ( buf->pid != DRM_CURRENTPID || buf->pending ) {
-				DRM_ERROR( "bad buffer\n" );
+			if ( buf->filp != filp || buf->pending ) {
+				DRM_ERROR( "bad buffer %p %p %d\n",
+					   buf->filp, filp, buf->pending);
 				return DRM_ERR(EINVAL);
 			}
 
diff -Nru a/drivers/char/drm/sis.h b/drivers/char/drm/sis.h
--- a/drivers/char/drm/sis.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/drm/sis.h	Mon Mar 31 13:41:08 2003
@@ -24,7 +24,7 @@
  * DEALINGS IN THE SOFTWARE.
  * 
  */
-/* $XFree86: xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/sis.h,v 1.2 2001/12/19 21:25:59 dawes Exp $ */
+/* $XFree86: xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/sis.h,v 1.3 2002/10/30 12:52:38 alanh Exp $ */
 
 #ifndef __SIS_H__
 #define __SIS_H__
diff -Nru a/drivers/char/drm/sis_mm.c b/drivers/char/drm/sis_mm.c
--- a/drivers/char/drm/sis_mm.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/drm/sis_mm.c	Mon Mar 31 13:41:07 2003
@@ -182,10 +182,10 @@
   if(block){
     /* TODO */
     agp.offset = block->ofs;
-    agp.free = (unsigned int)block;
+    agp.free = (unsigned long)block;
     if(!add_alloc_set(agp.context, AGP_TYPE, agp.free)){
       DRM_DEBUG("adding to allocation set fails\n");
-      mmFreeMem((PMemBlock)agp.free);
+      mmFreeMem((PMemBlock)(unsigned long)agp.free);
       retval = -1;
     }
   }
@@ -218,7 +218,7 @@
     return -1;
   }
 
-  mmFreeMem((PMemBlock)agp.free);
+  mmFreeMem((PMemBlock)(unsigned long)agp.free);
   if(!del_alloc_set(agp.context, AGP_TYPE, agp.free))
     retval = -1;
 
@@ -288,7 +288,7 @@
 	  retval = setFirst(set, &item);
 	  while(retval){
    	    DRM_DEBUG("free agp memory 0x%x\n", item);
-	    mmFreeMem((PMemBlock)item);
+	    mmFreeMem((PMemBlock)(unsigned long)item);
 	    retval = setNext(set, &item);
 	  }
 	  setDestroy(set);
diff -Nru a/drivers/char/hw_random.c b/drivers/char/hw_random.c
--- a/drivers/char/hw_random.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/hw_random.c	Mon Mar 31 13:41:07 2003
@@ -151,6 +151,7 @@
  */
 static struct pci_device_id rng_pci_tbl[] __initdata = {
 	{ 0x1022, 0x7443, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_amd },
+	{ 0x1022, 0x746b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_amd },
 
 	{ 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
 	{ 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
diff -Nru a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
--- a/drivers/char/ipmi/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/ipmi/Kconfig	Mon Mar 31 13:41:08 2003
@@ -7,8 +7,14 @@
        tristate 'IPMI top-level message handler'
        help
          This enables the central IPMI message handler, required for IPMI
-	 to work.  Note that you must have this enabled to do any other IPMI
-	 things.  See IPMI.txt for more details.
+	 to work.
+
+         IPMI is a standard for managing sensors (temperature,
+         voltage, etc.) in a system.
+
+         See Documentation/IPMI.txt for more details on the driver.
+
+	 If unsure, say N.
 
 config IPMI_PANIC_EVENT
        bool 'Generate a panic event to all BMCs on a panic'
diff -Nru a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
--- a/drivers/char/ipmi/ipmi_devintf.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/char/ipmi/ipmi_devintf.c	Mon Mar 31 13:41:08 2003
@@ -449,7 +449,7 @@
 	if (if_num > MAX_DEVICES)
 		return;
 
-	snprinf(name, sizeof(name), "ipmidev/%d", if_num);
+	snprintf(name, sizeof(name), "ipmidev/%d", if_num);
 
 	handles[if_num] = devfs_register(NULL, name, DEVFS_FL_NONE,
 					 ipmi_major, if_num,
diff -Nru a/drivers/char/ipmi/ipmi_kcs_intf.c b/drivers/char/ipmi/ipmi_kcs_intf.c
--- a/drivers/char/ipmi/ipmi_kcs_intf.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/ipmi/ipmi_kcs_intf.c	Mon Mar 31 13:41:06 2003
@@ -826,7 +826,7 @@
 	if (kcs_port && kcs_physaddr)
 		return -EINVAL;
 
-	new_kcs = kmalloc(kcs_size(), GFP_KERNEL);
+	new_kcs = kmalloc(sizeof(*new_kcs), GFP_KERNEL);
 	if (!new_kcs) {
 		printk(KERN_ERR "ipmi_kcs: out of memory\n");
 		return -ENOMEM;
diff -Nru a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c
--- a/drivers/char/ipmi/ipmi_kcs_sm.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/ipmi/ipmi_kcs_sm.c	Mon Mar 31 13:41:06 2003
@@ -468,7 +468,7 @@
 		break;
 			
 	case KCS_HOSED:
-		return KCS_SM_HOSED;
+		break;
 	}
 
 	if (kcs->state == KCS_HOSED) {
diff -Nru a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
--- a/drivers/char/pcmcia/synclink_cs.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/char/pcmcia/synclink_cs.c	Mon Mar 31 13:41:06 2003
@@ -76,7 +76,6 @@
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/ds.h>
-#include <pcmcia/bus_ops.h>
 
 #ifdef CONFIG_SYNCLINK_SYNCPPP_MODULE
 #define CONFIG_SYNCLINK_SYNCPPP 1
@@ -241,7 +240,6 @@
 	dev_link_t	      link;
 	dev_node_t	      node;
 	int		      stop;
-	struct bus_operations *bus;
 
 	/* SPPP/Cisco HDLC device parts */
 	int netcount;
@@ -826,7 +824,6 @@
 	    break;
     case CS_EVENT_CARD_INSERTION:
 	    link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
-	    info->bus = args->bus;
 	    mgslpc_config(link);
 	    break;
     case CS_EVENT_PM_SUSPEND:
diff -Nru a/drivers/char/tty_io.c b/drivers/char/tty_io.c
--- a/drivers/char/tty_io.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/char/tty_io.c	Mon Mar 31 13:41:07 2003
@@ -2143,31 +2143,16 @@
  */
 int tty_unregister_driver(struct tty_driver *driver)
 {
-	int	retval;
-	struct tty_driver *p;
-	int	i, found = 0;
+	int retval, i;
 	struct termios *tp;
-	const char *othername = NULL;
-	
+
 	if (*driver->refcount)
 		return -EBUSY;
 
-	list_for_each_entry(p, &tty_drivers, tty_drivers) {
-		if (p == driver)
-			found++;
-		else if (p->major == driver->major)
-			othername = p->name;
-	}
-	
-	if (!found)
-		return -ENOENT;
-
-	if (othername == NULL) {
-		retval = unregister_chrdev(driver->major, driver->name);
-		if (retval)
-			return retval;
-	} else
-		register_chrdev(driver->major, othername, &tty_fops);
+	retval = unregister_chrdev_region(driver->major, driver->minor_start,
+					  driver->num, driver->name);
+	if (retval)
+		return retval;
 
 	list_del(&driver->tty_drivers);
 
diff -Nru a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
--- a/drivers/i2c/busses/i2c-ali15x3.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/busses/i2c-ali15x3.c	Mon Mar 31 13:41:07 2003
@@ -474,9 +474,11 @@
 
 static struct i2c_adapter ali15x3_adapter = {
 	.owner		= THIS_MODULE,
-	.name		= "unset",
 	.id		= I2C_ALGO_SMBUS | I2C_HW_SMBUS_ALI15X3,
 	.algo		= &smbus_algorithm,
+	.dev		= {
+		.name	= "unset",
+	},
 };
 
 static struct pci_device_id ali15x3_ids[] __devinitdata = {
@@ -500,8 +502,8 @@
 	/* set up the driverfs linkage to our parent device */
 	ali15x3_adapter.dev.parent = &dev->dev;
 
-	sprintf(ali15x3_adapter.name, "SMBus ALI15X3 adapter at %04x",
-		ali15x3_smba);
+	snprintf(ali15x3_adapter.dev.name, DEVICE_NAME_SIZE,
+		"SMBus ALI15X3 adapter at %04x", ali15x3_smba);
 	return i2c_add_adapter(&ali15x3_adapter);
 }
 
diff -Nru a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
--- a/drivers/i2c/busses/i2c-amd756.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/busses/i2c-amd756.c	Mon Mar 31 13:41:08 2003
@@ -312,9 +312,11 @@
 
 static struct i2c_adapter amd756_adapter = {
 	.owner		= THIS_MODULE,
-	.name		= "unset",
 	.id		= I2C_ALGO_SMBUS | I2C_HW_SMBUS_AMD756,
 	.algo		= &smbus_algorithm,
+	.dev		= {
+		.name	= "unset",
+	},
 };
 
 enum chiptype { AMD756, AMD766, AMD768, NFORCE };
@@ -376,7 +378,7 @@
 	/* set up the driverfs linkage to our parent device */
 	amd756_adapter.dev.parent = &pdev->dev;
 
-	sprintf(amd756_adapter.name,
+	snprintf(amd756_adapter.dev.name, DEVICE_NAME_SIZE,
 		"SMBus AMD75x adapter at %04x", amd756_ioport);
 
 	error = i2c_add_adapter(&amd756_adapter);
diff -Nru a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c
--- a/drivers/i2c/busses/i2c-amd8111.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/busses/i2c-amd8111.c	Mon Mar 31 13:41:08 2003
@@ -357,8 +357,8 @@
 		goto out_kfree;
 
 	smbus->adapter.owner = THIS_MODULE;
-	sprintf(smbus->adapter.name,
-			"SMBus2 AMD8111 adapter at %04x", smbus->base);
+	snprintf(smbus->adapter.dev.name, DEVICE_NAME_SIZE,
+		"SMBus2 AMD8111 adapter at %04x", smbus->base);
 	smbus->adapter.id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_AMD8111;
 	smbus->adapter.algo = &smbus_algorithm;
 	smbus->adapter.algo_data = smbus;
diff -Nru a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
--- a/drivers/i2c/busses/i2c-i801.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/busses/i2c-i801.c	Mon Mar 31 13:41:07 2003
@@ -546,9 +546,11 @@
 
 static struct i2c_adapter i801_adapter = {
 	.owner		= THIS_MODULE,
-	.name		= "unset",
 	.id		= I2C_ALGO_SMBUS | I2C_HW_SMBUS_I801,
 	.algo		= &smbus_algorithm,
+	.dev		= {
+		.name	= "unset",
+	},
 };
 
 static struct pci_device_id i801_ids[] __devinitdata = {
@@ -597,8 +599,8 @@
 	/* set up the driverfs linkage to our parent device */
 	i801_adapter.dev.parent = &dev->dev;
 
-	sprintf(i801_adapter.name, "SMBus I801 adapter at %04x",
-		i801_smba);
+	snprintf(i801_adapter.dev.name, DEVICE_NAME_SIZE,
+		"SMBus I801 adapter at %04x", i801_smba);
 	return i2c_add_adapter(&i801_adapter);
 }
 
diff -Nru a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c
--- a/drivers/i2c/busses/i2c-isa.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/busses/i2c-isa.c	Mon Mar 31 13:41:07 2003
@@ -39,9 +39,11 @@
 /* There can only be one... */
 static struct i2c_adapter isa_adapter = {
 	.owner		= THIS_MODULE,
-	.name		= "ISA main adapter",
 	.id		= I2C_ALGO_ISA | I2C_HW_ISA,
 	.algo		= &isa_algorithm,
+	.dev		= {
+		.name	= "ISA main adapter",
+	},
 };
 
 static int __init i2c_isa_init(void)
diff -Nru a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
--- a/drivers/i2c/busses/i2c-piix4.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/busses/i2c-piix4.c	Mon Mar 31 13:41:06 2003
@@ -394,9 +394,11 @@
 
 static struct i2c_adapter piix4_adapter = {
 	.owner		= THIS_MODULE,
-	.name		= "unset",
 	.id		= I2C_ALGO_SMBUS | I2C_HW_SMBUS_PIIX4,
 	.algo		= &smbus_algorithm,
+	.dev		= {
+		.name	= "unset",
+	},
 };
 
 static struct pci_device_id piix4_ids[] __devinitdata = {
@@ -449,8 +451,8 @@
 	/* set up the driverfs linkage to our parent device */
 	piix4_adapter.dev.parent = &dev->dev;
 
-	sprintf(piix4_adapter.name, "SMBus PIIX4 adapter at %04x",
-		piix4_smba);
+	snprintf(piix4_adapter.dev.name, DEVICE_NAME_SIZE,
+		"SMBus PIIX4 adapter at %04x", piix4_smba);
 
 	retval = i2c_add_adapter(&piix4_adapter);
 
diff -Nru a/drivers/i2c/chips/adm1021.c b/drivers/i2c/chips/adm1021.c
--- a/drivers/i2c/chips/adm1021.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/chips/adm1021.c	Mon Mar 31 13:41:06 2003
@@ -144,7 +144,7 @@
 /* This is the driver that will be inserted */
 static struct i2c_driver adm1021_driver = {
 	.owner		= THIS_MODULE,
-	.name		= "ADM1021, MAX1617 sensor driver",
+	.name		= "ADM1021-MAX1617",
 	.id		= I2C_DRIVERID_ADM1021,
 	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= adm1021_attach_adapter,
@@ -221,10 +221,12 @@
 		err = -ENOMEM;
 		goto error0;
 	}
+	memset(new_client, 0x00, sizeof(struct i2c_client) +
+				 sizeof(struct adm1021_data));
 
 	data = (struct adm1021_data *) (new_client + 1);
+	i2c_set_clientdata(new_client, data);
 	new_client->addr = address;
-	new_client->data = data;
 	new_client->adapter = adapter;
 	new_client->driver = &adm1021_driver;
 	new_client->flags = 0;
@@ -299,7 +301,7 @@
 	}
 
 	/* Fill in the remaining client fields and put it into the global list */
-	strcpy(new_client->name, client_name);
+	snprintf(new_client->dev.name, DEVICE_NAME_SIZE, "%s", client_name);
 	data->type = kind;
 
 	new_client->id = adm1021_id++;
@@ -354,8 +356,7 @@
 
 	int err;
 
-	i2c_deregister_entry(((struct adm1021_data *) (client->data))->
-				 sysctl_id);
+	i2c_deregister_entry(((struct adm1021_data *) (i2c_get_clientdata(client)))->sysctl_id);
 
 	if ((err = i2c_detach_client(client))) {
 		printk
@@ -384,7 +385,7 @@
 
 static void adm1021_update_client(struct i2c_client *client)
 {
-	struct adm1021_data *data = client->data;
+	struct adm1021_data *data = i2c_get_clientdata(client);
 
 	down(&data->update_lock);
 
@@ -435,7 +436,7 @@
 static void adm1021_temp(struct i2c_client *client, int operation,
 			 int ctl_name, int *nrels_mag, long *results)
 {
-	struct adm1021_data *data = client->data;
+	struct adm1021_data *data = i2c_get_clientdata(client);
 
 	if (operation == SENSORS_PROC_REAL_INFO)
 		*nrels_mag = 0;
@@ -462,7 +463,7 @@
 static void adm1021_remote_temp(struct i2c_client *client, int operation,
 				int ctl_name, int *nrels_mag, long *results)
 {
-	struct adm1021_data *data = client->data;
+	struct adm1021_data *data = i2c_get_clientdata(client);
 	int prec = 0;
 
 	if (operation == SENSORS_PROC_REAL_INFO)
@@ -535,7 +536,7 @@
 static void adm1021_die_code(struct i2c_client *client, int operation,
 			     int ctl_name, int *nrels_mag, long *results)
 {
-	struct adm1021_data *data = client->data;
+	struct adm1021_data *data = i2c_get_clientdata(client);
 
 	if (operation == SENSORS_PROC_REAL_INFO)
 		*nrels_mag = 0;
@@ -551,7 +552,7 @@
 static void adm1021_alarms(struct i2c_client *client, int operation,
 			   int ctl_name, int *nrels_mag, long *results)
 {
-	struct adm1021_data *data = client->data;
+	struct adm1021_data *data = i2c_get_clientdata(client);
 	if (operation == SENSORS_PROC_REAL_INFO)
 		*nrels_mag = 0;
 	else if (operation == SENSORS_PROC_REAL_READ) {
diff -Nru a/drivers/i2c/chips/lm75.c b/drivers/i2c/chips/lm75.c
--- a/drivers/i2c/chips/lm75.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/chips/lm75.c	Mon Mar 31 13:41:06 2003
@@ -82,7 +82,7 @@
 /* This is the driver that will be inserted */
 static struct i2c_driver lm75_driver = {
 	.owner		= THIS_MODULE,
-	.name		= "LM75 sensor chip driver",
+	.name		= "LM75 sensor",
 	.id		= I2C_DRIVERID_LM75,
 	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm75_attach_adapter,
@@ -140,10 +140,12 @@
 		err = -ENOMEM;
 		goto error0;
 	}
+	memset(new_client, 0x00, sizeof(struct i2c_client) +
+				 sizeof(struct lm75_data));
 
 	data = (struct lm75_data *) (new_client + 1);
+	i2c_set_clientdata(new_client, data);
 	new_client->addr = address;
-	new_client->data = data;
 	new_client->adapter = adapter;
 	new_client->driver = &lm75_driver;
 	new_client->flags = 0;
@@ -180,7 +182,7 @@
 	}
 
 	/* Fill in the remaining client fields and put it into the global list */
-	strcpy(new_client->name, client_name);
+	snprintf(new_client->dev.name, DEVICE_NAME_SIZE, "%s", client_name);
 
 	new_client->id = lm75_id++;
 	data->valid = 0;
@@ -215,7 +217,7 @@
 
 static int lm75_detach_client(struct i2c_client *client)
 {
-	struct lm75_data *data = client->data;
+	struct lm75_data *data = i2c_get_clientdata(client);
 
 	i2c_deregister_entry(data->sysctl_id);
 	i2c_detach_client(client);
@@ -263,7 +265,7 @@
 
 static void lm75_update_client(struct i2c_client *client)
 {
-	struct lm75_data *data = client->data;
+	struct lm75_data *data = i2c_get_clientdata(client);
 
 	down(&data->update_lock);
 
@@ -286,7 +288,7 @@
 static void lm75_temp(struct i2c_client *client, int operation, int ctl_name,
 		      int *nrels_mag, long *results)
 {
-	struct lm75_data *data = client->data;
+	struct lm75_data *data = i2c_get_clientdata(client);
 	if (operation == SENSORS_PROC_REAL_INFO)
 		*nrels_mag = 1;
 	else if (operation == SENSORS_PROC_REAL_READ) {
diff -Nru a/drivers/i2c/i2c-algo-bit.c b/drivers/i2c/i2c-algo-bit.c
--- a/drivers/i2c/i2c-algo-bit.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/i2c-algo-bit.c	Mon Mar 31 13:41:07 2003
@@ -23,6 +23,8 @@
 
 /* $Id: i2c-algo-bit.c,v 1.44 2003/01/21 08:08:16 kmalkki Exp $ */
 
+/* #define DEBUG 1 */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
@@ -338,16 +340,14 @@
 
 	while (count > 0) {
 		c = *temp;
-		DEB2(printk(KERN_DEBUG "i2c-algo-bit.o: %s sendbytes: writing %2.2X\n",
-			    i2c_adap->name, c&0xff));
+		DEB2(dev_dbg(&i2c_adap->dev, "sendbytes: writing %2.2X\n", c&0xff));
 		retval = i2c_outb(i2c_adap,c);
 		if ((retval>0) || (nak_ok && (retval==0)))  { /* ok or ignored NAK */
 			count--; 
 			temp++;
 			wrcount++;
 		} else { /* arbitration or no acknowledge */
-			printk(KERN_ERR "i2c-algo-bit.o: %s sendbytes: error - bailout.\n",
-			       i2c_adap->name);
+			dev_err(&i2c_adap->dev, "sendbytes: error - bailout.\n");
 			i2c_stop(adap);
 			return (retval<0)? retval : -EFAULT;
 			        /* got a better one ?? */
@@ -527,13 +527,12 @@
 	struct i2c_algo_bit_data *bit_adap = adap->algo_data;
 
 	if (bit_test) {
-		int ret = test_bus(bit_adap, adap->name);
+		int ret = test_bus(bit_adap, adap->dev.name);
 		if (ret<0)
 			return -ENODEV;
 	}
 
-	DEB2(printk(KERN_DEBUG "i2c-algo-bit.o: hw routines for %s registered.\n",
-	            adap->name));
+	DEB2(dev_dbg(&adap->dev, "hw routines registered.\n"));
 
 	/* register new adapter to i2c module... */
 
diff -Nru a/drivers/i2c/i2c-algo-pcf.c b/drivers/i2c/i2c-algo-pcf.c
--- a/drivers/i2c/i2c-algo-pcf.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/i2c-algo-pcf.c	Mon Mar 31 13:41:08 2003
@@ -27,6 +27,8 @@
    messages, proper stop/repstart signaling during receive,
    added detect code */
 
+/* #define DEBUG 1 */		/* to pick up dev_dbg calls */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
@@ -222,21 +224,19 @@
 	int wrcount, status, timeout;
     
 	for (wrcount=0; wrcount<count; ++wrcount) {
-		DEB2(printk(KERN_DEBUG "i2c-algo-pcf.o: %s i2c_write: writing %2.2X\n",
-		      i2c_adap->name, buf[wrcount]&0xff));
+		DEB2(dev_dbg(&i2c_adap->dev, "i2c_write: writing %2.2X\n",
+				buf[wrcount]&0xff));
 		i2c_outb(adap, buf[wrcount]);
 		timeout = wait_for_pin(adap, &status);
 		if (timeout) {
 			i2c_stop(adap);
-			printk(KERN_ERR "i2c-algo-pcf.o: %s i2c_write: "
-			       "error - timeout.\n", i2c_adap->name);
+			dev_err(&i2c_adap->dev, "i2c_write: error - timeout.\n");
 			return -EREMOTEIO; /* got a better one ?? */
 		}
 #ifndef STUB_I2C
 		if (status & I2C_PCF_LRB) {
 			i2c_stop(adap);
-			printk(KERN_ERR "i2c-algo-pcf.o: %s i2c_write: "
-			       "error - no ack.\n", i2c_adap->name);
+			dev_err(&i2c_adap->dev, "i2c_write: error - no ack.\n");
 			return -EREMOTEIO; /* got a better one ?? */
 		}
 #endif
@@ -263,14 +263,14 @@
 
 		if (wait_for_pin(adap, &status)) {
 			i2c_stop(adap);
-			printk(KERN_ERR "i2c-algo-pcf.o: pcf_readbytes timed out.\n");
+			dev_err(&i2c_adap->dev, "pcf_readbytes timed out.\n");
 			return (-1);
 		}
 
 #ifndef STUB_I2C
 		if ((status & I2C_PCF_LRB) && (i != count)) {
 			i2c_stop(adap);
-			printk(KERN_ERR "i2c-algo-pcf.o: i2c_read: i2c_inb, No ack.\n");
+			dev_err(&i2c_adap->dev, "i2c_read: i2c_inb, No ack.\n");
 			return (-1);
 		}
 #endif
@@ -445,8 +445,7 @@
 	struct i2c_algo_pcf_data *pcf_adap = adap->algo_data;
 	int rval;
 
-	DEB2(printk(KERN_DEBUG "i2c-algo-pcf.o: hw routines for %s registered.\n",
-	            adap->name));
+	DEB2(dev_dbg(&adap->dev, "hw routines registered.\n"));
 
 	/* register new adapter to i2c module... */
 
diff -Nru a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
--- a/drivers/i2c/i2c-core.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/i2c-core.c	Mon Mar 31 13:41:06 2003
@@ -23,6 +23,8 @@
 
 /* $Id: i2c-core.c,v 1.95 2003/01/22 05:25:08 kmalkki Exp $ */
 
+/* #define DEBUG 1 */		/* needed to pick up the dev_dbg() calls */
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -63,6 +65,14 @@
 	return 0;
 }
 
+static struct device_driver i2c_generic_driver = {
+	.name =	"i2c",
+	.bus = &i2c_bus_type,
+	.probe = i2c_device_probe,
+	.remove = i2c_device_remove,
+};
+
+
 /* ---------------------------------------------------
  * registering functions 
  * --------------------------------------------------- 
@@ -82,9 +92,8 @@
 		if (NULL == adapters[i])
 			break;
 	if (I2C_ADAP_MAX == i) {
-		printk(KERN_WARNING 
-		       " i2c-core.o: register_adapter(%s) - enlarge I2C_ADAP_MAX.\n",
-			adap->name);
+		dev_warn(&adap->dev,
+			"register_adapter - enlarge I2C_ADAP_MAX.\n");
 		res = -ENOMEM;
 		goto out_unlock;
 	}
@@ -105,7 +114,7 @@
 	if (adap->dev.parent == NULL)
 		adap->dev.parent = &legacy_bus;
 	sprintf(adap->dev.bus_id, "i2c-%d", i);
-	strcpy(adap->dev.name, "i2c controller");
+	adap->dev.driver = &i2c_generic_driver;
 	device_register(&adap->dev);
 
 	/* inform drivers of new adapters */
@@ -116,8 +125,7 @@
 			drivers[j]->attach_adapter(adap);
 	up(&core_lists);
 	
-	DEB(printk(KERN_DEBUG "i2c-core.o: adapter %s registered as adapter %d.\n",
-	           adap->name,i));
+	DEB(dev_dbg(&adap->dev, "registered as adapter %d.\n", i));
 
  out_unlock:
 	up(&core_lists);
@@ -134,8 +142,7 @@
 		if (adap == adapters[i])
 			break;
 	if (I2C_ADAP_MAX == i) {
-		printk( KERN_WARNING "i2c-core.o: unregister_adapter adap [%s] not found.\n",
-			adap->name);
+		dev_warn(&adap->dev, "unregister_adapter adap not found.\n");
 		res = -ENODEV;
 		goto out_unlock;
 	}
@@ -148,9 +155,9 @@
 	for (j = 0; j < I2C_DRIVER_MAX; j++) 
 		if (drivers[j] && (drivers[j]->flags & I2C_DF_DUMMY))
 			if ((res = drivers[j]->attach_adapter(adap))) {
-				printk(KERN_WARNING "i2c-core.o: can't detach adapter %s "
+				dev_warn(&adap->dev, "can't detach adapter "
 				       "while detaching driver %s: driver not "
-				       "detached!",adap->name,drivers[j]->name);
+				       "detached!\n", drivers[j]->name);
 				goto out_unlock;
 			}
 
@@ -164,10 +171,10 @@
 		     * must be deleted, as this would cause invalid states.
 		     */
 			if ((res=client->driver->detach_client(client))) {
-				printk(KERN_ERR "i2c-core.o: adapter %s not "
+				dev_err(&adap->dev, "adapter not "
 					"unregistered, because client at "
 					"address %02x can't be detached. ",
-					adap->name, client->addr);
+					client->addr);
 				goto out_unlock;
 			}
 		}
@@ -180,7 +187,7 @@
 
 	adapters[i] = NULL;
 
-	DEB(printk(KERN_DEBUG "i2c-core.o: adapter unregistered: %s\n",adap->name));
+	DEB(dev_dbg(&adap->dev, "adapter unregistered\n"));
 
  out_unlock:
 	up(&core_lists);
@@ -272,8 +279,7 @@
 		struct i2c_adapter *adap = adapters[k];
 		if (adap == NULL) /* skip empty entries. */
 			continue;
-		DEB2(printk(KERN_DEBUG "i2c-core.o: examining adapter %s:\n",
-			    adap->name));
+		DEB2(dev_dbg(&adap->dev, "examining adapter\n"));
 		if (driver->flags & I2C_DF_DUMMY) {
 		/* DUMMY drivers do not register their clients, so we have to
 		 * use a trick here: we call driver->attach_adapter to
@@ -281,11 +287,10 @@
 		 * this or hell will break loose...  
 		 */
 			if ((res = driver->attach_adapter(adap))) {
-				printk(KERN_WARNING "i2c-core.o: while unregistering "
-				       "dummy driver %s, adapter %s could "
+				dev_warn(&adap->dev, "while unregistering "
+				       "dummy driver %s, adapter could "
 				       "not be detached properly; driver "
-				       "not unloaded!",driver->name,
-				       adap->name);
+				       "not unloaded!\n", driver->name);
 				goto out_unlock;
 			}
 		} else {
@@ -295,20 +300,17 @@
 				    client->driver == driver) {
 					DEB2(printk(KERN_DEBUG "i2c-core.o: "
 						    "detaching client %s:\n",
-					            client->name));
-					if ((res = driver->
-							detach_client(client)))
-					{
-						printk(KERN_ERR "i2c-core.o: while "
+					            client->dev.name));
+					if ((res = driver->detach_client(client))) {
+						dev_err(&adap->dev, "while "
 						       "unregistering driver "
 						       "`%s', the client at "
 						       "address %02x of "
-						       "adapter `%s' could not "
+						       "adapter could not "
 						       "be detached; driver "
 						       "not unloaded!",
 						       driver->name,
-						       client->addr,
-						       adap->name);
+						       client->addr);
 						goto out_unlock;
 					}
 				}
@@ -362,7 +364,7 @@
 
 	printk(KERN_WARNING 
 	       " i2c-core.o: attach_client(%s) - enlarge I2C_CLIENT_MAX.\n",
-	       client->name);
+	       client->dev.name);
 
  out_unlock_list:
 	up(&adapter->list);
@@ -374,19 +376,26 @@
 	
 	if (adapter->client_register)  {
 		if (adapter->client_register(client))  {
-			printk(KERN_DEBUG
-			       "i2c-core.o: warning: client_register seems "
-			       "to have failed for client %02x at adapter %s\n",
-			       client->addr, adapter->name);
+			dev_warn(&adapter->dev, "warning: client_register "
+				"seems to have failed for client %02x\n",
+				client->addr);
 		}
 	}
 
-	DEB(printk(KERN_DEBUG
-		   "i2c-core.o: client [%s] registered to adapter [%s] "
-		   "(pos. %d).\n", client->name, adapter->name, i));
+	DEB(dev_dbg(&adapter->dev, "client [%s] registered to adapter "
+			"(pos. %d).\n", client->dev.name, i));
 
 	if (client->flags & I2C_CLIENT_ALLOW_USE)
 		client->usage_count = 0;
+
+	client->dev.parent = &client->adapter->dev;
+	client->dev.driver = &client->driver->driver;
+	client->dev.bus = &i2c_bus_type;
+	
+	snprintf(&client->dev.bus_id[0], sizeof(client->dev.bus_id), "i2c_dev_%d", i);
+	printk(KERN_DEBUG "registering %s\n", client->dev.bus_id);
+	device_register(&client->dev);
+	
 	return 0;
 }
 
@@ -404,7 +413,7 @@
 		if (res) {
 			printk(KERN_ERR
 			       "i2c-core.o: client_unregister [%s] failed, "
-			       "client not detached", client->name);
+			       "client not detached", client->dev.name);
 			goto out;
 		}
 	}
@@ -419,10 +428,11 @@
 
 	printk(KERN_WARNING
 	       " i2c-core.o: unregister_client [%s] not found\n",
-	       client->name);
+	       client->dev.name);
 	res = -ENODEV;
 
  out_unlock:
+	device_unregister(&client->dev);
 	up(&adapter->list);
  out:
 	return res;
@@ -531,7 +541,7 @@
 				client = adapters[i]->clients[order[j]];
 				len += sprintf(kbuf+len,"%02x\t%-32s\t%-32s\n",
 				              client->addr,
-				              client->name,
+				              client->dev.name,
 				              client->driver->name);
 			}
 			len = len - file->f_pos;
@@ -579,7 +589,7 @@
 			seq_printf(s, "dummy     ");
 
 		seq_printf(s, "\t%-32s\t%-32s\n",
-			      adapter->name, adapter->algo->name);
+			      adapter->dev.name, adapter->algo->name);
 	}
 	up(&core_lists);
 
@@ -675,7 +685,7 @@
 	bus_unregister(&i2c_bus_type);
 }
 
-module_init(i2c_init);
+subsys_initcall(i2c_init);
 module_exit(i2c_exit);
 
 /* ----------------------------------------------------
@@ -688,8 +698,7 @@
 	int ret;
 
 	if (adap->algo->master_xfer) {
- 	 	DEB2(printk(KERN_DEBUG "i2c-core.o: master_xfer: %s with %d msgs.\n",
-		            adap->name,num));
+ 	 	DEB2(dev_dbg(&adap->dev, "master_xfer: with %d msgs.\n", num));
 
 		down(&adap->bus);
 		ret = adap->algo->master_xfer(adap,msgs,num);
@@ -697,8 +706,7 @@
 
 		return ret;
 	} else {
-		printk(KERN_ERR "i2c-core.o: I2C adapter %04x: I2C level transfers not supported\n",
-		       adap->id);
+		dev_err(&adap->dev, "I2C level transfers not supported\n");
 		return -ENOSYS;
 	}
 }
@@ -715,8 +723,8 @@
 		msg.len = count;
 		(const char *)msg.buf = buf;
 	
-		DEB2(printk(KERN_DEBUG "i2c-core.o: master_send: writing %d bytes on %s.\n",
-			count,client->adapter->name));
+		DEB2(dev_dbg(&client->adapter->dev, "master_send: writing %d bytes.\n",
+				count));
 	
 		down(&adap->bus);
 		ret = adap->algo->master_xfer(adap,&msg,1);
@@ -745,8 +753,8 @@
 		msg.len = count;
 		msg.buf = buf;
 
-		DEB2(printk(KERN_DEBUG "i2c-core.o: master_recv: reading %d bytes on %s.\n",
-			count,client->adapter->name));
+		DEB2(dev_dbg(&client->adapter->dev, "master_recv: reading %d bytes.\n",
+				count));
 	
 		down(&adap->bus);
 		ret = adap->algo->master_xfer(adap,&msg,1);
diff -Nru a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
--- a/drivers/i2c/i2c-dev.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/i2c-dev.c	Mon Mar 31 13:41:08 2003
@@ -30,6 +30,9 @@
 
 /* $Id: i2c-dev.c,v 1.53 2003/01/21 08:08:16 kmalkki Exp $ */
 
+/* If you want debugging uncomment: */
+/* #define DEBUG 1 */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -41,10 +44,6 @@
 #include <linux/i2c-dev.h>
 #include <asm/uaccess.h>
 
-/* If you want debugging uncomment: */
-/* #define DEBUG */
-
-
 /* struct file_operations changed too often in the 2.1 series for nice code */
 
 static ssize_t i2cdev_read (struct file *file, char *buf, size_t count, 
@@ -87,7 +86,9 @@
 };
 
 static struct i2c_client i2cdev_client_template = {
-	.name		= "I2C /dev entry",
+	.dev		= {
+		.name	= "I2C /dev entry",
+	},
 	.id		= 1,
 	.addr		= -1,
 	.driver		= &i2cdev_driver,
@@ -386,11 +387,11 @@
 	char name[12];
 
 	if ((i = i2c_adapter_id(adap)) < 0) {
-		printk(KERN_DEBUG "i2c-dev.o: Unknown adapter ?!?\n");
+		dev_dbg(&adap->dev, "Unknown adapter ?!?\n");
 		return -ENODEV;
 	}
 	if (i >= I2CDEV_ADAPS_MAX) {
-		printk(KERN_DEBUG "i2c-dev.o: Adapter number too large?!? (%d)\n",i);
+		dev_dbg(&adap->dev, "Adapter number too large?!? (%d)\n",i);
 		return -ENODEV;
 	}
 
@@ -401,14 +402,12 @@
 			DEVFS_FL_DEFAULT, I2C_MAJOR, i,
 			S_IFCHR | S_IRUSR | S_IWUSR,
 			&i2cdev_fops, NULL);
-		printk(KERN_DEBUG "i2c-dev.o: Registered '%s' as minor %d\n",adap->name,i);
+		dev_dbg(&adap->dev, "Registered as minor %d\n", i);
 	} else {
 		/* This is actually a detach_adapter call! */
 		devfs_remove("i2c/%d", i);
 		i2cdev_adaps[i] = NULL;
-#ifdef DEBUG
-		printk(KERN_DEBUG "i2c-dev.o: Adapter unregistered: %s\n",adap->name);
-#endif
+		dev_dbg(&adap->dev, "Adapter unregistered\n");
 	}
 
 	return 0;
diff -Nru a/drivers/i2c/i2c-elektor.c b/drivers/i2c/i2c-elektor.c
--- a/drivers/i2c/i2c-elektor.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/i2c-elektor.c	Mon Mar 31 13:41:06 2003
@@ -174,10 +174,12 @@
 };
 
 static struct i2c_adapter pcf_isa_ops = {
-	.owner		   = THIS_MODULE,
-	.name		   = "PCF8584 ISA adapter",
-	.id		   = I2C_HW_P_ELEK,
-	.algo_data	   = &pcf_isa_data,
+	.owner		= THIS_MODULE,
+	.id		= I2C_HW_P_ELEK,
+	.algo_data	= &pcf_isa_data,
+	.dev		= {
+		.name	= "PCF8584 ISA adapter",
+	},
 };
 
 static int __init i2c_pcfisa_init(void) 
diff -Nru a/drivers/i2c/i2c-elv.c b/drivers/i2c/i2c-elv.c
--- a/drivers/i2c/i2c-elv.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/i2c-elv.c	Mon Mar 31 13:41:08 2003
@@ -129,9 +129,11 @@
 
 static struct i2c_adapter bit_elv_ops = {
 	.owner		= THIS_MODULE,
-	.name		= "ELV Parallel port adaptor",
 	.id		= I2C_HW_B_ELV,
 	.algo_data	= &bit_elv_data,
+	.dev		= {
+		.name	= "ELV Parallel port adaptor",
+	},
 };
 
 static int __init i2c_bitelv_init(void)
@@ -148,7 +150,7 @@
 			return -ENODEV;
 		}
 	} else {
-		bit_elv_ops.data=(void*)base;
+		i2c_set_adapdata(&bit_elv_ops, (void *)base);
 		if (bit_elv_init()==0) {
 			if(i2c_bit_add_bus(&bit_elv_ops) < 0)
 				return -ENODEV;
diff -Nru a/drivers/i2c/i2c-philips-par.c b/drivers/i2c/i2c-philips-par.c
--- a/drivers/i2c/i2c-philips-par.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/i2c/i2c-philips-par.c	Mon Mar 31 13:41:06 2003
@@ -151,8 +151,10 @@
 
 static struct i2c_adapter bit_lp_ops = {
 	.owner		= THIS_MODULE,
-	.name		= "Philips Parallel port adapter",
 	.id		= I2C_HW_B_LP,
+	.dev		= {
+		.name	= "Philips Parallel port adapter",
+	},
 };
 
 static void i2c_parport_attach (struct parport *port)
diff -Nru a/drivers/i2c/i2c-proc.c b/drivers/i2c/i2c-proc.c
--- a/drivers/i2c/i2c-proc.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/i2c-proc.c	Mon Mar 31 13:41:07 2003
@@ -23,6 +23,8 @@
     This driver puts entries in /proc/sys/dev/sensors for each I2C device
 */
 
+/* #define DEBUG 1 */
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -581,9 +583,9 @@
 	    is_isa ? SENSORS_ISA_BUS : i2c_adapter_id(adapter);
 
 	/* Forget it if we can't probe using SMBUS_QUICK */
-	if ((!is_isa)
-	    && !i2c_check_functionality(adapter,
-					I2C_FUNC_SMBUS_QUICK)) return -1;
+	if ((!is_isa) &&
+	    !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK))
+		return -1;
 
 	for (addr = 0x00; addr <= (is_isa ? 0xffff : 0x7f); addr++) {
 		/* XXX: WTF is going on here??? */
@@ -594,31 +596,14 @@
 		/* If it is in one of the force entries, we don't do any
 		   detection at all */
 		found = 0;
-		for (i = 0;
-		     !found
-		     && (this_force =
-			 address_data->forces + i, this_force->force); i++) {
-			for (j = 0;
-			     !found
-			     && (this_force->force[j] != SENSORS_I2C_END);
-			     j += 2) {
-				if (
-				    ((adapter_id == this_force->force[j])
-				     ||
-				     ((this_force->
-				       force[j] == SENSORS_ANY_I2C_BUS)
-				      && !is_isa))
-				    && (addr == this_force->force[j + 1])) {
-#ifdef DEBUG
-					printk
-					    (KERN_DEBUG "i2c-proc.o: found force parameter for adapter %d, addr %04x\n",
-					     adapter_id, addr);
-#endif
-					if (
-					    (err =
-					     found_proc(adapter, addr, 0,
-							this_force->
-							kind))) return err;
+		for (i = 0; !found && (this_force = address_data->forces + i, this_force->force); i++) {
+			for (j = 0; !found && (this_force->force[j] != SENSORS_I2C_END); j += 2) {
+				if ( ((adapter_id == this_force->force[j]) ||
+				      ((this_force->force[j] == SENSORS_ANY_I2C_BUS) && !is_isa)) &&
+				      (addr == this_force->force[j + 1]) ) {
+					dev_dbg(&adapter->dev, "found force parameter for adapter %d, addr %04x\n", adapter_id, addr);
+					if ((err = found_proc(adapter, addr, 0, this_force->kind)))
+						return err;
 					found = 1;
 				}
 			}
@@ -628,42 +613,22 @@
 
 		/* If this address is in one of the ignores, we can forget about it
 		   right now */
-		for (i = 0;
-		     !found
-		     && (address_data->ignore[i] != SENSORS_I2C_END);
-		     i += 2) {
-			if (
-			    ((adapter_id == address_data->ignore[i])
-			     ||
-			     ((address_data->
-			       ignore[i] == SENSORS_ANY_I2C_BUS)
-			      && !is_isa))
-			    && (addr == address_data->ignore[i + 1])) {
-#ifdef DEBUG
-				printk
-				    (KERN_DEBUG "i2c-proc.o: found ignore parameter for adapter %d, "
-				     "addr %04x\n", adapter_id, addr);
-#endif
+		for (i = 0; !found && (address_data->ignore[i] != SENSORS_I2C_END); i += 2) {
+			if ( ((adapter_id == address_data->ignore[i]) ||
+			      ((address_data->ignore[i] == SENSORS_ANY_I2C_BUS) &&
+			       !is_isa)) &&
+			      (addr == address_data->ignore[i + 1])) {
+				dev_dbg(&adapter->dev, "found ignore parameter for adapter %d, addr %04x\n", adapter_id, addr);
 				found = 1;
 			}
 		}
-		for (i = 0;
-		     !found
-		     && (address_data->ignore_range[i] != SENSORS_I2C_END);
-		     i += 3) {
-			if (
-			    ((adapter_id == address_data->ignore_range[i])
-			     ||
-			     ((address_data->
-			       ignore_range[i] ==
-			       SENSORS_ANY_I2C_BUS) & !is_isa))
-			    && (addr >= address_data->ignore_range[i + 1])
-			    && (addr <= address_data->ignore_range[i + 2])) {
-#ifdef DEBUG
-				printk
-				    (KERN_DEBUG "i2c-proc.o: found ignore_range parameter for adapter %d, "
-				     "addr %04x\n", adapter_id, addr);
-#endif
+		for (i = 0; !found && (address_data->ignore_range[i] != SENSORS_I2C_END); i += 3) {
+			if ( ((adapter_id == address_data->ignore_range[i]) ||
+			      ((address_data->ignore_range[i] == SENSORS_ANY_I2C_BUS) &&
+			       !is_isa)) &&
+			     (addr >= address_data->ignore_range[i + 1]) &&
+			     (addr <= address_data->ignore_range[i + 2])) {
+				dev_dbg(&adapter->dev, "found ignore_range parameter for adapter %d, addr %04x\n", adapter_id, addr);
 				found = 1;
 			}
 		}
@@ -673,68 +638,31 @@
 		/* Now, we will do a detection, but only if it is in the normal or 
 		   probe entries */
 		if (is_isa) {
-			for (i = 0;
-			     !found
-			     && (address_data->normal_isa[i] !=
-				 SENSORS_ISA_END); i += 1) {
+			for (i = 0; !found && (address_data->normal_isa[i] != SENSORS_ISA_END); i += 1) {
 				if (addr == address_data->normal_isa[i]) {
-#ifdef DEBUG
-					printk
-					    (KERN_DEBUG "i2c-proc.o: found normal isa entry for adapter %d, "
-					     "addr %04x\n", adapter_id,
-					     addr);
-#endif
+					dev_dbg(&adapter->dev, "found normal isa entry for adapter %d, addr %04x\n", adapter_id, addr);
 					found = 1;
 				}
 			}
-			for (i = 0;
-			     !found
-			     && (address_data->normal_isa_range[i] !=
-				 SENSORS_ISA_END); i += 3) {
-				if ((addr >=
-				     address_data->normal_isa_range[i])
-				    && (addr <=
-					address_data->normal_isa_range[i + 1])
-				    &&
-				    ((addr -
-				      address_data->normal_isa_range[i]) %
-				     address_data->normal_isa_range[i + 2] ==
-				     0)) {
-#ifdef DEBUG
-					printk
-					    (KERN_DEBUG "i2c-proc.o: found normal isa_range entry for adapter %d, "
-					     "addr %04x", adapter_id, addr);
-#endif
+			for (i = 0; !found && (address_data->normal_isa_range[i] != SENSORS_ISA_END); i += 3) {
+				if ((addr >= address_data->normal_isa_range[i]) &&
+				    (addr <= address_data->normal_isa_range[i + 1]) &&
+				    ((addr - address_data->normal_isa_range[i]) % address_data->normal_isa_range[i + 2] == 0)) {
+					dev_dbg(&adapter->dev, "found normal isa_range entry for adapter %d, addr %04x\n", adapter_id, addr);
 					found = 1;
 				}
 			}
 		} else {
-			for (i = 0;
-			     !found && (address_data->normal_i2c[i] !=
-				 SENSORS_I2C_END); i += 1) {
+			for (i = 0; !found && (address_data->normal_i2c[i] != SENSORS_I2C_END); i += 1) {
 				if (addr == address_data->normal_i2c[i]) {
 					found = 1;
-#ifdef DEBUG
-					printk
-					    (KERN_DEBUG "i2c-proc.o: found normal i2c entry for adapter %d, "
-					     "addr %02x", adapter_id, addr);
-#endif
+					dev_dbg(&adapter->dev, "found normal i2c entry for adapter %d, addr %02x\n", adapter_id, addr);
 				}
 			}
-			for (i = 0;
-			     !found
-			     && (address_data->normal_i2c_range[i] !=
-				 SENSORS_I2C_END); i += 2) {
-				if ((addr >=
-				     address_data->normal_i2c_range[i])
-				    && (addr <=
-					address_data->normal_i2c_range[i + 1]))
-				{
-#ifdef DEBUG
-					printk
-					    (KERN_DEBUG "i2c-proc.o: found normal i2c_range entry for adapter %d, "
-					     "addr %04x\n", adapter_id, addr);
-#endif
+			for (i = 0; !found && (address_data->normal_i2c_range[i] != SENSORS_I2C_END); i += 2) {
+				if ((addr >= address_data->normal_i2c_range[i]) &&
+				    (addr <= address_data->normal_i2c_range[i + 1])) {
+					dev_dbg(&adapter->dev, "found normal i2c_range entry for adapter %d, addr %04x\n", adapter_id, addr);
 					found = 1;
 				}
 			}
@@ -747,30 +675,17 @@
 			     ((address_data->
 			       probe[i] == SENSORS_ANY_I2C_BUS) & !is_isa))
 			    && (addr == address_data->probe[i + 1])) {
-#ifdef DEBUG
-				printk
-				    (KERN_DEBUG "i2c-proc.o: found probe parameter for adapter %d, "
-				     "addr %04x\n", adapter_id, addr);
-#endif
+				dev_dbg(&adapter->dev, "found probe parameter for adapter %d, addr %04x\n", adapter_id, addr);
 				found = 1;
 			}
 		}
-		for (i = 0; !found &&
-		           (address_data->probe_range[i] != SENSORS_I2C_END);
-		     i += 3) {
-			if (
-			    ((adapter_id == address_data->probe_range[i])
-			     ||
-			     ((address_data->probe_range[i] ==
-			       SENSORS_ANY_I2C_BUS) & !is_isa))
-			    && (addr >= address_data->probe_range[i + 1])
-			    && (addr <= address_data->probe_range[i + 2])) {
+		for (i = 0; !found && (address_data->probe_range[i] != SENSORS_I2C_END); i += 3) {
+			if ( ((adapter_id == address_data->probe_range[i]) ||
+			      ((address_data->probe_range[i] == SENSORS_ANY_I2C_BUS) && !is_isa)) &&
+			     (addr >= address_data->probe_range[i + 1]) &&
+			     (addr <= address_data->probe_range[i + 2])) {
 				found = 1;
-#ifdef DEBUG
-				printk
-				    (KERN_DEBUG "i2c-proc.o: found probe_range parameter for adapter %d, "
-				     "addr %04x\n", adapter_id, addr);
-#endif
+				dev_dbg(&adapter->dev, "found probe_range parameter for adapter %d, addr %04x\n", adapter_id, addr);
 			}
 		}
 		if (!found)
@@ -779,8 +694,7 @@
 		/* OK, so we really should examine this address. First check
 		   whether there is some client here at all! */
 		if (is_isa ||
-		    (i2c_smbus_xfer
-		     (adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) >= 0))
+		    (i2c_smbus_xfer (adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) >= 0))
 			if ((err = found_proc(adapter, addr, 0, -1)))
 				return err;
 	}
diff -Nru a/drivers/i2c/i2c-velleman.c b/drivers/i2c/i2c-velleman.c
--- a/drivers/i2c/i2c-velleman.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/i2c/i2c-velleman.c	Mon Mar 31 13:41:08 2003
@@ -114,9 +114,11 @@
 
 static struct i2c_adapter bit_velle_ops = {
 	.owner		= THIS_MODULE,
-	.name		= "Velleman K8000",
 	.id		= I2C_HW_B_VELLE,
 	.algo_data	= &bit_velle_data,
+	.dev		= {
+		.name	= "Velleman K8000",
+	},
 };
 
 static int __init i2c_bitvelle_init(void)
diff -Nru a/drivers/i2c/scx200_acb.c b/drivers/i2c/scx200_acb.c
--- a/drivers/i2c/scx200_acb.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/i2c/scx200_acb.c	Mon Mar 31 13:41:07 2003
@@ -140,8 +140,7 @@
 
 	switch (iface->state) {
 	case state_idle:
-		printk(KERN_WARNING NAME ": %s, interrupt in idle state\n", 
-		       iface->adapter.name);
+		dev_warn(&iface->adapter.dev, "interrupt in idle state\n");
 		break;
 
 	case state_address:
@@ -226,8 +225,8 @@
 	return;
 
  error:
-	printk(KERN_ERR NAME ": %s, %s in state %s\n", iface->adapter.name, 
-	       errmsg, scx200_acb_state_name[iface->state]);
+	dev_err(&iface->adapter.dev, "%s in state %s\n", errmsg,
+		scx200_acb_state_name[iface->state]);
 
 	iface->state = state_idle;
 	iface->result = -EIO;
@@ -236,8 +235,8 @@
 
 static void scx200_acb_timeout(struct scx200_acb_iface *iface) 
 {
-	printk(KERN_ERR NAME ": %s, timeout in state %s\n", 
-	       iface->adapter.name, scx200_acb_state_name[iface->state]);
+	dev_err(&iface->adapter.dev, "timeout in state %s\n",
+		scx200_acb_state_name[iface->state]);
 
 	iface->state = state_idle;
 	iface->result = -EIO;
@@ -290,7 +289,7 @@
 				char rw, u8 command, int size, 
 				union i2c_smbus_data *data)
 {
-	struct scx200_acb_iface *iface = adapter->data;
+	struct scx200_acb_iface *iface = i2c_get_adapdata(adapter);
 	int len;
 	u8 *buffer;
 	u16 cur_word;
@@ -331,13 +330,12 @@
 	    size, address, command, len, rw == I2C_SMBUS_READ);
 
 	if (!len && rw == I2C_SMBUS_READ) {
-		printk(KERN_WARNING NAME ": %s, zero length read\n", 
-		       adapter->name);
+		dev_warn(&adapter->dev, "zero length read\n");
 		return -EINVAL;
 	}
 
 	if (len && !buffer) {
-		printk(KERN_WARNING NAME ": %s, nonzero length but no buffer\n", adapter->name);
+		dev_warn(&adapter->dev, "nonzero length but no buffer\n");
 		return -EFAULT;
 	}
 
@@ -457,18 +455,18 @@
 
 	memset(iface, 0, sizeof(*iface));
 	adapter = &iface->adapter;
-	adapter->data = iface;
-	sprintf(adapter->name, "SCx200 ACB%d", index);
+	i2c_set_adapdata(adapter, iface);
+	snprintf(adapter->dev.name, DEVICE_NAME_SIZE, "SCx200 ACB%d", index);
 	adapter->owner = THIS_MODULE;
 	adapter->id = I2C_ALGO_SMBUS;
 	adapter->algo = &scx200_acb_algorithm;
 
 	init_MUTEX(&iface->sem);
 
-	sprintf(description, "NatSemi SCx200 ACCESS.bus [%s]", adapter->name);
+	snprintf(description, sizeof(description), "NatSemi SCx200 ACCESS.bus [%s]", adapter->dev.name);
 	if (request_region(base, 8, description) == 0) {
-		printk(KERN_ERR NAME ": %s, can't allocate io 0x%x-0x%x\n", 
-		       adapter->name, base, base + 8-1);
+		dev_err(&adapter->dev, "can't allocate io 0x%x-0x%x\n",
+			base, base + 8-1);
 		rc = -EBUSY;
 		goto errout;
 	}
@@ -476,14 +474,14 @@
 
 	rc = scx200_acb_probe(iface);
 	if (rc) {
-		printk(KERN_WARNING NAME ": %s, probe failed\n", adapter->name);
+		dev_warn(&adapter->dev, "probe failed\n");
 		goto errout;
 	}
 
 	scx200_acb_reset(iface);
 
 	if (i2c_add_adapter(adapter) < 0) {
-		printk(KERN_ERR NAME ": %s, failed to register\n", adapter->name);
+		dev_err(&adapter->dev, "failed to register\n");
 		rc = -ENODEV;
 		goto errout;
 	}
diff -Nru a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
--- a/drivers/ide/ide-disk.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/ide/ide-disk.c	Mon Mar 31 13:41:07 2003
@@ -1098,6 +1098,7 @@
  * in above order (i.e., if value of higher priority is available,
  * reset will be ignored).
  */
+#define IDE_STROKE_LIMIT	(32000*1024*2)
 static void init_idedisk_capacity (ide_drive_t  *drive)
 {
 	struct hd_driveid *id = drive->id;
@@ -1118,7 +1119,7 @@
 		drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect);
 		drive->select.b.lba	= 1;
 		set_max_ext = idedisk_read_native_max_address_ext(drive);
-		if (set_max_ext > capacity_2) {
+		if (set_max_ext > capacity_2 && capacity_2 > IDE_STROKE_LIMIT) {
 #ifdef CONFIG_IDEDISK_STROKE
 			set_max_ext = idedisk_read_native_max_address_ext(drive);
 			set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext);
@@ -1145,7 +1146,7 @@
 		drive->select.b.lba = 1;
 	}
 
-	if (set_max > capacity) {
+	if (set_max > capacity && capacity > IDE_STROKE_LIMIT) {
 #ifdef CONFIG_IDEDISK_STROKE
 		set_max = idedisk_read_native_max_address(drive);
 		set_max = idedisk_set_max_address(drive, set_max);
diff -Nru a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
--- a/drivers/ide/ide-iops.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/ide/ide-iops.c	Mon Mar 31 13:41:06 2003
@@ -903,6 +903,14 @@
          * Select the drive, and issue the SETFEATURES command
          */
 	disable_irq_nosync(hwif->irq);
+	
+	/*
+	 *	FIXME: we race against the running IRQ here if
+	 *	this is called from non IRQ context. If we use
+	 *	disable_irq() we hang on the error path. Work
+	 *	is needed.
+	 */
+	 
 	udelay(1);
 	SELECT_DRIVE(drive);
 	SELECT_MASK(drive, 0);
diff -Nru a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
--- a/drivers/ide/ide-taskfile.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/ide/ide-taskfile.c	Mon Mar 31 13:41:06 2003
@@ -1670,7 +1670,7 @@
 
 #else
 
-	int err = 0;
+	int err = -EIO;
 	u8 args[4], *argbuf = args;
 	u8 xfer_rate = 0;
 	int argsize = 0;
diff -Nru a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h
--- a/drivers/ide/ide-timing.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/ide/ide-timing.h	Mon Mar 31 13:41:07 2003
@@ -245,14 +245,6 @@
 	}
 
 /*
- * If the drive is an ATAPI device it may need slower address setup timing,
- * so we stay on the safe side.
- */
-
-	if (drive->media != ide_disk)
-		p.setup = 120;
-
-/*
  * Convert the timing to bus clock counts.
  */
 
diff -Nru a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c
--- a/drivers/ieee1394/pcilynx.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/ieee1394/pcilynx.c	Mon Mar 31 13:41:06 2003
@@ -138,10 +138,12 @@
 }; 
 
 static struct i2c_adapter bit_ops = {
-	.name			= "PCILynx I2C adapter",
 	.id 			= 0xAA, //FIXME: probably we should get an id in i2c-id.h
 	.client_register	= bit_reg,
 	.client_unregister	= bit_unreg,
+	.dev			= {
+		.name		= "PCILynx I2C",
+	},
 };
 
 
diff -Nru a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c
--- a/drivers/isdn/hisax/elsa_cs.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/isdn/hisax/elsa_cs.c	Mon Mar 31 13:41:07 2003
@@ -53,7 +53,6 @@
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/ds.h>
-#include <pcmcia/bus_ops.h>
 
 MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Elsa PCM cards");
 MODULE_AUTHOR("Klaus Lichtenwalder");
@@ -163,17 +162,12 @@
    "stopped" due to a power management event, or card ejection.  The
    device IO routines can use a flag like this to throttle IO to a
    card that is not ready to accept it.
-
-   The bus_operations pointer is used on platforms for which we need
-   to use special socket-specific versions of normal IO primitives
-   (inb, outb, readb, writeb, etc) for card IO.
 */
 
 typedef struct local_info_t {
     dev_link_t          link;
     dev_node_t          node;
     int                 busy;
-  struct bus_operations *bus;
 } local_info_t;
 
 /*====================================================================*/
@@ -522,7 +516,6 @@
         break;
     case CS_EVENT_CARD_INSERTION:
         link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
-        dev->bus = args->bus;
         elsa_cs_config(link);
         break;
     case CS_EVENT_PM_SUSPEND:
diff -Nru a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
--- a/drivers/isdn/hisax/sedlbauer_cs.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/isdn/hisax/sedlbauer_cs.c	Mon Mar 31 13:41:06 2003
@@ -53,7 +53,6 @@
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/ds.h>
-#include <pcmcia/bus_ops.h>
 
 MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Sedlbauer cards");
 MODULE_AUTHOR("Marcus Niemann");
@@ -171,17 +170,12 @@
    "stopped" due to a power management event, or card ejection.  The
    device IO routines can use a flag like this to throttle IO to a
    card that is not ready to accept it.
-
-   The bus_operations pointer is used on platforms for which we need
-   to use special socket-specific versions of normal IO primitives
-   (inb, outb, readb, writeb, etc) for card IO.
 */
    
 typedef struct local_info_t {
     dev_link_t		link;
     dev_node_t		node;
     int			stop;
-    struct bus_operations *bus;
 } local_info_t;
 
 /*====================================================================*/
@@ -620,7 +614,6 @@
 	break;
     case CS_EVENT_CARD_INSERTION:
 	link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
-	dev->bus = args->bus;
 	sedlbauer_config(link);
 	break;
     case CS_EVENT_PM_SUSPEND:
diff -Nru a/drivers/md/linear.c b/drivers/md/linear.c
--- a/drivers/md/linear.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/md/linear.c	Mon Mar 31 13:41:07 2003
@@ -37,7 +37,11 @@
 	linear_conf_t *conf = mddev_to_conf(mddev);
 	sector_t block = sector >> 1;
 
-	hash = conf->hash_table + sector_div(block, conf->smallest->size);
+	/*
+	 * sector_div(a,b) returns the remainder and sets a to a/b
+	 */
+	(void)sector_div(block, conf->smallest->size);
+	hash = conf->hash_table + block;
 
 	if ((sector>>1) >= (hash->dev0->size + hash->dev0->offset))
 		return hash->dev1;
@@ -75,8 +79,6 @@
 	unsigned int curr_offset;
 	struct list_head *tmp;
 
-	MOD_INC_USE_COUNT;
-
 	conf = kmalloc (sizeof (*conf), GFP_KERNEL);
 	if (!conf)
 		goto out;
@@ -163,7 +165,6 @@
 out:
 	if (conf)
 		kfree(conf);
-	MOD_DEC_USE_COUNT;
 	return 1;
 }
 
@@ -174,8 +175,6 @@
 	kfree(conf->hash_table);
 	kfree(conf);
 
-	MOD_DEC_USE_COUNT;
-
 	return 0;
 }
 
@@ -189,7 +188,7 @@
 	block = bio->bi_sector >> 1;
   
 	if (unlikely(!tmp_dev)) {
-		printk ("linear_make_request : hash->dev1==NULL for block %llu\n",
+		printk("linear_make_request: hash->dev1==NULL for block %llu\n",
 			(unsigned long long)block);
 		bio_io_error(bio, bio->bi_size);
 		return 0;
@@ -199,7 +198,7 @@
 		     || block < tmp_dev->offset)) {
 		char b[BDEVNAME_SIZE];
 
-		printk ("linear_make_request: Block %llu out of bounds on "
+		printk("linear_make_request: Block %llu out of bounds on "
 			"dev %s size %ld offset %ld\n",
 			(unsigned long long)block,
 			bdevname(tmp_dev->rdev->bdev, b),
@@ -242,6 +241,7 @@
 static mdk_personality_t linear_personality=
 {
 	.name		= "linear",
+	.owner		= THIS_MODULE,
 	.make_request	= linear_make_request,
 	.run		= linear_run,
 	.stop		= linear_stop,
diff -Nru a/drivers/md/md.c b/drivers/md/md.c
--- a/drivers/md/md.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/md/md.c	Mon Mar 31 13:41:06 2003
@@ -64,6 +64,7 @@
 #endif
 
 static mdk_personality_t *pers[MAX_PERSONALITY];
+static spinlock_t pers_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
@@ -302,22 +303,6 @@
 	return 0;
 }
 
-
-#define BAD_MAGIC KERN_ERR \
-"md: invalid raid superblock magic on %s\n"
-
-#define BAD_MINOR KERN_ERR \
-"md: %s: invalid raid minor (%x)\n"
-
-#define OUT_OF_MEM KERN_ALERT \
-"md: out of memory.\n"
-
-#define NO_SB KERN_ERR \
-"md: disabled device %s, could not read superblock.\n"
-
-#define BAD_CSUM KERN_WARNING \
-"md: invalid superblock checksum on %s\n"
-
 static int alloc_disk_sb(mdk_rdev_t * rdev)
 {
 	if (rdev->sb_page)
@@ -325,7 +310,7 @@
 
 	rdev->sb_page = alloc_page(GFP_KERNEL);
 	if (!rdev->sb_page) {
-		printk(OUT_OF_MEM);
+		printk(KERN_ALERT "md: out of memory.\n");
 		return -EINVAL;
 	}
 
@@ -397,7 +382,8 @@
 	return 0;
 
 fail:
-	printk(NO_SB,bdev_partition_name(rdev->bdev));
+	printk(KERN_ERR "md: disabled device %s, could not read superblock.\n",
+		bdev_partition_name(rdev->bdev));
 	return -EINVAL;
 }
 
@@ -526,27 +512,30 @@
 	sb = (mdp_super_t*)page_address(rdev->sb_page);
 
 	if (sb->md_magic != MD_SB_MAGIC) {
-		printk(BAD_MAGIC, bdev_partition_name(rdev->bdev));
+		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
+			bdev_partition_name(rdev->bdev));
 		goto abort;
 	}
 
 	if (sb->major_version != 0 ||
 	    sb->minor_version != 90) {
 		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
-		       sb->major_version, sb->minor_version,
-		       bdev_partition_name(rdev->bdev));
+			sb->major_version, sb->minor_version,
+			bdev_partition_name(rdev->bdev));
 		goto abort;
 	}
 
 	if (sb->md_minor >= MAX_MD_DEVS) {
-		printk(BAD_MINOR, bdev_partition_name(rdev->bdev), sb->md_minor);
+		printk(KERN_ERR "md: %s: invalid raid minor (%x)\n",
+			bdev_partition_name(rdev->bdev), sb->md_minor);
 		goto abort;
 	}
 	if (sb->raid_disks <= 0)
 		goto abort;
 
 	if (calc_sb_csum(sb) != sb->sb_csum) {
-		printk(BAD_CSUM, bdev_partition_name(rdev->bdev));
+		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
+			bdev_partition_name(rdev->bdev));
 		goto abort;
 	}
 
@@ -565,14 +554,15 @@
 		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
 		if (!uuid_equal(refsb, sb)) {
 			printk(KERN_WARNING "md: %s has different UUID to %s\n",
-			       bdev_partition_name(rdev->bdev),
-			       bdev_partition_name(refdev->bdev));
+				bdev_partition_name(rdev->bdev),
+				bdev_partition_name(refdev->bdev));
 			goto abort;
 		}
 		if (!sb_equal(refsb, sb)) {
-			printk(KERN_WARNING "md: %s has same UUID but different superblock to %s\n",
-			       bdev_partition_name(rdev->bdev),
-			       bdev_partition_name(refdev->bdev));
+			printk(KERN_WARNING "md: %s has same UUID"
+				" but different superblock to %s\n",
+				bdev_partition_name(rdev->bdev),
+				bdev_partition_name(refdev->bdev));
 			goto abort;
 		}
 		ev1 = md_event(sb);
@@ -826,7 +816,8 @@
 		return -EINVAL;
 
 	if (calc_sb_1_csum(sb) != sb->sb_csum) {
-		printk(BAD_CSUM, bdev_partition_name(rdev->bdev));
+		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
+			bdev_partition_name(rdev->bdev));
 		return -EINVAL;
 	}
 	rdev->preferred_minor = 0xffff;
@@ -843,9 +834,10 @@
 		    sb->level != refsb->level ||
 		    sb->layout != refsb->layout ||
 		    sb->chunksize != refsb->chunksize) {
-			printk(KERN_WARNING "md: %s has strangely different superblock to %s\n",
-			       bdev_partition_name(rdev->bdev),
-			       bdev_partition_name(refdev->bdev));
+			printk(KERN_WARNING "md: %s has strangely different"
+				" superblock to %s\n",
+				bdev_partition_name(rdev->bdev),
+				bdev_partition_name(refdev->bdev));
 			return -EINVAL;
 		}
 		ev1 = le64_to_cpu(sb->events);
@@ -1020,11 +1012,12 @@
 	}
 	same_pdev = match_dev_unit(mddev, rdev);
 	if (same_pdev)
-		printk( KERN_WARNING
-"md%d: WARNING: %s appears to be on the same physical disk as %s. True\n"
-"     protection against single-disk failure might be compromised.\n",
+		printk(KERN_WARNING
+			"md%d: WARNING: %s appears to be on the same physical"
+	 		" disk as %s. True\n     protection against single-disk"
+			" failure might be compromised.\n",
 			mdidx(mddev), bdev_partition_name(rdev->bdev),
-				bdev_partition_name(same_pdev->bdev));
+			bdev_partition_name(same_pdev->bdev));
 
 	/* Verify rdev->desc_nr is unique.
 	 * If it is -1, assign a free number, else
@@ -1099,7 +1092,8 @@
 
 static void export_rdev(mdk_rdev_t * rdev)
 {
-	printk(KERN_INFO "md: export_rdev(%s)\n",bdev_partition_name(rdev->bdev));
+	printk(KERN_INFO "md: export_rdev(%s)\n",
+		bdev_partition_name(rdev->bdev));
 	if (rdev->mddev)
 		MD_BUG();
 	free_disk_sb(rdev);
@@ -1135,11 +1129,6 @@
 	mddev->major_version = 0;
 }
 
-#undef BAD_CSUM
-#undef BAD_MAGIC
-#undef OUT_OF_MEM
-#undef NO_SB
-
 static void print_desc(mdp_disk_t *desc)
 {
 	printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number,
@@ -1151,14 +1140,16 @@
 {
 	int i;
 
-	printk(KERN_INFO "md:  SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
+	printk(KERN_INFO 
+		"md:  SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
 		sb->major_version, sb->minor_version, sb->patch_version,
 		sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
 		sb->ctime);
-	printk(KERN_INFO "md:     L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level,
-		sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor,
-		sb->layout, sb->chunk_size);
-	printk(KERN_INFO "md:     UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n",
+	printk(KERN_INFO "md:     L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
+		sb->level, sb->size, sb->nr_disks, sb->raid_disks,
+		sb->md_minor, sb->layout, sb->chunk_size);
+	printk(KERN_INFO "md:     UT:%08x ST:%d AD:%d WD:%d"
+		" FD:%d SD:%d CSUM:%08x E:%08lx\n",
 		sb->utime, sb->state, sb->active_disks, sb->working_disks,
 		sb->failed_disks, sb->spare_disks,
 		sb->sb_csum, (unsigned long)sb->events_lo);
@@ -1182,8 +1173,8 @@
 static void print_rdev(mdk_rdev_t *rdev)
 {
 	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%d ",
-		bdev_partition_name(rdev->bdev),
-		(unsigned long long)rdev->size, rdev->faulty, rdev->in_sync, rdev->desc_nr);
+		bdev_partition_name(rdev->bdev), (unsigned long long)rdev->size,
+	       	rdev->faulty, rdev->in_sync, rdev->desc_nr);
 	if (rdev->sb_loaded) {
 		printk(KERN_INFO "md: rdev superblock:\n");
 		print_sb((mdp_super_t*)page_address(rdev->sb_page));
@@ -1227,13 +1218,15 @@
 		return 1;
 	}
 
-	dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", bdev_partition_name(rdev->bdev),
+	dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
+		bdev_partition_name(rdev->bdev),
 	       (unsigned long long)rdev->sb_offset);
   
 	if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
 		return 0;
 
-	printk("md: write_disk_sb failed for device %s\n", bdev_partition_name(rdev->bdev));
+	printk("md: write_disk_sb failed for device %s\n", 
+		bdev_partition_name(rdev->bdev));
 	return 1;
 }
 
@@ -1278,8 +1271,9 @@
 	if (!mddev->persistent)
 		return;
 
-	dprintk(KERN_INFO "md: updating md%d RAID superblock on device (in sync %d)\n",
-					mdidx(mddev),mddev->in_sync);
+	dprintk(KERN_INFO 
+		"md: updating md%d RAID superblock on device (in sync %d)\n",
+		mdidx(mddev),mddev->in_sync);
 
 	err = 0;
 	ITERATE_RDEV(mddev,rdev,tmp) {
@@ -1298,10 +1292,12 @@
 	}
 	if (err) {
 		if (--count) {
-			printk(KERN_ERR "md: errors occurred during superblock update, repeating\n");
+			printk(KERN_ERR "md: errors occurred during superblock"
+				" update, repeating\n");
 			goto repeat;
 		}
-		printk(KERN_ERR "md: excessive errors occurred during superblock update, exiting\n");
+		printk(KERN_ERR \
+			"md: excessive errors occurred during superblock update, exiting\n");
 	}
 }
 
@@ -1323,7 +1319,8 @@
 
 	rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL);
 	if (!rdev) {
-		printk(KERN_ERR "md: could not alloc mem for %s!\n", partition_name(newdev));
+		printk(KERN_ERR "md: could not alloc mem for %s!\n", 
+			partition_name(newdev));
 		return ERR_PTR(-ENOMEM);
 	}
 	memset(rdev, 0, sizeof(*rdev));
@@ -1345,9 +1342,9 @@
 
 	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	if (!size) {
-		printk(KERN_WARNING
-		       "md: %s has zero or unknown size, marking faulty!\n",
-		       bdev_partition_name(rdev->bdev));
+		printk(KERN_WARNING 
+			"md: %s has zero or unknown size, marking faulty!\n",
+			bdev_partition_name(rdev->bdev));
 		err = -EINVAL;
 		goto abort_free;
 	}
@@ -1356,13 +1353,15 @@
 		err = super_types[super_format].
 			load_super(rdev, NULL, super_minor);
 		if (err == -EINVAL) {
-			printk(KERN_WARNING "md: %s has invalid sb, not importing!\n",
-			       bdev_partition_name(rdev->bdev));
+			printk(KERN_WARNING 
+				"md: %s has invalid sb, not importing!\n",
+				bdev_partition_name(rdev->bdev));
 			goto abort_free;
 		}
 		if (err < 0) {
-			printk(KERN_WARNING "md: could not read %s's sb, not importing!\n",
-			       bdev_partition_name(rdev->bdev));
+			printk(KERN_WARNING 
+				"md: could not read %s's sb, not importing!\n",
+				bdev_partition_name(rdev->bdev));
 			goto abort_free;
 		}
 	}
@@ -1384,20 +1383,6 @@
  * Check a full RAID array for plausibility
  */
 
-#define INCONSISTENT KERN_ERR \
-"md: fatal superblock inconsistency in %s -- removing from array\n"
-
-#define OUT_OF_DATE KERN_ERR \
-"md: superblock update time inconsistency -- using the most recent one\n"
-
-#define OLD_VERSION KERN_ALERT \
-"md: md%d: unsupported raid array version %d.%d.%d\n"
-
-#define NOT_CLEAN_IGNORE KERN_ERR \
-"md: md%d: raid array is not clean -- starting background reconstruction\n"
-
-#define UNKNOWN_LEVEL KERN_ERR \
-"md: md%d: unsupported raid level %d\n"
 
 static int analyze_sbs(mddev_t * mddev)
 {
@@ -1415,7 +1400,10 @@
 		case 0:
 			break;
 		default:
-			printk(INCONSISTENT, bdev_partition_name(rdev->bdev));
+			printk( KERN_ERR \
+				"md: fatal superblock inconsistency in %s"
+				" -- removing from array\n", 
+				bdev_partition_name(rdev->bdev));
 			kick_rdev_from_array(rdev);
 		}
 
@@ -1428,8 +1416,9 @@
 		if (rdev != freshest)
 			if (super_types[mddev->major_version].
 			    validate_super(mddev, rdev)) {
-				printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
-				       bdev_partition_name(rdev->bdev));
+				printk(KERN_WARNING "md: kicking non-fresh %s"
+					" from array!\n",
+					bdev_partition_name(rdev->bdev));
 				kick_rdev_from_array(rdev);
 				continue;
 			}
@@ -1446,26 +1435,24 @@
 	 */
 	if (mddev->major_version != MD_MAJOR_VERSION ||
 			mddev->minor_version > MD_MINOR_VERSION) {
-
-		printk(OLD_VERSION, mdidx(mddev), mddev->major_version,
-				mddev->minor_version, mddev->patch_version);
+		printk(KERN_ALERT 
+			"md: md%d: unsupported raid array version %d.%d.%d\n",
+			mdidx(mddev), mddev->major_version,
+			mddev->minor_version, mddev->patch_version);
 		goto abort;
 	}
 
 	if ((mddev->recovery_cp != MaxSector) && ((mddev->level == 1) ||
 			(mddev->level == 4) || (mddev->level == 5)))
-		printk(NOT_CLEAN_IGNORE, mdidx(mddev));
+		printk(KERN_ERR "md: md%d: raid array is not clean"
+			" -- starting background reconstruction\n", 
+			mdidx(mddev));
 
 	return 0;
 abort:
 	return 1;
 }
 
-#undef INCONSISTENT
-#undef OUT_OF_DATE
-#undef OLD_VERSION
-#undef OLD_LEVEL
-
 static int device_size_calculation(mddev_t * mddev)
 {
 	int data_disks = 0;
@@ -1484,9 +1471,11 @@
 			continue;
 		if (rdev->size < mddev->chunk_size / 1024) {
 			printk(KERN_WARNING
-				"md: Dev %s smaller than chunk_size: %lluk < %dk\n",
+				"md: Dev %s smaller than chunk_size:"
+				" %lluk < %dk\n",
 				bdev_partition_name(rdev->bdev),
-				(unsigned long long)rdev->size, mddev->chunk_size / 1024);
+				(unsigned long long)rdev->size,
+				mddev->chunk_size / 1024);
 			return -EINVAL;
 		}
 	}
@@ -1517,7 +1506,8 @@
 			data_disks = mddev->raid_disks-1;
 			break;
 		default:
-			printk(UNKNOWN_LEVEL, mdidx(mddev), mddev->level);
+			printk(KERN_ERR "md: md%d: unsupported raid level %d\n",
+				mdidx(mddev), mddev->level);
 			goto abort;
 	}
 	if (!md_size[mdidx(mddev)])
@@ -1539,7 +1529,7 @@
 
 	printk(KERN_INFO
 		"md%d: %d data-disks, max readahead per data-disk: %ldk\n",
-			mdidx(mddev), data_disks, readahead/data_disks*(PAGE_SIZE/1024));
+		mdidx(mddev), data_disks, readahead/data_disks*(PAGE_SIZE/1024));
 	return 0;
 abort:
 	return 1;
@@ -1589,14 +1579,6 @@
 	md_wakeup_thread(mddev->thread);
 }
 
-#define TOO_BIG_CHUNKSIZE KERN_ERR \
-"too big chunk_size: %d > %d\n"
-
-#define TOO_SMALL_CHUNKSIZE KERN_ERR \
-"too small chunk_size: %d < %ld\n"
-
-#define BAD_CHUNKSIZE KERN_ERR \
-"no chunksize specified, see 'man raidtab'\n"
 
 static int do_md_run(mddev_t * mddev)
 {
@@ -1639,11 +1621,13 @@
 			 * we abort here to be on the safe side. We don't
 			 * want to continue the bad practice.
 			 */
-			printk(BAD_CHUNKSIZE);
+			printk(KERN_ERR 
+				"no chunksize specified, see 'man raidtab'\n");
 			return -EINVAL;
 		}
 		if (chunk_size > MAX_CHUNK_SIZE) {
-			printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE);
+			printk(KERN_ERR "too big chunk_size: %d > %d\n",
+				chunk_size, MAX_CHUNK_SIZE);
 			return -EINVAL;
 		}
 		/*
@@ -1654,7 +1638,8 @@
 			return -EINVAL;
 		}
 		if (chunk_size < PAGE_SIZE) {
-			printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE);
+			printk(KERN_ERR "too small chunk_size: %d < %ld\n",
+				chunk_size, PAGE_SIZE);
 			return -EINVAL;
 		}
 	}
@@ -1664,20 +1649,14 @@
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_KMOD
 	if (!pers[pnum])
 	{
-#ifdef CONFIG_KMOD
 		char module_name[80];
 		sprintf (module_name, "md-personality-%d", pnum);
 		request_module (module_name);
-		if (!pers[pnum])
-#endif
-		{
-			printk(KERN_ERR "md: personality %d is not loaded!\n",
-				pnum);
-			return -EINVAL;
-		}
 	}
+#endif
 
 	if (device_size_calculation(mddev))
 		return -EINVAL;
@@ -1711,13 +1690,23 @@
 	disk = disks[mdidx(mddev)];
 	if (!disk)
 		return -ENOMEM;
+
+	spin_lock(&pers_lock);
+	if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) {
+		spin_unlock(&pers_lock);
+		printk(KERN_ERR "md: personality %d is not loaded!\n",
+		       pnum);
+		return -EINVAL;
+	}
+
 	mddev->pers = pers[pnum];
+	spin_unlock(&pers_lock);
 
 	blk_queue_make_request(&mddev->queue, mddev->pers->make_request);
 	printk("%s: setting max_sectors to %d, segment boundary to %d\n",
-	       disk->disk_name,
-	       chunk_size >> 9,
-	       (chunk_size>>1)-1);
+		disk->disk_name,
+		chunk_size >> 9,
+		(chunk_size>>1)-1);
 	blk_queue_max_sectors(&mddev->queue, chunk_size >> 9);
 	blk_queue_segment_boundary(&mddev->queue, (chunk_size>>1) - 1);
 	mddev->queue.queuedata = mddev;
@@ -1726,6 +1715,7 @@
 	if (err) {
 		printk(KERN_ERR "md: pers->run() failed ...\n");
+		module_put(mddev->pers->owner);
 		mddev->pers = NULL;
 		return -EINVAL;
 	}
  	atomic_set(&mddev->writes_pending,0);
@@ -1741,9 +1731,6 @@
 	return (0);
 }
 
-#undef TOO_BIG_CHUNKSIZE
-#undef BAD_CHUNKSIZE
-
 static int restart_array(mddev_t *mddev)
 {
 	struct gendisk *disk = disks[mdidx(mddev)];
@@ -1765,8 +1752,8 @@
 		mddev->ro = 0;
 		set_disk_ro(disk, 0);
 
-		printk(KERN_INFO
-			"md: md%d switched to read-write mode.\n", mdidx(mddev));
+		printk(KERN_INFO "md: md%d switched to read-write mode.\n",
+			mdidx(mddev));
 		/*
 		 * Kick recovery or resync if necessary
 		 */
@@ -1783,18 +1770,13 @@
 	return err;
 }
 
-#define STILL_MOUNTED KERN_WARNING \
-"md: md%d still mounted.\n"
-#define	STILL_IN_USE \
-"md: md%d still in use.\n"
-
 static int do_md_stop(mddev_t * mddev, int ro)
 {
 	int err = 0;
 	struct gendisk *disk = disks[mdidx(mddev)];
 
 	if (atomic_read(&mddev->active)>2) {
-		printk(STILL_IN_USE, mdidx(mddev));
+		printk("md: md%d still in use.\n",mdidx(mddev));
 		err = -EBUSY;
 		goto out;
 	}
@@ -1824,6 +1806,7 @@
 					set_disk_ro(disk, 1);
 				goto out;
 			}
+			module_put(mddev->pers->owner);
 			mddev->pers = NULL;
 			if (mddev->ro)
 				mddev->ro = 0;
@@ -1850,7 +1833,8 @@
 		if (disk)
 			set_capacity(disk, 0);
 	} else
-		printk(KERN_INFO "md: md%d switched to read-only mode.\n", mdidx(mddev));
+		printk(KERN_INFO "md: md%d switched to read-only mode.\n",
+			mdidx(mddev));
 	err = 0;
 out:
 	return err;
@@ -1905,11 +1889,13 @@
 		rdev0 = list_entry(pending_raid_disks.next,
 					 mdk_rdev_t, same_set);
 
-		printk(KERN_INFO "md: considering %s ...\n", bdev_partition_name(rdev0->bdev));
+		printk(KERN_INFO "md: considering %s ...\n",
+			bdev_partition_name(rdev0->bdev));
 		INIT_LIST_HEAD(&candidates);
 		ITERATE_RDEV_PENDING(rdev,tmp)
 			if (super_90_load(rdev, rdev0, 0) >= 0) {
-				printk(KERN_INFO "md:  adding %s ...\n", bdev_partition_name(rdev->bdev));
+				printk(KERN_INFO "md:  adding %s ...\n",
+					bdev_partition_name(rdev->bdev));
 				list_move(&rdev->same_set, &candidates);
 			}
 		/*
@@ -1920,7 +1906,8 @@
 
 		mddev = mddev_find(rdev0->preferred_minor);
 		if (!mddev) {
-			printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
+			printk(KERN_ERR 
+				"md: cannot allocate memory for md drive.\n");
 			break;
 		}
 		if (mddev_lock(mddev)) 
@@ -1928,8 +1915,9 @@
 			       mdidx(mddev));
 		else if (mddev->raid_disks || mddev->major_version
 			 || !list_empty(&mddev->disks)) {
-			printk(KERN_WARNING "md: md%d already running, cannot run %s\n",
-			       mdidx(mddev), bdev_partition_name(rdev0->bdev));
+			printk(KERN_WARNING 
+				"md: md%d already running, cannot run %s\n",
+				mdidx(mddev), bdev_partition_name(rdev0->bdev));
 			mddev_unlock(mddev);
 		} else {
 			printk(KERN_INFO "md: created md%d\n", mdidx(mddev));
@@ -1956,33 +1944,6 @@
  * if possible, the array gets run as well.
  */
 
-#define BAD_VERSION KERN_ERR \
-"md: %s has RAID superblock version 0.%d, autodetect needs v0.90 or higher\n"
-
-#define OUT_OF_MEM KERN_ALERT \
-"md: out of memory.\n"
-
-#define NO_DEVICE KERN_ERR \
-"md: disabled device %s\n"
-
-#define AUTOADD_FAILED KERN_ERR \
-"md: auto-adding devices to md%d FAILED (error %d).\n"
-
-#define AUTOADD_FAILED_USED KERN_ERR \
-"md: cannot auto-add device %s to md%d, already used.\n"
-
-#define AUTORUN_FAILED KERN_ERR \
-"md: auto-running md%d FAILED (error %d).\n"
-
-#define MDDEV_BUSY KERN_ERR \
-"md: cannot auto-add to md%d, already running.\n"
-
-#define AUTOADDING KERN_INFO \
-"md: auto-adding devices to md%d, based on %s's superblock.\n"
-
-#define AUTORUNNING KERN_INFO \
-"md: auto-running md%d.\n"
-
 static int autostart_array(dev_t startdev)
 {
 	int err = -EINVAL, i;
@@ -1991,7 +1952,8 @@
 
 	start_rdev = md_import_device(startdev, 0, 0);
 	if (IS_ERR(start_rdev)) {
-		printk(KERN_WARNING "md: could not import %s!\n", partition_name(startdev));
+		printk(KERN_WARNING "md: could not import %s!\n",
+			partition_name(startdev));
 		return err;
 	}
 
@@ -2005,8 +1967,9 @@
 	}
 
 	if (start_rdev->faulty) {
-		printk(KERN_WARNING "md: can not autostart based on faulty %s!\n",
-						bdev_partition_name(start_rdev->bdev));
+		printk(KERN_WARNING 
+			"md: can not autostart based on faulty %s!\n",
+			bdev_partition_name(start_rdev->bdev));
 		export_rdev(start_rdev);
 		return err;
 	}
@@ -2025,8 +1988,9 @@
 			continue;
 		rdev = md_import_device(dev, 0, 0);
 		if (IS_ERR(rdev)) {
-			printk(KERN_WARNING "md: could not import %s, trying to run array nevertheless.\n",
-			       partition_name(dev));
+			printk(KERN_WARNING "md: could not import %s,"
+				" trying to run array nevertheless.\n",
+				partition_name(dev));
 			continue;
 		}
 		list_add(&rdev->same_set, &pending_raid_disks);
@@ -2040,15 +2004,6 @@
 
 }
 
-#undef BAD_VERSION
-#undef OUT_OF_MEM
-#undef NO_DEVICE
-#undef AUTOADD_FAILED_USED
-#undef AUTOADD_FAILED
-#undef AUTORUN_FAILED
-#undef AUTOADDING
-#undef AUTORUNNING
-
 
 static int get_version(void * arg)
 {
@@ -2113,8 +2068,6 @@
 
 	return 0;
 }
-#undef SET_FROM_SB
-
 
 static int get_disk_info(mddev_t * mddev, void * arg)
 {
@@ -2161,7 +2114,9 @@
 		/* expecting a device which has a superblock */
 		rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
 		if (IS_ERR(rdev)) {
-			printk(KERN_WARNING "md: md_import_device returned %ld\n", PTR_ERR(rdev));
+			printk(KERN_WARNING 
+				"md: md_import_device returned %ld\n",
+				PTR_ERR(rdev));
 			return PTR_ERR(rdev);
 		}
 		if (!list_empty(&mddev->disks)) {
@@ -2170,8 +2125,10 @@
 			int err = super_types[mddev->major_version]
 				.load_super(rdev, rdev0, mddev->minor_version);
 			if (err < 0) {
-				printk(KERN_WARNING "md: %s has different UUID to %s\n",
-				       bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
+				printk(KERN_WARNING 
+					"md: %s has different UUID to %s\n",
+					bdev_partition_name(rdev->bdev), 
+					bdev_partition_name(rdev0->bdev));
 				export_rdev(rdev);
 				return -EINVAL;
 			}
@@ -2190,14 +2147,17 @@
 	if (mddev->pers) {
 		int err;
 		if (!mddev->pers->hot_add_disk) {
-			printk(KERN_WARNING "md%d: personality does not support diskops!\n",
+			printk(KERN_WARNING 
+				"md%d: personality does not support diskops!\n",
 			       mdidx(mddev));
 			return -EINVAL;
 		}
 		rdev = md_import_device(dev, mddev->major_version,
 					mddev->minor_version);
 		if (IS_ERR(rdev)) {
-			printk(KERN_WARNING "md: md_import_device returned %ld\n", PTR_ERR(rdev));
+			printk(KERN_WARNING 
+				"md: md_import_device returned %ld\n",
+				PTR_ERR(rdev));
 			return PTR_ERR(rdev);
 		}
 		rdev->in_sync = 0; /* just to be sure */
@@ -2223,7 +2183,9 @@
 		int err;
 		rdev = md_import_device (dev, -1, 0);
 		if (IS_ERR(rdev)) {
-			printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", PTR_ERR(rdev));
+			printk(KERN_WARNING 
+				"md: error, md_import_device() returned %ld\n",
+				PTR_ERR(rdev));
 			return PTR_ERR(rdev);
 		}
 		rdev->desc_nr = info->number;
@@ -2333,19 +2295,23 @@
 		partition_name(dev), mdidx(mddev));
 
 	if (mddev->major_version != 0) {
-		printk(KERN_WARNING "md%d: HOT_ADD may only be used with version-0 superblocks.\n",
-		       mdidx(mddev));
+		printk(KERN_WARNING "md%d: HOT_ADD may only be used with"
+			" version-0 superblocks.\n",
+			mdidx(mddev));
 		return -EINVAL;
 	}
 	if (!mddev->pers->hot_add_disk) {
-		printk(KERN_WARNING "md%d: personality does not support diskops!\n",
-		       mdidx(mddev));
+		printk(KERN_WARNING 
+			"md%d: personality does not support diskops!\n",
+			mdidx(mddev));
 		return -EINVAL;
 	}
 
 	rdev = md_import_device (dev, -1, 0);
 	if (IS_ERR(rdev)) {
-		printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", PTR_ERR(rdev));
+		printk(KERN_WARNING 
+			"md: error, md_import_device() returned %ld\n",
+			PTR_ERR(rdev));
 		return -EINVAL;
 	}
 
@@ -2354,16 +2320,18 @@
 	rdev->size = size;
 
 	if (size < mddev->size) {
-		printk(KERN_WARNING "md%d: disk size %llu blocks < array size %llu\n",
-				mdidx(mddev), (unsigned long long)size, 
-				(unsigned long long)mddev->size);
+		printk(KERN_WARNING 
+			"md%d: disk size %llu blocks < array size %llu\n",
+			mdidx(mddev), (unsigned long long)size,
+			(unsigned long long)mddev->size);
 		err = -ENOSPC;
 		goto abort_export;
 	}
 
 	if (rdev->faulty) {
-		printk(KERN_WARNING "md: can not hot-add faulty %s disk to md%d!\n",
-				bdev_partition_name(rdev->bdev), mdidx(mddev));
+		printk(KERN_WARNING 
+			"md: can not hot-add faulty %s disk to md%d!\n",
+			bdev_partition_name(rdev->bdev), mdidx(mddev));
 		err = -EINVAL;
 		goto abort_export;
 	}
@@ -2378,7 +2346,7 @@
 
 	if (rdev->desc_nr == mddev->max_disks) {
 		printk(KERN_WARNING "md%d: can not hot-add to full array!\n",
-		       mdidx(mddev));
+			mdidx(mddev));
 		err = -EBUSY;
 		goto abort_unbind_export;
 	}
@@ -2426,8 +2394,9 @@
 		    info->major_version >= sizeof(super_types)/sizeof(super_types[0]) ||
 		    super_types[info->major_version].name == NULL) {
 			/* maybe try to auto-load a module? */
-			printk(KERN_INFO "md: superblock version %d not known\n",
-			       info->major_version);
+			printk(KERN_INFO 
+				"md: superblock version %d not known\n",
+				info->major_version);
 			return -EINVAL;
 		}
 		mddev->major_version = info->major_version;
@@ -2540,7 +2509,7 @@
 		err = autostart_array(arg);
 		if (err) {
 			printk(KERN_WARNING "md: autostart %s failed!\n",
-			       partition_name(arg));
+				partition_name(arg));
 			goto abort;
 		}
 		goto done;
@@ -2548,8 +2517,9 @@
 
 	err = mddev_lock(mddev);
 	if (err) {
-		printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",
-		       err, cmd);
+		printk(KERN_INFO 
+			"md: ioctl lock interrupted, reason %d, cmd %d\n",
+			err, cmd);
 		goto abort;
 	}
 
@@ -2558,13 +2528,15 @@
 		case SET_ARRAY_INFO:
 
 			if (!list_empty(&mddev->disks)) {
-				printk(KERN_WARNING "md: array md%d already has disks!\n",
+				printk(KERN_WARNING 
+					"md: array md%d already has disks!\n",
 					mdidx(mddev));
 				err = -EBUSY;
 				goto abort_unlock;
 			}
 			if (mddev->raid_disks) {
-				printk(KERN_WARNING "md: array md%d already initialised!\n",
+				printk(KERN_WARNING 
+					"md: array md%d already initialised!\n",
 					mdidx(mddev));
 				err = -EBUSY;
 				goto abort_unlock;
@@ -2579,7 +2551,8 @@
 				}
 				err = set_array_info(mddev, &info);
 				if (err) {
-					printk(KERN_WARNING "md: couldn't set array info. %d\n", err);
+					printk(KERN_WARNING "md: couldn't set"
+						" array info. %d\n", err);
 					goto abort_unlock;
 				}
 			}
@@ -2701,9 +2674,10 @@
 
 		default:
 			if (_IOC_TYPE(cmd) == MD_MAJOR)
-				printk(KERN_WARNING "md: %s(pid %d) used obsolete MD ioctl, "
-				       "upgrade your software to use new ictls.\n",
-				       current->comm, current->pid);
+				printk(KERN_WARNING "md: %s(pid %d) used"
+					" obsolete MD ioctl, upgrade your"
+					" software to use new ioctls.\n",
+					current->comm, current->pid);
 			err = -EINVAL;
 			goto abort_unlock;
 	}
@@ -2879,7 +2853,8 @@
 void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
-		MD_MAJOR,mdidx(mddev),MAJOR(rdev->bdev->bd_dev),MINOR(rdev->bdev->bd_dev),
+		MD_MAJOR,mdidx(mddev),
+		MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
 		__builtin_return_address(0),__builtin_return_address(1),
 		__builtin_return_address(2),__builtin_return_address(3));
 
@@ -3038,10 +3013,12 @@
 
 	if (v == (void*)1) {
 		seq_printf(seq, "Personalities : ");
+		spin_lock(&pers_lock);
 		for (i = 0; i < MAX_PERSONALITY; i++)
 			if (pers[i])
 				seq_printf(seq, "[%s] ", pers[i]->name);
 
+		spin_unlock(&pers_lock);
 		seq_printf(seq, "\n");
 		return 0;
 	}
@@ -3125,13 +3102,16 @@
 		return -EINVAL;
 	}
 
+	spin_lock(&pers_lock);
 	if (pers[pnum]) {
+		spin_unlock(&pers_lock);
 		MD_BUG();
 		return -EBUSY;
 	}
 
 	pers[pnum] = p;
 	printk(KERN_INFO "md: %s personality registered as nr %d\n", p->name, pnum);
+	spin_unlock(&pers_lock);
 	return 0;
 }
 
@@ -3143,7 +3123,9 @@
 	}
 
 	printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name);
+	spin_lock(&pers_lock);
 	pers[pnum] = NULL;
+	spin_unlock(&pers_lock);
 	return 0;
 }
 
@@ -3228,7 +3210,8 @@
 void md_handle_safemode(mddev_t *mddev)
 {
 	if (signal_pending(current)) {
-		printk(KERN_INFO "md: md%d in immediate safe mode\n",mdidx(mddev));
+		printk(KERN_INFO "md: md%d in immediate safe mode\n",
+			mdidx(mddev));
 		mddev->safemode = 2;
 		flush_signals(current);
 	}
@@ -3271,8 +3254,9 @@
 				continue;
 			if (mddev2->curr_resync && 
 			    match_mddev_units(mddev,mddev2)) {
-				printk(KERN_INFO "md: delaying resync of md%d until md%d "
-				       "has finished resync (they share one or more physical units)\n",
+				printk(KERN_INFO "md: delaying resync of md%d"
+					" until md%d has finished resync (they"
+				       	" share one or more physical units)\n",
 				       mdidx(mddev), mdidx(mddev2));
 				if (mddev < mddev2) {/* arbitrarily yield */
 					mddev->curr_resync = 1;
@@ -3295,7 +3279,8 @@
 	max_sectors = mddev->size << 1;
 
 	printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev));
-	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed: %d KB/sec/disc.\n", sysctl_speed_limit_min);
+	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
+		" %d KB/sec/disc.\n", sysctl_speed_limit_min);
 	printk(KERN_INFO "md: using maximum available idle IO bandwith "
 	       "(but not more than %d KB/sec) for reconstruction.\n",
 	       sysctl_speed_limit_max);
@@ -3318,14 +3303,16 @@
 	 */
 	window = 32*(PAGE_SIZE/512);
 	printk(KERN_INFO "md: using %dk window, over a total of %d blocks.\n",
-	       window/2,max_sectors/2);
+		window/2,max_sectors/2);
 
 	atomic_set(&mddev->recovery_active, 0);
 	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;
 
 	if (j)
-		printk(KERN_INFO "md: resuming recovery of md%d from checkpoint.\n", mdidx(mddev));
+		printk(KERN_INFO 
+			"md: resuming recovery of md%d from checkpoint.\n",
+			mdidx(mddev));
 
 	while (j < max_sectors) {
 		int sectors;
@@ -3367,7 +3354,8 @@
 			/*
 			 * got a signal, exit.
 			 */
-			printk(KERN_INFO "md: md_do_sync() got signal ... exiting\n");
+			printk(KERN_INFO 
+				"md: md_do_sync() got signal ... exiting\n");
 			flush_signals(current);
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto out;
@@ -3408,7 +3396,9 @@
 	    mddev->curr_resync > 2 &&
 	    mddev->curr_resync > mddev->recovery_cp) {
 		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
-			printk(KERN_INFO "md: checkpointing recovery of md%d.\n", mdidx(mddev));
+			printk(KERN_INFO 
+				"md: checkpointing recovery of md%d.\n",
+				mdidx(mddev));
 			mddev->recovery_cp = mddev->curr_resync;
 		} else
 			mddev->recovery_cp = MaxSector;
@@ -3526,7 +3516,9 @@
 								mddev,
 								"md%d_resync");
 			if (!mddev->sync_thread) {
-				printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev));
+				printk(KERN_ERR "md%d: could not start resync"
+					" thread...\n", 
+					mdidx(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
 				mddev->recovery = 0;
 			} else {
@@ -3590,7 +3582,8 @@
 {
 	int minor;
 
-	printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d, MD_SB_DISKS=%d\n",
+	printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d,"
+			" MD_SB_DISKS=%d\n",
 			MD_MAJOR_VERSION, MD_MINOR_VERSION,
 			MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS);
 
diff -Nru a/drivers/md/multipath.c b/drivers/md/multipath.c
--- a/drivers/md/multipath.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/md/multipath.c	Mon Mar 31 13:41:07 2003
@@ -78,7 +78,7 @@
 	}
 	spin_unlock_irq(&conf->device_lock);
 
-	printk (KERN_ERR "multipath_map(): no more operational IO paths?\n");
+	printk(KERN_ERR "multipath_map(): no more operational IO paths?\n");
 	return (-1);
 }
 
@@ -130,7 +130,8 @@
 		 */
 		md_error (mp_bh->mddev, rdev);
 		printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", 
-		       bdev_partition_name(rdev->bdev), (unsigned long long)bio->bi_sector);
+		       bdev_partition_name(rdev->bdev), 
+		       (unsigned long long)bio->bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	}
 	atomic_dec(&rdev->nr_pending);
@@ -198,16 +199,6 @@
 	seq_printf (seq, "]");
 }
 
-#define LAST_DISK KERN_ALERT \
-"multipath: only one IO path left and IO error.\n"
-
-#define NO_SPARE_DISK KERN_ALERT \
-"multipath: no spare IO path left!\n"
-
-#define DISK_FAILED KERN_ALERT \
-"multipath: IO failure on %s, disabling IO path. \n" \
-"	Operation continuing on %d IO paths.\n"
-
 
 /*
  * Careful, this can execute in IRQ contexts as well!
@@ -222,7 +213,8 @@
 		 * first check if this is a queued request for a device
 		 * which has just failed.
 		 */
-		printk (LAST_DISK);
+		printk(KERN_ALERT 
+			"multipath: only one IO path left and IO error.\n");
 		/* leave it active... it's all we have */
 	} else {
 		/*
@@ -233,17 +225,15 @@
 			rdev->faulty = 1;
 			mddev->sb_dirty = 1;
 			conf->working_disks--;
-			printk (DISK_FAILED, bdev_partition_name (rdev->bdev),
+			printk(KERN_ALERT "multipath: IO failure on %s,"
+				" disabling IO path. \n	Operation continuing"
+				" on %d IO paths.\n",
+				bdev_partition_name (rdev->bdev),
 				conf->working_disks);
 		}
 	}
 }
 
-#undef LAST_DISK
-#undef NO_SPARE_DISK
-#undef DISK_FAILED
-
-
 static void print_multipath_conf (multipath_conf_t *conf)
 {
 	int i;
@@ -302,7 +292,7 @@
 	if (p->rdev) {
 		if (p->rdev->in_sync ||
 		    atomic_read(&p->rdev->nr_pending)) {
-			printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
+			printk(KERN_ERR "hot-remove-disk, slot %d is identified"				" but is still operational!\n", number);
 			err = -EBUSY;
 			goto abort;
 		}
@@ -318,11 +308,7 @@
 	return err;
 }
 
-#define IO_ERROR KERN_ALERT \
-"multipath: %s: unrecoverable IO read error for block %llu\n"
 
-#define REDIRECT_SECTOR KERN_ERR \
-"multipath: %s: redirecting sector %llu to another IO path\n"
 
 /*
  * This is a kernel thread which:
@@ -354,59 +340,22 @@
 		
 		rdev = NULL;
 		if (multipath_map (mddev, &rdev)<0) {
-			printk(IO_ERROR,
-				bdev_partition_name(bio->bi_bdev), (unsigned long long)bio->bi_sector);
+			printk(KERN_ALERT "multipath: %s: unrecoverable IO read"
+				" error for block %llu\n",
+				bdev_partition_name(bio->bi_bdev), 
+				(unsigned long long)bio->bi_sector);
 			multipath_end_bh_io(mp_bh, 0);
 		} else {
-			printk(REDIRECT_SECTOR,
-				bdev_partition_name(bio->bi_bdev), (unsigned long long)bio->bi_sector);
+			printk(KERN_ERR "multipath: %s: redirecting sector %llu"
+				" to another IO path\n",
+				bdev_partition_name(bio->bi_bdev), 
+				(unsigned long long)bio->bi_sector);
 			bio->bi_bdev = rdev->bdev;
 			generic_make_request(bio);
 		}
 	}
 	spin_unlock_irqrestore(&retry_list_lock, flags);
 }
-#undef IO_ERROR
-#undef REDIRECT_SECTOR
-
-#define INVALID_LEVEL KERN_WARNING \
-"multipath: md%d: raid level not set to multipath IO (%d)\n"
-
-#define NO_SB KERN_ERR \
-"multipath: disabled IO path %s (couldn't access raid superblock)\n"
-
-#define ERRORS KERN_ERR \
-"multipath: disabled IO path %s (errors detected)\n"
-
-#define NOT_IN_SYNC KERN_ERR \
-"multipath: making IO path %s a spare path (not in sync)\n"
-
-#define INCONSISTENT KERN_ERR \
-"multipath: disabled IO path %s (inconsistent descriptor)\n"
-
-#define ALREADY_RUNNING KERN_ERR \
-"multipath: disabled IO path %s (multipath %d already operational)\n"
-
-#define OPERATIONAL KERN_INFO \
-"multipath: device %s operational as IO path %d\n"
-
-#define MEM_ERROR KERN_ERR \
-"multipath: couldn't allocate memory for md%d\n"
-
-#define SPARE KERN_INFO \
-"multipath: spare IO path %s\n"
-
-#define NONE_OPERATIONAL KERN_ERR \
-"multipath: no operational IO paths for md%d\n"
-
-#define SB_DIFFERENCES KERN_ERR \
-"multipath: detected IO path differences!\n"
-
-#define ARRAY_IS_ACTIVE KERN_INFO \
-"multipath: array md%d active with %d out of %d IO paths\n"
-
-#define THREAD_ERROR KERN_ERR \
-"multipath: couldn't allocate thread for md%d\n"
 
 static int multipath_run (mddev_t *mddev)
 {
@@ -416,10 +365,9 @@
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
-	MOD_INC_USE_COUNT;
-
 	if (mddev->level != LEVEL_MULTIPATH) {
-		printk(INVALID_LEVEL, mdidx(mddev), mddev->level);
+		printk(KERN_WARNING "multipath: md%d: raid level not set to"
+			" multipath IO (%d)\n", mdidx(mddev), mddev->level);
 		goto out;
 	}
 	/*
@@ -431,7 +379,9 @@
 	conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
 	mddev->private = conf;
 	if (!conf) {
-		printk(MEM_ERROR, mdidx(mddev));
+		printk(KERN_ERR 
+			"multipath: couldn't allocate memory for md%d\n",
+			mdidx(mddev));
 		goto out;
 	}
 	memset(conf, 0, sizeof(*conf));
@@ -455,7 +405,8 @@
 	conf->device_lock = SPIN_LOCK_UNLOCKED;
 
 	if (!conf->working_disks) {
-		printk(NONE_OPERATIONAL, mdidx(mddev));
+		printk(KERN_ERR "multipath: no operational IO paths for md%d\n",
+			mdidx(mddev));
 		goto out_free_conf;
 	}
 	mddev->degraded = conf->raid_disks = conf->working_disks;
@@ -464,7 +415,9 @@
 				    mp_pool_alloc, mp_pool_free,
 				    NULL);
 	if (conf->pool == NULL) {
-		printk(MEM_ERROR, mdidx(mddev));
+		printk(KERN_ERR 
+			"multipath: couldn't allocate memory for md%d\n",
+			mdidx(mddev));
 		goto out_free_conf;
 	}
 
@@ -473,13 +426,15 @@
 
 		mddev->thread = md_register_thread(multipathd, mddev, name);
 		if (!mddev->thread) {
-			printk(THREAD_ERROR, mdidx(mddev));
+			printk(KERN_ERR "multipath: couldn't allocate thread"
+				" for md%d\n", mdidx(mddev));
 			goto out_free_conf;
 		}
 	}
 
-	printk(ARRAY_IS_ACTIVE, mdidx(mddev), conf->working_disks,
-			mddev->raid_disks);
+	printk(KERN_INFO 
+		"multipath: array md%d active with %d out of %d IO paths\n",
+		mdidx(mddev), conf->working_disks, mddev->raid_disks);
 	/*
 	 * Ok, everything is just fine now
 	 */
@@ -491,21 +446,9 @@
 	kfree(conf);
 	mddev->private = NULL;
 out:
-	MOD_DEC_USE_COUNT;
 	return -EIO;
 }
 
-#undef INVALID_LEVEL
-#undef NO_SB
-#undef ERRORS
-#undef NOT_IN_SYNC
-#undef INCONSISTENT
-#undef ALREADY_RUNNING
-#undef OPERATIONAL
-#undef SPARE
-#undef NONE_OPERATIONAL
-#undef SB_DIFFERENCES
-#undef ARRAY_IS_ACTIVE
 
 static int multipath_stop (mddev_t *mddev)
 {
@@ -515,13 +458,13 @@
 	mempool_destroy(conf->pool);
 	kfree(conf);
 	mddev->private = NULL;
-	MOD_DEC_USE_COUNT;
 	return 0;
 }
 
 static mdk_personality_t multipath_personality=
 {
 	.name		= "multipath",
+	.owner		= THIS_MODULE,
 	.make_request	= multipath_make_request,
 	.run		= multipath_run,
 	.stop		= multipath_stop,
diff -Nru a/drivers/md/raid0.c b/drivers/md/raid0.c
--- a/drivers/md/raid0.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/md/raid0.c	Mon Mar 31 13:41:07 2003
@@ -43,12 +43,15 @@
 	conf->nr_strip_zones = 0;
  
 	ITERATE_RDEV(mddev,rdev1,tmp1) {
-		printk("raid0: looking at %s\n", bdev_partition_name(rdev1->bdev));
+		printk("raid0: looking at %s\n",
+			bdev_partition_name(rdev1->bdev));
 		c = 0;
 		ITERATE_RDEV(mddev,rdev2,tmp2) {
 			printk("raid0:   comparing %s(%llu) with %s(%llu)\n",
-			       bdev_partition_name(rdev1->bdev), (unsigned long long)rdev1->size,
-			       bdev_partition_name(rdev2->bdev), (unsigned long long)rdev2->size);
+				bdev_partition_name(rdev1->bdev),
+				(unsigned long long)rdev1->size,
+				bdev_partition_name(rdev2->bdev),
+				(unsigned long long)rdev2->size);
 			if (rdev2 == rdev1) {
 				printk("raid0:   END\n");
 				break;
@@ -94,7 +97,8 @@
 			goto abort;
 		}
 		if (zone->dev[j]) {
-			printk("raid0: multiple devices for %d - aborting!\n", j);
+			printk("raid0: multiple devices for %d - aborting!\n",
+				j);
 			goto abort;
 		}
 		zone->dev[j] = rdev1;
@@ -103,8 +107,8 @@
 		cnt++;
 	}
 	if (cnt != mddev->raid_disks) {
-		printk("raid0: too few disks (%d of %d) - aborting!\n", cnt, 
-		       mddev->raid_disks);
+		printk("raid0: too few disks (%d of %d) - aborting!\n",
+			cnt, mddev->raid_disks);
 		goto abort;
 	}
 	zone->nb_dev = cnt;
@@ -136,7 +140,7 @@
 				if (!smallest || (rdev->size <smallest->size)) {
 					smallest = rdev;
 					printk("  (%llu) is smallest!.\n", 
-					       (unsigned long long)rdev->size);
+						(unsigned long long)rdev->size);
 				}
 			} else
 				printk(" nope.\n");
@@ -144,7 +148,8 @@
 
 		zone->nb_dev = c;
 		zone->size = (smallest->size - current_offset) * c;
-		printk("raid0: zone->nb_dev: %d, size: %llu\n",zone->nb_dev, (unsigned long long)zone->size);
+		printk("raid0: zone->nb_dev: %d, size: %llu\n",
+			zone->nb_dev, (unsigned long long)zone->size);
 
 		if (!conf->smallest || (zone->size < conf->smallest->size))
 			conf->smallest = zone;
@@ -153,7 +158,8 @@
 		curr_zone_offset += zone->size;
 
 		current_offset = smallest->size;
-		printk("raid0: current zone offset: %llu\n", (unsigned long long)current_offset);
+		printk("raid0: current zone offset: %llu\n",
+			(unsigned long long)current_offset);
 	}
 	printk("raid0: done.\n");
 	return 0;
@@ -191,8 +197,6 @@
 	s64 size;
 	raid0_conf_t *conf;
 
-	MOD_INC_USE_COUNT;
-
 	conf = vmalloc(sizeof (raid0_conf_t));
 	if (!conf)
 		goto out;
@@ -201,8 +205,10 @@
 	if (create_strip_zones (mddev)) 
 		goto out_free_conf;
 
-	printk("raid0 : md_size is %llu blocks.\n", (unsigned long long)md_size[mdidx(mddev)]);
-	printk("raid0 : conf->smallest->size is %llu blocks.\n", (unsigned long long)conf->smallest->size);
+	printk("raid0 : md_size is %llu blocks.\n", 
+		(unsigned long long)md_size[mdidx(mddev)]);
+	printk("raid0 : conf->smallest->size is %llu blocks.\n",
+		(unsigned long long)conf->smallest->size);
 	{
 #if __GNUC__ < 3
 		volatile
@@ -267,7 +273,6 @@
 	vfree(conf);
 	mddev->private = NULL;
 out:
-	MOD_DEC_USE_COUNT;
 	return 1;
 }
 
@@ -282,7 +287,6 @@
 	vfree (conf);
 	mddev->private = NULL;
 
-	MOD_DEC_USE_COUNT;
 	return 0;
 }
 
@@ -357,16 +361,21 @@
 	return 1;
 
 bad_map:
-	printk ("raid0_make_request bug: can't convert block across chunks or bigger than %dk %llu %d\n", chunk_size, (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+	printk("raid0_make_request bug: can't convert block across chunks"
+		" or bigger than %dk %llu %d\n", chunk_size, 
+		(unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 	goto outerr;
 bad_hash:
-	printk("raid0_make_request bug: hash==NULL for block %llu\n", (unsigned long long)block);
+	printk("raid0_make_request bug: hash==NULL for block %llu\n",
+		(unsigned long long)block);
 	goto outerr;
 bad_zone0:
-	printk ("raid0_make_request bug: hash->zone0==NULL for block %llu\n", (unsigned long long)block);
+	printk("raid0_make_request bug: hash->zone0==NULL for block %llu\n",
+		(unsigned long long)block);
 	goto outerr;
 bad_zone1:
-	printk ("raid0_make_request bug: hash->zone1==NULL for block %llu\n", (unsigned long long)block);
+	printk("raid0_make_request bug: hash->zone1==NULL for block %llu\n",
+			(unsigned long long)block);
  outerr:
 	bio_io_error(bio, bio->bi_size);
 	return 0;
@@ -411,6 +420,7 @@
 static mdk_personality_t raid0_personality=
 {
 	.name		= "raid0",
+	.owner		= THIS_MODULE,
 	.make_request	= raid0_make_request,
 	.run		= raid0_run,
 	.stop		= raid0_stop,
diff -Nru a/drivers/md/raid1.c b/drivers/md/raid1.c
--- a/drivers/md/raid1.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/md/raid1.c	Mon Mar 31 13:41:07 2003
@@ -217,7 +217,7 @@
 	}
 	spin_unlock_irq(&conf->device_lock);
 
-	printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
+	printk(KERN_ERR "raid1_map(): huh, no more operational devices?\n");
 	return -1;
 }
 
@@ -305,7 +305,7 @@
 			 * oops, read error:
 			 */
 			printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
-			       bdev_partition_name(conf->mirrors[mirror].rdev->bdev), (unsigned long long)r1_bio->sector);
+				bdev_partition_name(conf->mirrors[mirror].rdev->bdev), (unsigned long long)r1_bio->sector);
 			reschedule_retry(r1_bio);
 		}
 	} else {
@@ -584,22 +584,6 @@
 	seq_printf(seq, "]");
 }
 
-#define LAST_DISK KERN_ALERT \
-"raid1: only one disk left and IO error.\n"
-
-#define NO_SPARE_DISK KERN_ALERT \
-"raid1: no spare disk left, degrading mirror level by one.\n"
-
-#define DISK_FAILED KERN_ALERT \
-"raid1: Disk failure on %s, disabling device. \n" \
-"	Operation continuing on %d devices\n"
-
-#define START_SYNCING KERN_ALERT \
-"raid1: start syncing spare disk.\n"
-
-#define ALREADY_SYNCING KERN_INFO \
-"raid1: syncing already in progress.\n"
-
 
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
@@ -629,7 +613,9 @@
 	rdev->in_sync = 0;
 	rdev->faulty = 1;
 	mddev->sb_dirty = 1;
-	printk(DISK_FAILED, bdev_partition_name(rdev->bdev), conf->working_disks);
+	printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
+		"	Operation continuing on %d devices\n",
+		bdev_partition_name(rdev->bdev), conf->working_disks);
 }
 
 static void print_conf(conf_t *conf)
@@ -643,14 +629,14 @@
 		return;
 	}
 	printk(" --- wd:%d rd:%d\n", conf->working_disks,
-			conf->raid_disks);
+		conf->raid_disks);
 
 	for (i = 0; i < conf->raid_disks; i++) {
 		tmp = conf->mirrors + i;
 		if (tmp->rdev)
 			printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-			       i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
-			       bdev_partition_name(tmp->rdev->bdev));
+				i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
+				bdev_partition_name(tmp->rdev->bdev));
 	}
 }
 
@@ -743,11 +729,6 @@
 	return err;
 }
 
-#define IO_ERROR KERN_ALERT \
-"raid1: %s: unrecoverable I/O read error for block %llu\n"
-
-#define REDIRECT_SECTOR KERN_ERR \
-"raid1: %s: redirecting sector %llu to another mirror\n"
 
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
@@ -823,7 +804,10 @@
 		 * There is no point trying a read-for-reconstruct as
 		 * reconstruct is about to be aborted
 		 */
-		printk(IO_ERROR, bdev_partition_name(bio->bi_bdev), (unsigned long long)r1_bio->sector);
+		printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
+			" for block %llu\n",
+			bdev_partition_name(bio->bi_bdev), 
+			(unsigned long long)r1_bio->sector);
 		md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0);
 		put_buf(r1_bio);
 		return;
@@ -874,7 +858,8 @@
 		 * Nowhere to write this to... I guess we
 		 * must be done
 		 */
-		printk(KERN_ALERT "raid1: sync aborting as there is nowhere to write sector %llu\n", 
+		printk(KERN_ALERT "raid1: sync aborting as there is nowhere"
+			" to write sector %llu\n", 
 			(unsigned long long)r1_bio->sector);
 		md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0);
 		put_buf(r1_bio);
@@ -928,12 +913,17 @@
 		case READ:
 		case READA:
 			if (map(mddev, &rdev) == -1) {
-				printk(IO_ERROR, bdev_partition_name(bio->bi_bdev), (unsigned long long)r1_bio->sector);
+				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
+				" read error for block %llu\n",
+				bdev_partition_name(bio->bi_bdev), 
+				(unsigned long long)r1_bio->sector);
 				raid_end_bio_io(r1_bio, 0);
 				break;
 			}
-			printk(REDIRECT_SECTOR,
-				bdev_partition_name(rdev->bdev), (unsigned long long)r1_bio->sector);
+			printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
+				" another mirror\n",
+				bdev_partition_name(rdev->bdev), 
+				(unsigned long long)r1_bio->sector);
 			bio->bi_bdev = rdev->bdev;
 			bio->bi_sector = r1_bio->sector + rdev->data_offset;
 			bio->bi_rw = r1_bio->cmd;
@@ -1063,45 +1053,6 @@
 	return nr_sectors;
 }
 
-#define INVALID_LEVEL KERN_WARNING \
-"raid1: md%d: raid level not set to mirroring (%d)\n"
-
-#define NO_SB KERN_ERR \
-"raid1: disabled mirror %s (couldn't access raid superblock)\n"
-
-#define ERRORS KERN_ERR \
-"raid1: disabled mirror %s (errors detected)\n"
-
-#define NOT_IN_SYNC KERN_ERR \
-"raid1: disabled mirror %s (not in sync)\n"
-
-#define INCONSISTENT KERN_ERR \
-"raid1: disabled mirror %s (inconsistent descriptor)\n"
-
-#define ALREADY_RUNNING KERN_ERR \
-"raid1: disabled mirror %s (mirror %d already operational)\n"
-
-#define OPERATIONAL KERN_INFO \
-"raid1: device %s operational as mirror %d\n"
-
-#define MEM_ERROR KERN_ERR \
-"raid1: couldn't allocate memory for md%d\n"
-
-#define SPARE KERN_INFO \
-"raid1: spare disk %s\n"
-
-#define NONE_OPERATIONAL KERN_ERR \
-"raid1: no operational mirrors for md%d\n"
-
-#define ARRAY_IS_ACTIVE KERN_INFO \
-"raid1: raid set md%d active with %d out of %d mirrors\n"
-
-#define THREAD_ERROR KERN_ERR \
-"raid1: couldn't allocate thread for md%d\n"
-
-#define START_RESYNC KERN_WARNING \
-"raid1: raid set md%d not clean; reconstructing mirrors\n"
-
 static int run(mddev_t *mddev)
 {
 	conf_t *conf;
@@ -1110,10 +1061,9 @@
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
-	MOD_INC_USE_COUNT;
-
 	if (mddev->level != 1) {
-		printk(INVALID_LEVEL, mdidx(mddev), mddev->level);
+		printk(KERN_WARNING "raid1: md%d: raid level not set to"
+			" mirroring (%d)\n", mdidx(mddev), mddev->level);
 		goto out;
 	}
 	/*
@@ -1124,7 +1074,8 @@
 	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
 	mddev->private = conf;
 	if (!conf) {
-		printk(MEM_ERROR, mdidx(mddev));
+		printk(KERN_ERR "raid1: couldn't allocate memory for md%d\n",
+			mdidx(mddev));
 		goto out;
 	}
 	memset(conf, 0, sizeof(*conf));
@@ -1132,7 +1083,8 @@
 	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 						r1bio_pool_free, NULL);
 	if (!conf->r1bio_pool) {
-		printk(MEM_ERROR, mdidx(mddev));
+		printk(KERN_ERR "raid1: couldn't allocate memory for md%d\n", 
+			mdidx(mddev));
 		goto out;
 	}
 
@@ -1160,7 +1112,8 @@
 	init_waitqueue_head(&conf->wait_resume);
 
 	if (!conf->working_disks) {
-		printk(NONE_OPERATIONAL, mdidx(mddev));
+		printk(KERN_ERR "raid1: no operational mirrors for md%d\n",
+			mdidx(mddev));
 		goto out_free_conf;
 	}
 
@@ -1190,12 +1143,16 @@
 	{
 		mddev->thread = md_register_thread(raid1d, mddev, "md%d_raid1");
 		if (!mddev->thread) {
-			printk(THREAD_ERROR, mdidx(mddev));
+			printk(KERN_ERR 
+				"raid1: couldn't allocate thread for md%d\n", 
+				mdidx(mddev));
 			goto out_free_conf;
 		}
 	}
-
-	printk(ARRAY_IS_ACTIVE, mdidx(mddev), mddev->raid_disks - mddev->degraded, mddev->raid_disks);
+	printk(KERN_INFO 
+		"raid1: raid set md%d active with %d out of %d mirrors\n",
+		mdidx(mddev), mddev->raid_disks - mddev->degraded, 
+		mddev->raid_disks);
 	/*
 	 * Ok, everything is just fine now
 	 */
@@ -1207,7 +1164,6 @@
 	kfree(conf);
 	mddev->private = NULL;
 out:
-	MOD_DEC_USE_COUNT;
 	return -EIO;
 }
 
@@ -1221,13 +1177,13 @@
 		mempool_destroy(conf->r1bio_pool);
 	kfree(conf);
 	mddev->private = NULL;
-	MOD_DEC_USE_COUNT;
 	return 0;
 }
 
 static mdk_personality_t raid1_personality =
 {
 	.name		= "raid1",
+	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
 	.stop		= stop,
diff -Nru a/drivers/md/raid5.c b/drivers/md/raid5.c
--- a/drivers/md/raid5.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/md/raid5.c	Mon Mar 31 13:41:06 2003
@@ -182,7 +182,8 @@
 		BUG();
 	
 	CHECK_DEVLOCK();
-	PRINTK("init_stripe called, stripe %llu\n", (unsigned long long)sh->sector);
+	PRINTK("init_stripe called, stripe %llu\n", 
+		(unsigned long long)sh->sector);
 
 	remove_hash(sh);
 	
@@ -338,7 +339,9 @@
 		if (bi == &sh->dev[i].req)
 			break;
 
-	PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate);
+	PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", 
+		(unsigned long long)sh->sector, i, atomic_read(&sh->count), 
+		uptodate);
 	if (i == disks) {
 		BUG();
 		return 0;
@@ -409,7 +412,9 @@
 		if (bi == &sh->dev[i].req)
 			break;
 
-	PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate);
+	PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", 
+		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
+		uptodate);
 	if (i == disks) {
 		BUG();
 		return 0;
@@ -533,7 +538,8 @@
 			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
 			break;
 		default:
-			printk ("raid5: unsupported algorithm %d\n", conf->algorithm);
+			printk("raid5: unsupported algorithm %d\n",
+				conf->algorithm);
 	}
 
 	/*
@@ -573,7 +579,8 @@
 			i -= (sh->pd_idx + 1);
 			break;
 		default:
-			printk ("raid5: unsupported algorithm %d\n", conf->algorithm);
+			printk("raid5: unsupported algorithm %d\n",
+				conf->algorithm);
 	}
 
 	chunk_number = stripe * data_disks + i;
@@ -655,7 +662,8 @@
 	int i, count, disks = conf->raid_disks;
 	void *ptr[MAX_XOR_BLOCKS], *p;
 
-	PRINTK("compute_block, stripe %llu, idx %d\n", (unsigned long long)sh->sector, dd_idx);
+	PRINTK("compute_block, stripe %llu, idx %d\n", 
+		(unsigned long long)sh->sector, dd_idx);
 
 	ptr[0] = page_address(sh->dev[dd_idx].page);
 	memset(ptr[0], 0, STRIPE_SIZE);
@@ -667,7 +675,9 @@
 		if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
 			ptr[count++] = p;
 		else
-			printk("compute_block() %d, stripe %llu, %d not present\n", dd_idx, (unsigned long long)sh->sector, i);
+			printk("compute_block() %d, stripe %llu, %d"
+				" not present\n", dd_idx,
+				(unsigned long long)sh->sector, i);
 
 		check_xor();
 	}
@@ -683,7 +693,8 @@
 	void *ptr[MAX_XOR_BLOCKS];
 	struct bio *chosen;
 
-	PRINTK("compute_parity, stripe %llu, method %d\n", (unsigned long long)sh->sector, method);
+	PRINTK("compute_parity, stripe %llu, method %d\n",
+		(unsigned long long)sh->sector, method);
 
 	count = 1;
 	ptr[0] = page_address(sh->dev[pd_idx].page);
@@ -768,7 +779,9 @@
 	struct bio **bip;
 	raid5_conf_t *conf = sh->raid_conf;
 
-	PRINTK("adding bh b#%llu to stripe s#%llu\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector);
+	PRINTK("adding bh b#%llu to stripe s#%llu\n",
+		(unsigned long long)bi->bi_sector,
+		(unsigned long long)sh->sector);
 
 
 	spin_lock(&sh->lock);
@@ -789,7 +802,9 @@
 	spin_unlock_irq(&conf->device_lock);
 	spin_unlock(&sh->lock);
 
-	PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector, dd_idx);
+	PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n",
+		(unsigned long long)bi->bi_sector,
+		(unsigned long long)sh->sector, dd_idx);
 
 	if (forwrite) {
 		/* check if page is coverred */
@@ -838,7 +853,9 @@
 	int failed_num=0;
 	struct r5dev *dev;
 
-	PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n", (unsigned long long)sh->sector, atomic_read(&sh->count), sh->pd_idx);
+	PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
+		(unsigned long long)sh->sector, atomic_read(&sh->count),
+		sh->pd_idx);
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
@@ -853,8 +870,8 @@
 		clear_bit(R5_Insync, &dev->flags);
 		clear_bit(R5_Syncio, &dev->flags);
 
-		PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, 
-		       dev->flags, dev->toread, dev->towrite, dev->written);
+		PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
+			i, dev->flags, dev->toread, dev->towrite, dev->written);
 		/* maybe we can reply to a read */
 		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
 			struct bio *rbi, *rbi2;
@@ -895,8 +912,9 @@
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
-	PRINTK("locked=%d uptodate=%d to_read=%d to_write=%d failed=%d failed_num=%d\n",
-	       locked, uptodate, to_read, to_write, failed, failed_num);
+	PRINTK("locked=%d uptodate=%d to_read=%d"
+		" to_write=%d failed=%d failed_num=%d\n",
+		locked, uptodate, to_read, to_write, failed, failed_num);
 	/* check if the array has lost two devices and, if so, some requests might
 	 * need to be failed
 	 */
@@ -1015,7 +1033,8 @@
 					}
 #endif
 					locked++;
-					PRINTK("Reading block %d (sync=%d)\n", i, syncing);
+					PRINTK("Reading block %d (sync=%d)\n", 
+						i, syncing);
 					if (syncing)
 						md_sync_acct(conf->disks[i].rdev, STRIPE_SECTORS);
 				}
@@ -1055,7 +1074,8 @@
 				else rcw += 2*disks;
 			}
 		}
-		PRINTK("for sector %llu, rmw=%d rcw=%d\n", (unsigned long long)sh->sector, rmw, rcw);
+		PRINTK("for sector %llu, rmw=%d rcw=%d\n", 
+			(unsigned long long)sh->sector, rmw, rcw);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		if (rmw < rcw && rmw > 0)
 			/* prefer read-modify-write, but need to get some data */
@@ -1204,7 +1224,8 @@
 				md_sync_acct(rdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
-			PRINTK("for %llu schedule op %ld on disc %d\n", (unsigned long long)sh->sector, bi->bi_rw, i);
+			PRINTK("for %llu schedule op %ld on disc %d\n",
+				(unsigned long long)sh->sector, bi->bi_rw, i);
 			atomic_inc(&sh->count);
 			bi->bi_sector = sh->sector + rdev->data_offset;
 			bi->bi_flags = 1 << BIO_UPTODATE;
@@ -1217,7 +1238,8 @@
 			bi->bi_next = NULL;
 			generic_make_request(bi);
 		} else {
-			PRINTK("skip op %ld on disc %d for sector %llu\n", bi->bi_rw, i, (unsigned long long)sh->sector);
+			PRINTK("skip op %ld on disc %d for sector %llu\n",
+				bi->bi_rw, i, (unsigned long long)sh->sector);
 			clear_bit(R5_LOCKED, &sh->dev[i].flags);
 			set_bit(STRIPE_HANDLE, &sh->state);
 		}
@@ -1285,8 +1307,9 @@
 		new_sector = raid5_compute_sector(logical_sector,
 						  raid_disks, data_disks, &dd_idx, &pd_idx, conf);
 
-		PRINTK("raid5: make_request, sector %Lu logical %Lu\n", 
-		       (unsigned long long)new_sector, (unsigned long long)logical_sector);
+		PRINTK("raid5: make_request, sector %Lu logical %Lu\n",
+			(unsigned long long)new_sector, 
+			(unsigned long long)logical_sector);
 
 		sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
 		if (sh) {
@@ -1410,11 +1433,8 @@
 	struct disk_info *disk;
 	struct list_head *tmp;
 
-	MOD_INC_USE_COUNT;
-
 	if (mddev->level != 5 && mddev->level != 4) {
 		printk("raid5: md%d: raid level not set to 4/5 (%d)\n", mdidx(mddev), mddev->level);
-		MOD_DEC_USE_COUNT;
 		return -EIO;
 	}
 
@@ -1450,7 +1470,9 @@
 		disk->rdev = rdev;
 
 		if (rdev->in_sync) {
-			printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", bdev_partition_name(rdev->bdev), raid_disk);
+			printk(KERN_INFO "raid5: device %s operational as raid"
+				" disk %d\n", bdev_partition_name(rdev->bdev),
+				raid_disk);
 			conf->working_disks++;
 		}
 	}
@@ -1467,48 +1489,62 @@
 	conf->max_nr_stripes = NR_STRIPES;
 
 	if (!conf->chunk_size || conf->chunk_size % 4) {
-		printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n", conf->chunk_size, mdidx(mddev));
+		printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n",
+			conf->chunk_size, mdidx(mddev));
 		goto abort;
 	}
 	if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
-		printk(KERN_ERR "raid5: unsupported parity algorithm %d for md%d\n", conf->algorithm, mdidx(mddev));
+		printk(KERN_ERR 
+			"raid5: unsupported parity algorithm %d for md%d\n",
+			conf->algorithm, mdidx(mddev));
 		goto abort;
 	}
 	if (mddev->degraded > 1) {
-		printk(KERN_ERR "raid5: not enough operational devices for md%d (%d/%d failed)\n", mdidx(mddev), conf->failed_disks, conf->raid_disks);
+		printk(KERN_ERR "raid5: not enough operational devices for md%d"
+			" (%d/%d failed)\n",
+			mdidx(mddev), conf->failed_disks, conf->raid_disks);
 		goto abort;
 	}
 
 	if (mddev->degraded == 1 &&
 	    mddev->recovery_cp != MaxSector) {
-		printk(KERN_ERR "raid5: cannot start dirty degraded array for md%d\n", mdidx(mddev));
+		printk(KERN_ERR 
+			"raid5: cannot start dirty degraded array for md%d\n",
+			mdidx(mddev));
 		goto abort;
 	}
 
 	{
 		mddev->thread = md_register_thread(raid5d, mddev, "md%d_raid5");
 		if (!mddev->thread) {
-			printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev));
+			printk(KERN_ERR 
+				"raid5: couldn't allocate thread for md%d\n",
+				mdidx(mddev));
 			goto abort;
 		}
 	}
-
 	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
 		 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
 	if (grow_stripes(conf, conf->max_nr_stripes)) {
-		printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory);
+		printk(KERN_ERR 
+			"raid5: couldn't allocate %dkB for buffers\n", memory);
 		shrink_stripes(conf);
 		md_unregister_thread(mddev->thread);
 		goto abort;
 	} else
-		printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev));
+		printk(KERN_INFO "raid5: allocated %dkB for md%d\n",
+			memory, mdidx(mddev));
 
 	if (mddev->degraded == 0)
-		printk("raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), 
-		       mddev->raid_disks-mddev->degraded, mddev->raid_disks, conf->algorithm);
+		printk("raid5: raid level %d set md%d active with %d out of %d"
+			" devices, algorithm %d\n", conf->level, mdidx(mddev), 
+			mddev->raid_disks-mddev->degraded, mddev->raid_disks,
+			conf->algorithm);
 	else
-		printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev),
-		       mddev->raid_disks - mddev->degraded, mddev->raid_disks, conf->algorithm);
+		printk(KERN_ALERT "raid5: raid level %d set md%d active with %d"
+			" out of %d devices, algorithm %d\n", conf->level,
+			mdidx(mddev), mddev->raid_disks - mddev->degraded,
+			mddev->raid_disks, conf->algorithm);
 
 	print_raid5_conf(conf);
 
@@ -1524,7 +1560,6 @@
 	}
 	mddev->private = NULL;
 	printk(KERN_ALERT "raid5: failed to run raid set md%d\n", mdidx(mddev));
-	MOD_DEC_USE_COUNT;
 	return -EIO;
 }
 
@@ -1540,7 +1575,6 @@
 	free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
 	kfree(conf);
 	mddev->private = NULL;
-	MOD_DEC_USE_COUNT;
 	return 0;
 }
 
@@ -1549,11 +1583,14 @@
 {
 	int i;
 
-	printk("sh %llu, pd_idx %d, state %ld.\n", (unsigned long long)sh->sector, sh->pd_idx, sh->state);
-	printk("sh %llu,  count %d.\n", (unsigned long long)sh->sector, atomic_read(&sh->count));
+	printk("sh %llu, pd_idx %d, state %ld.\n",
+		(unsigned long long)sh->sector, sh->pd_idx, sh->state);
+	printk("sh %llu,  count %d.\n",
+		(unsigned long long)sh->sector, atomic_read(&sh->count));
 	printk("sh %llu, ", (unsigned long long)sh->sector);
 	for (i = 0; i < sh->raid_conf->raid_disks; i++) {
-		printk("(cache%d: %p %ld) ", i, sh->dev[i].page, sh->dev[i].flags);
+		printk("(cache%d: %p %ld) ", 
+			i, sh->dev[i].page, sh->dev[i].flags);
 	}
 	printk("\n");
 }
@@ -1693,6 +1730,7 @@
 static mdk_personality_t raid5_personality=
 {
 	.name		= "raid5",
+	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
 	.stop		= stop,
diff -Nru a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c
--- a/drivers/media/radio/radio-cadet.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/media/radio/radio-cadet.c	Mon Mar 31 13:41:07 2003
@@ -23,7 +23,9 @@
  * 2002-01-17	Adam Belay <ambx1@neo.rr.com>
  *		Updated to latest pnp code
  *
-*/
+ * 2003-01-31	Alan Cox <alan@redhat.com>
+ *		Cleaned up locking, delay code, general odds and ends
+ */
 
 #include <linux/module.h>	/* Modules 			*/
 #include <linux/init.h>		/* Initdata			*/
@@ -43,11 +45,11 @@
 static int curtuner=0;
 static int tunestat=0;
 static int sigstrength=0;
-static wait_queue_head_t tunerq,rdsq,readq;
+static wait_queue_head_t readq;
 struct timer_list tunertimer,rdstimer,readtimer;
 static __u8 rdsin=0,rdsout=0,rdsstat=0;
 static unsigned char rdsbuf[RDS_BUFFER];
-static int cadet_lock=0;
+static spinlock_t cadet_io_lock;
 
 static int cadet_probe(void);
 
@@ -58,37 +60,19 @@
  */
 static __u16 sigtable[2][4]={{5,10,30,150},{28,40,63,1000}};
 
-static void cadet_wake(unsigned long qnum)
-{
-        switch(qnum) {
-	case 0:           /* cadet_setfreq */
-	        wake_up(&tunerq);
-		break;
-	case 1:           /* cadet_getrds */
-	        wake_up(&rdsq);
-		break;
-	}	
-}
-
-
-
 static int cadet_getrds(void)
 {
         int rdsstat=0;
 
-	cadet_lock++;
+	spin_lock(&cadet_io_lock);
         outb(3,io);                 /* Select Decoder Control/Status */
 	outb(inb(io+1)&0x7f,io+1);  /* Reset RDS detection */
-	cadet_lock--;
-	init_timer(&rdstimer);
-	rdstimer.function=cadet_wake;
-	rdstimer.data=(unsigned long)1;
-	rdstimer.expires=jiffies+(HZ/10);
-	init_waitqueue_head(&rdsq);
-	add_timer(&rdstimer);
-	sleep_on(&rdsq);
+	spin_unlock(&cadet_io_lock);
 	
-	cadet_lock++;
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(HZ/10);
+
+	spin_lock(&cadet_io_lock);	
         outb(3,io);                 /* Select Decoder Control/Status */
 	if((inb(io+1)&0x80)!=0) {
 	        rdsstat|=VIDEO_TUNER_RDS_ON;
@@ -96,32 +80,24 @@
 	if((inb(io+1)&0x10)!=0) {
 	        rdsstat|=VIDEO_TUNER_MBS_ON;
 	}
-	cadet_lock--;
+	spin_unlock(&cadet_io_lock);
 	return rdsstat;
 }
 
-
-
-
 static int cadet_getstereo(void)
 {
-        if(curtuner!=0) {          /* Only FM has stereo capability! */
+	int ret = 0;
+        if(curtuner != 0)	/* Only FM has stereo capability! */
 	        return 0;
-	}
-        cadet_lock++;
+
+	spin_lock(&cadet_io_lock);
         outb(7,io);          /* Select tuner control */
-        if((inb(io+1)&0x40)==0) {
-	        cadet_lock--;
-                return 1;    /* Stereo pilot detected */
-        }
-        else {
-	        cadet_lock--;
-                return 0;    /* Mono */
-        }
+	if( (inb(io+1) & 0x40) == 0)
+        	ret = 1;
+        spin_unlock(&cadet_io_lock);
+        return ret;
 }
 
-
-
 static unsigned cadet_gettune(void)
 {
         int curvol,i;
@@ -130,7 +106,9 @@
         /*
          * Prepare for read
          */
-	cadet_lock++;
+
+	spin_lock(&cadet_io_lock);
+	
         outb(7,io);       /* Select tuner control */
         curvol=inb(io+1); /* Save current volume/mute setting */
         outb(0x00,io+1);  /* Ensure WRITE-ENABLE is LOW */
@@ -152,13 +130,11 @@
          * Restore volume/mute setting
          */
         outb(curvol,io+1);
-	cadet_lock--;
+	spin_unlock(&cadet_io_lock);
 
 	return fifo;
 }
 
-
-
 static unsigned cadet_getfreq(void)
 {
         int i;
@@ -191,14 +167,13 @@
         return freq;
 }
 
-
-
 static void cadet_settune(unsigned fifo)
 {
         int i;
 	unsigned test;  
 
-	cadet_lock++;
+	spin_lock(&cadet_io_lock);
+	
 	outb(7,io);                /* Select tuner control */
 	/*
 	 * Write the shift register
@@ -217,11 +192,9 @@
 		test=0x1c|((fifo>>23)&0x02);
 		outb(test,io+1);
 	}
-	cadet_lock--;
+	spin_unlock(&cadet_io_lock);
 }
 
-
-
 static void cadet_setfreq(unsigned freq)
 {
         unsigned fifo;
@@ -253,92 +226,90 @@
         /*
          * Save current volume/mute setting
          */
-	cadet_lock++;
+
+	spin_lock(&cadet_io_lock);
 	outb(7,io);                /* Select tuner control */
         curvol=inb(io+1); 
+        spin_unlock(&cadet_io_lock);
 
 	/*
 	 * Tune the card
 	 */
 	for(j=3;j>-1;j--) {
 	        cadet_settune(fifo|(j<<16));
+	        
+	        spin_lock(&cadet_io_lock);
 		outb(7,io);         /* Select tuner control */
 		outb(curvol,io+1);
-		cadet_lock--;
-		init_timer(&tunertimer);
-		tunertimer.function=cadet_wake;
-		tunertimer.data=(unsigned long)0;
-		tunertimer.expires=jiffies+(HZ/10);
-		init_waitqueue_head(&tunerq);
-		add_timer(&tunertimer);
-		sleep_on(&tunerq);
+		spin_unlock(&cadet_io_lock);
+		
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ/10);
+
 		cadet_gettune();
-		if((tunestat&0x40)==0) {   /* Tuned */
+		if((tunestat & 0x40) == 0) {   /* Tuned */
 		        sigstrength=sigtable[curtuner][j];
 			return;
 		}
-		cadet_lock++;
 	}
-	cadet_lock--;
 	sigstrength=0;
 }
 
 
 static int cadet_getvol(void)
 {
-        cadet_lock++;
+	int ret = 0;
+	
+	spin_lock(&cadet_io_lock);
+	
         outb(7,io);                /* Select tuner control */
-        if((inb(io+1)&0x20)!=0) {
-	        cadet_lock--;
-                return 0xffff;
-        }
-        else {
-	        cadet_lock--;
-                return 0;
-        }
+        if((inb(io + 1) & 0x20) != 0)
+        	ret = 0xffff;
+        
+        spin_unlock(&cadet_io_lock);
+        return ret;
 }
 
 
 static void cadet_setvol(int vol)
 {
-        cadet_lock++;
+	spin_lock(&cadet_io_lock);
         outb(7,io);                /* Select tuner control */
-        if(vol>0) {
+        if(vol>0)
                 outb(0x20,io+1);
-        }
-        else {
+        else
                 outb(0x00,io+1);
-        }
-	cadet_lock--;
+	spin_unlock(&cadet_io_lock);
 }  
 
-
-
 void cadet_handler(unsigned long data)
 {
 	/*
 	 * Service the RDS fifo
 	 */
-        if(cadet_lock==0) {
+
+	if(spin_trylock(&cadet_io_lock))
+	{
 	        outb(0x3,io);       /* Select RDS Decoder Control */
 		if((inb(io+1)&0x20)!=0) {
 		        printk(KERN_CRIT "cadet: RDS fifo overflow\n");
 		}
 		outb(0x80,io);      /* Select RDS fifo */
 		while((inb(io)&0x80)!=0) {
-		        rdsbuf[rdsin++]=inb(io+1);
-			if(rdsin==rdsout) {
-			        printk(KERN_CRIT "cadet: RDS buffer overflow\n");
-			}
+		        rdsbuf[rdsin]=inb(io+1);
+			if((__u8)(rdsin+1)==rdsout)	/* full: rdsin==rdsout means empty, so keep one slot free */
+			        printk(KERN_WARNING "cadet: RDS buffer overflow\n");
+			else
+				rdsin++;
 		}
+		spin_unlock(&cadet_io_lock);
 	}
 
 	/*
 	 * Service pending read
 	 */
-	if( rdsin!=rdsout) {
+	if( rdsin!=rdsout)
 	        wake_up_interruptible(&readq);
-	}
 
 	/* 
 	 * Clean up and exit
@@ -359,10 +330,10 @@
 	unsigned char readbuf[RDS_BUFFER];
 
         if(rdsstat==0) {
-	        cadet_lock++;
+		spin_lock(&cadet_io_lock);
 	        rdsstat=1;
 		outb(0x80,io);        /* Select RDS fifo */
-		cadet_lock--;
+		spin_unlock(&cadet_io_lock);
 		init_timer(&readtimer);
 		readtimer.function=cadet_handler;
 		readtimer.data=(unsigned long)0;
@@ -370,14 +341,13 @@
 		add_timer(&readtimer);
 	}
 	if(rdsin==rdsout) {
-  	        if (file->f_flags & O_NONBLOCK) {
+  	        if (file->f_flags & O_NONBLOCK)
 		        return -EWOULDBLOCK;
-		}
 	        interruptible_sleep_on(&readq);
 	}		
-	while((i<count)&&(rdsin!=rdsout)) {
+	while( i<count && rdsin!=rdsout)
 	        readbuf[i++]=rdsbuf[rdsout++];
-	}
+
 	if (copy_to_user(data,readbuf,i))
 	        return -EFAULT;
 	return i;
@@ -515,10 +485,8 @@
 
 static int cadet_release(struct inode *inode, struct file *file)
 {
-        if(rdsstat==1) {
-                del_timer(&readtimer);
-		rdsstat=0;
-	}
+	del_timer_sync(&readtimer);
+	rdsstat=0;
 	users--;
 	return 0;
 }
@@ -595,13 +563,15 @@
 	return -1;
 }
 
-	/* 
-	 * io should only be set if the user has used something like
-	 * isapnp (the userspace program) to initialize this card for us
-	 */
+/* 
+ * io should only be set if the user has used something like
+ * isapnp (the userspace program) to initialize this card for us
+ */
 
 static int __init cadet_init(void)
 {
+	spin_lock_init(&cadet_io_lock);
+	
 	/*
 	 *	If a probe was requested then probe ISAPnP first (safest)
 	 */
diff -Nru a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c
--- a/drivers/media/video/adv7175.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/media/video/adv7175.c	Mon Mar 31 13:41:08 2003
@@ -170,6 +170,7 @@
 	client=kmalloc(sizeof(*client), GFP_KERNEL);
 	if(client == NULL)
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 
 	client_template.adapter = adap;
 	client_template.addr = addr;
@@ -190,9 +191,10 @@
 		// We should never get here!!!
 		dname = unknown_name;
 	}
-	strcpy(client->name, dname);
+	strncpy(client->dev.name, dname, DEVICE_NAME_SIZE);
 	init_MUTEX(&encoder->lock);
 	encoder->client = client;
+	i2c_set_clientdata(client, encoder);
 	encoder->addr = addr;
 	encoder->norm = VIDEO_MODE_PAL;
 	encoder->input = 0;
@@ -201,7 +203,7 @@
 	for (i=1; i<x_common; i++) {
 		rv = i2c_smbus_write_byte(client,init_common[i]);
 		if (rv < 0) {
-			printk(KERN_ERR "%s_attach: init error %d\n", client->name, rv);
+			printk(KERN_ERR "%s_attach: init error %d\n", client->dev.name, rv);
 			break;
 		}
 	}
@@ -211,7 +213,7 @@
 		i2c_smbus_write_byte_data(client,0x07, TR0MODE);
 		i2c_smbus_read_byte_data(client,0x12);
 		printk(KERN_INFO "%s_attach: %s rev. %d at 0x%02x\n",
-		       client->name, dname, rv & 1, client->addr);
+		       client->dev.name, dname, rv & 1, client->addr);
 	}
 
 	i2c_attach_client(client);
@@ -229,7 +231,7 @@
 static int adv717x_detach(struct i2c_client *client)
 {
 	i2c_detach_client(client);
-	kfree(client->data);
+	kfree(i2c_get_clientdata(client));	/* free the encoder state stored via i2c_set_clientdata() */
 	kfree(client);
 	return 0;
 }
@@ -237,7 +239,7 @@
 static int adv717x_command(struct i2c_client *client, unsigned int cmd,
 			   void *arg)
 {
-	struct adv7175 *encoder = client->data;
+	struct adv7175 *encoder = i2c_get_clientdata(client);
 	int i, x_ntsc=13, x_pal=13; 
 		/* x_ntsc is number of entries in init_ntsc -1 */
 		/* x_pal is number of entries in init_pal -1 */
@@ -297,7 +299,7 @@
 				default:
 					printk(KERN_ERR
 					       "%s: illegal norm: %d\n",
-					       client->name, iarg);
+					       client->dev.name, iarg);
 					return -EINVAL;
 
 				}
@@ -353,7 +355,7 @@
 				default:
 					printk(KERN_ERR
 					       "%s: illegal input: %d\n",
-					       client->name, iarg);
+					       client->dev.name, iarg);
 					return -EINVAL;
 
 				}
@@ -419,8 +421,10 @@
 };
 
 static struct i2c_client client_template = {
-	.name		= "adv7175_client",
-	.driver		= &i2c_driver_adv7175
+	.driver		= &i2c_driver_adv7175,
+	.dev		= {
+		.name	= "adv7175_client",
+	},
 };
 
 static int adv717x_init(void)
diff -Nru a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c
--- a/drivers/media/video/bt819.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/bt819.c	Mon Mar 31 13:41:06 2003
@@ -128,7 +128,7 @@
 
 	struct timing *timing;
 
-	decoder = client->data;
+	decoder = i2c_get_clientdata(client);
 	timing = &timing_data[decoder->norm];
 
 	init[3 * 2 - 1] = (((timing->vdelay >> 8) & 0x03) << 6) |
@@ -159,6 +159,7 @@
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if(client == NULL)
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 	client_template.adapter = adap;
 	client_template.addr = addr;
 	memcpy(client, &client_template, sizeof(*client));
@@ -170,8 +171,8 @@
 	}
 
 	memset(decoder, 0, sizeof(struct bt819));
-	strcpy(client->name, "bt819");
-	client->data = decoder;
+	strncpy(client->dev.name, "bt819", DEVICE_NAME_SIZE);
+	i2c_set_clientdata(client, decoder);
 	decoder->client = client;
 	decoder->addr = addr;
 	decoder->norm = VIDEO_MODE_NTSC;
@@ -186,10 +187,10 @@
 	i = bt819_init(client);
 	if (i < 0) {
 		printk(KERN_ERR "%s: bt819_attach: init status %d\n",
-		       decoder->client->name, i);
+		       decoder->client->dev.name, i);
 	} else {
 		printk(KERN_INFO "%s: bt819_attach: chip version %x\n",
-		       decoder->client->name, i2c_smbus_read_byte_data(client,
+		       decoder->client->dev.name, i2c_smbus_read_byte_data(client,
 						      0x17) & 0x0f);
 	}
 	init_MUTEX(&decoder->lock);
@@ -205,7 +206,7 @@
 static int bt819_detach(struct i2c_client *client)
 {
 	i2c_detach_client(client);
-	kfree(client->data);
+	kfree(i2c_get_clientdata(client));	/* free the decoder state stored via i2c_set_clientdata() */
 	kfree(client);
 	MOD_DEC_USE_COUNT;
 	return 0;
@@ -215,7 +216,7 @@
 {
 	int temp;
 
-	struct bt819 *decoder = client->data;
+	struct bt819 *decoder = i2c_get_clientdata(client);
 	//return 0;
 
 	if (!decoder->initialized) {	// First call to bt819_init could be
@@ -268,7 +269,7 @@
 			*iarg = res;
 
 			DEBUG(printk(KERN_INFO "%s-bt819: get status %x\n",
-				     decoder->client->name, *iarg));
+				     decoder->client->dev.name, *iarg));
 		}
 		break;
 
@@ -278,7 +279,7 @@
 			struct timing *timing;
 
 			DEBUG(printk(KERN_INFO "%s-bt819: set norm %x\n",
-				     decoder->client->name, *iarg));
+				     decoder->client->dev.name, *iarg));
 
 			if (*iarg == VIDEO_MODE_NTSC) {
 				bt819_setbit(decoder, 0x01, 0, 1);
@@ -319,7 +320,7 @@
 			int *iarg = arg;
 
 			DEBUG(printk(KERN_INFO "%s-bt819: set input %x\n",
-				     decoder->client->name, *iarg));
+				     decoder->client->dev.name, *iarg));
 
 			if (*iarg < 0 || *iarg > 7) {
 				return -EINVAL;
@@ -344,7 +345,7 @@
 			int *iarg = arg;
 
 			DEBUG(printk(KERN_INFO "%s-bt819: set output %x\n",
-				     decoder->client->name, *iarg));
+				     decoder->client->dev.name, *iarg));
 
 			/* not much choice of outputs */
 			if (*iarg != 0) {
@@ -360,7 +361,7 @@
 
 			DEBUG(printk
 			      (KERN_INFO "%s-bt819: enable output %x\n",
-			       decoder->client->name, *iarg));
+			       decoder->client->dev.name, *iarg));
 
 			if (decoder->enable != enable) {
 				decoder->enable = enable;
@@ -381,7 +382,7 @@
 			DEBUG(printk
 			      (KERN_INFO
 			       "%s-bt819: set picture brightness %d contrast %d colour %d\n",
-			       decoder->client->name, pic->brightness,
+			       decoder->client->dev.name, pic->brightness,
 			       pic->contrast, pic->colour));
 
 
@@ -448,9 +449,11 @@
 };
 
 static struct i2c_client client_template = {
-	.name = "bt819_client",
 	.id = -1,
-	.driver = &i2c_driver_bt819
+	.driver = &i2c_driver_bt819,
+	.dev = {
+		.name = "bt819_client",
+	},
 };
 
 static int bt819_setup(void)
diff -Nru a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c
--- a/drivers/media/video/bt856.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/bt856.c	Mon Mar 31 13:41:06 2003
@@ -106,6 +106,7 @@
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if(client == NULL)
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 	client_template.adapter = adap;
 	client_template.addr = addr;
 	memcpy(client, &client_template, sizeof(*client));	
@@ -123,14 +124,14 @@
 
 
 	memset(encoder, 0, sizeof(struct bt856));
-	strcpy(client->name, "bt856");
+	strncpy(client->dev.name, "bt856", DEVICE_NAME_SIZE);
 	encoder->client = client;
-	client->data = encoder;
+	i2c_set_clientdata(client, encoder);
 	encoder->addr = client->addr;
 	encoder->norm = VIDEO_MODE_NTSC;
 	encoder->enable = 1;
 
-	DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->client->name));
+	DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->client->dev.name));
 
 	i2c_smbus_write_byte_data(client, 0xdc, 0x18);
 	encoder->reg[0xdc] = 0x18;
@@ -171,7 +172,7 @@
 static int bt856_detach(struct i2c_client *client)
 {
 	i2c_detach_client(client);
-	kfree(client->data);
+	kfree(i2c_get_clientdata(client));	/* free the encoder state stored via i2c_set_clientdata() */
 	kfree(client);
 	MOD_DEC_USE_COUNT;
 	return 0;
@@ -180,7 +181,7 @@
 static int bt856_command(struct i2c_client *client, unsigned int cmd,
 			 void *arg)
 {
-	struct bt856 *encoder = client->data;
+	struct bt856 *encoder = i2c_get_clientdata(client);
 
 	switch (cmd) {
 
@@ -190,7 +191,7 @@
 
 			DEBUG(printk
 			      (KERN_INFO "%s-bt856: get capabilities\n",
-			       encoder->client->name));
+			       encoder->client->dev.name));
 
 			cap->flags
 			    = VIDEO_ENCODER_PAL
@@ -205,7 +206,7 @@
 			int *iarg = arg;
 
 			DEBUG(printk(KERN_INFO "%s-bt856: set norm %d\n",
-				     encoder->client->name, *iarg));
+				     encoder->client->dev.name, *iarg));
 
 			switch (*iarg) {
 
@@ -232,7 +233,7 @@
 			int *iarg = arg;
 
 			DEBUG(printk(KERN_INFO "%s-bt856: set input %d\n",
-				     encoder->client->name, *iarg));
+				     encoder->client->dev.name, *iarg));
 
 			/*     We only have video bus.
 			   *iarg = 0: input is from bt819
@@ -268,7 +269,7 @@
 			int *iarg = arg;
 
 			DEBUG(printk(KERN_INFO "%s-bt856: set output %d\n",
-				     encoder->client->name, *iarg));
+				     encoder->client->dev.name, *iarg));
 
 			/* not much choice of outputs */
 			if (*iarg != 0) {
@@ -285,7 +286,7 @@
 
 			DEBUG(printk
 			      (KERN_INFO "%s-bt856: enable output %d\n",
-			       encoder->client->name, encoder->enable));
+			       encoder->client->dev.name, encoder->enable));
 		}
 		break;
 
@@ -309,9 +310,11 @@
 };
 
 static struct i2c_client client_template = {
-	.name = "bt856_client",
 	.id = -1,
-	.driver = &i2c_driver_bt856
+	.driver = &i2c_driver_bt856,
+	.dev = {
+		.name = "bt856_client",
+	},
 };
 
 static int bt856_init(void)
diff -Nru a/drivers/media/video/bttv-if.c b/drivers/media/video/bttv-if.c
--- a/drivers/media/video/bttv-if.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/bttv-if.c	Mon Mar 31 13:41:06 2003
@@ -194,7 +194,7 @@
 
 static int attach_inform(struct i2c_client *client)
 {
-        struct bttv *btv = (struct bttv*)client->adapter->data;
+        struct bttv *btv = i2c_get_adapdata(client->adapter);
 	int i;
 
 	for (i = 0; i < I2C_CLIENTS_MAX; i++) {
@@ -207,13 +207,13 @@
 		bttv_call_i2c_clients(btv,TUNER_SET_TYPE,&btv->tuner_type);
         if (bttv_verbose)
 		printk("bttv%d: i2c attach [client=%s,%s]\n",btv->nr,
-		       client->name, (i < I2C_CLIENTS_MAX) ?  "ok" : "failed");
+		       client->dev.name, (i < I2C_CLIENTS_MAX) ?  "ok" : "failed");
         return 0;
 }
 
 static int detach_inform(struct i2c_client *client)
 {
-        struct bttv *btv = (struct bttv*)client->adapter->data;
+        struct bttv *btv = i2c_get_adapdata(client->adapter);
 	int i;
 
 	for (i = 0; i < I2C_CLIENTS_MAX; i++) {
@@ -224,7 +224,7 @@
 	}
         if (bttv_verbose)
 		printk("bttv%d: i2c detach [client=%s,%s]\n",btv->nr,
-		       client->name, (i < I2C_CLIENTS_MAX) ?  "ok" : "failed");
+		       client->dev.name, (i < I2C_CLIENTS_MAX) ?  "ok" : "failed");
         return 0;
 }
 
@@ -261,15 +261,19 @@
 
 static struct i2c_adapter bttv_i2c_adap_template = {
 	.owner          = THIS_MODULE,
-	.name              = "bt848",
 	.id                = I2C_HW_B_BT848,
 	.client_register   = attach_inform,
 	.client_unregister = detach_inform,
+	.dev		= {
+		.name	= "bt848",
+	},
 };
 
 static struct i2c_client bttv_i2c_client_template = {
-        .name = "bttv internal use only",
-        .id   = -1,
+        .id	= -1,
+        .dev	= {
+		.name = "bttv internal",
+	},
 };
 
 
@@ -343,10 +347,10 @@
 	memcpy(&btv->i2c_client, &bttv_i2c_client_template,
 	       sizeof(struct i2c_client));
 
-	sprintf(btv->i2c_adap.name+strlen(btv->i2c_adap.name),
+	sprintf(btv->i2c_adap.dev.name+strlen(btv->i2c_adap.dev.name),
 		" #%d", btv->nr);
         btv->i2c_algo.data = btv;
-        btv->i2c_adap.data = btv;
+        i2c_set_adapdata(&btv->i2c_adap, btv);
         btv->i2c_adap.algo_data = &btv->i2c_algo;
         btv->i2c_client.adapter = &btv->i2c_adap;
 
diff -Nru a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c
--- a/drivers/media/video/msp3400.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/media/video/msp3400.c	Mon Mar 31 13:41:07 2003
@@ -349,7 +349,7 @@
 static void
 msp3400c_set_scart(struct i2c_client *client, int in, int out)
 {
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 
 	if (-1 == scarts[out][in])
 		return;
@@ -411,7 +411,7 @@
 
 static void msp3400c_setmode(struct i2c_client *client, int type)
 {
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int i;
 	
 	dprintk("msp3400: setmode: %d\n",type);
@@ -471,7 +471,7 @@
 {
 	static char *strmode[] = { "0", "mono", "stereo", "3",
 				   "lang1", "5", "6", "7", "lang2" };
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int nicam=0; /* channel source: FM/AM or nicam */
 	int src=0;
 
@@ -599,7 +599,7 @@
 static void
 msp3400c_restore_dfp(struct i2c_client *client)
 {
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int i;
 
 	for (i = 0; i < DFP_COUNT; i++) {
@@ -627,7 +627,7 @@
 static int
 autodetect_stereo(struct i2c_client *client)
 {
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int val;
 	int newstereo = msp->stereo;
 	int newnicam  = msp->nicam_on;
@@ -727,7 +727,7 @@
 /* stereo/multilang monitoring */
 static void watch_stereo(struct i2c_client *client)
 {
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 
 	if (autodetect_stereo(client)) {
 		if (msp->stereo & VIDEO_SOUND_STEREO)
@@ -746,7 +746,7 @@
 static int msp3400c_thread(void *data)
 {
 	struct i2c_client *client = data;
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	
 	struct CARRIER_DETECT *cd;
 	int count, max1,max2,val1,val2, val,this;
@@ -1002,7 +1002,7 @@
 static int msp3410d_thread(void *data)
 {
 	struct i2c_client *client = data;
-	struct msp3400c *msp = client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int mode,val,i,std;
     
 #ifdef CONFIG_SMP
@@ -1226,9 +1226,11 @@
 
 static struct i2c_client client_template = 
 {
-	.name   = "(unset)",
 	.flags  = I2C_CLIENT_ALLOW_USE,
         .driver = &driver,
+	.dev	= {
+		.name   = "(unset)",
+	},
 };
 
 static int msp_attach(struct i2c_adapter *adap, int addr,
@@ -1265,7 +1267,7 @@
 	for (i = 0; i < DFP_COUNT; i++)
 		msp->dfp_regs[i] = -1;
 
-	c->data = msp;
+	i2c_set_clientdata(c, msp);
 	init_waitqueue_head(&msp->wq);
 
 	if (-1 == msp3400c_reset(c)) {
@@ -1291,7 +1293,7 @@
 #endif
 	msp3400c_setvolume(c,msp->muted,msp->left,msp->right);
 
-	sprintf(c->name,"MSP34%02d%c-%c%d",
+	snprintf(c->dev.name, DEVICE_NAME_SIZE, "MSP34%02d%c-%c%d",
 		(rev2>>8)&0xff, (rev1&0xff)+'@', ((rev1>>8)&0xff)+'@', rev2&0x1f);
 	msp->nicam = (((rev2>>8)&0xff) != 00) ? 1 : 0;
 
@@ -1310,7 +1312,7 @@
 	msp->wake_stereo.data     = (unsigned long)msp;
 
 	/* hello world :-) */
-	printk(KERN_INFO "msp34xx: init: chip=%s",c->name);
+	printk(KERN_INFO "msp34xx: init: chip=%s",c->dev.name);
 	if (msp->nicam)
 		printk(", has NICAM support");
 	printk("\n");
@@ -1340,7 +1342,7 @@
 static int msp_detach(struct i2c_client *client)
 {
 	DECLARE_MUTEX_LOCKED(sem);
-	struct msp3400c *msp  = (struct msp3400c*)client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 	int i;
 	
 	/* shutdown control thread */
@@ -1379,7 +1381,7 @@
 
 static void msp_wake_thread(struct i2c_client *client)
 {
-	struct msp3400c *msp  = (struct msp3400c*)client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
 
 	msp3400c_setvolume(client,msp->muted,0,0);
 	msp->watch_stereo=0;
@@ -1391,7 +1393,7 @@
 
 static int msp_command(struct i2c_client *client, unsigned int cmd, void *arg)
 {
-	struct msp3400c *msp  = (struct msp3400c*)client->data;
+	struct msp3400c *msp = i2c_get_clientdata(client);
         __u16           *sarg = arg;
 #if 0
 	int             *iarg = (int*)arg;
diff -Nru a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c
--- a/drivers/media/video/saa5249.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/media/video/saa5249.c	Mon Mar 31 13:41:08 2003
@@ -171,20 +171,21 @@
 		return -ENOMEM;
 	}
 	memset(t, 0, sizeof(*t));
-	strcpy(client->name, IF_NAME);
+	strncpy(client->dev.name, IF_NAME, DEVICE_NAME_SIZE);
 	init_MUTEX(&t->lock);
 	
 	/*
 	 *	Now create a video4linux device
 	 */
 	 
-	client->data = vd=(struct video_device *)kmalloc(sizeof(struct video_device), GFP_KERNEL);
+	vd = (struct video_device *)kmalloc(sizeof(struct video_device), GFP_KERNEL);
 	if(vd==NULL)
 	{
 		kfree(t);
 		kfree(client);
 		return -ENOMEM;
 	}
+	i2c_set_clientdata(client, vd);
 	memcpy(vd, &saa_template, sizeof(*vd));
 		
 	for (pgbuf = 0; pgbuf < NUM_DAUS; pgbuf++) 
@@ -234,7 +235,7 @@
 
 static int saa5249_detach(struct i2c_client *client)
 {
-	struct video_device *vd=client->data;
+	struct video_device *vd = i2c_get_clientdata(client);
 	i2c_detach_client(client);
 	video_unregister_device(vd);
 	kfree(vd->priv);
@@ -264,9 +265,11 @@
 };
 
 static struct i2c_client client_template = {
-	.name 		= "(unset)",
 	.id 		= -1,
-	.driver 	= &i2c_driver_videotext
+	.driver		= &i2c_driver_videotext,
+	.dev		= {
+		.name	= "(unset)",
+	},
 };
 
 /*
diff -Nru a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c
--- a/drivers/media/video/saa7110.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/saa7110.c	Mon Mar 31 13:41:06 2003
@@ -163,6 +163,7 @@
 	client=kmalloc(sizeof(*client), GFP_KERNEL);
 	if(client == NULL) 
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 	client_template.adapter = adap;
 	client_template.addr = addr;
 	memcpy(client, &client_template, sizeof(*client));
@@ -175,9 +176,9 @@
 
 	/* clear our private data */
 	memset(decoder, 0, sizeof(*decoder));
-	strcpy(client->name, IF_NAME);
+	strncpy(client->dev.name, IF_NAME, DEVICE_NAME_SIZE);
 	decoder->client = client;
-	client->data = decoder;
+	i2c_set_clientdata(client, decoder);
 	decoder->addr = addr;
 	decoder->norm = VIDEO_MODE_PAL;
 	decoder->input = 0;
@@ -189,7 +190,7 @@
 
 	rv = i2c_master_send(client, initseq, sizeof(initseq));
 	if (rv < 0)
-		printk(KERN_ERR "%s_attach: init status %d\n", client->name, rv);
+		printk(KERN_ERR "%s_attach: init status %d\n", client->dev.name, rv);
 	else {
 		i2c_smbus_write_byte_data(client,0x21,0x16);
 		i2c_smbus_write_byte_data(client,0x0D,0x04);
@@ -213,7 +214,7 @@
 static
 int saa7110_detach(struct i2c_client *client)
 {
-	struct saa7110* decoder = client->data;
+	struct saa7110* decoder = i2c_get_clientdata(client);
 
 	i2c_detach_client(client);
 
@@ -232,7 +233,7 @@
 static
 int saa7110_command(struct i2c_client *client, unsigned int cmd, void *arg)
 {
-	struct saa7110* decoder = client->data;
+	struct saa7110* decoder = i2c_get_clientdata(client);
 	int	v;
 
 	switch (cmd) {
@@ -251,7 +252,7 @@
 
 	 case DECODER_GET_STATUS:
 		{
-			struct saa7110* decoder = client->data;
+			struct saa7110* decoder = i2c_get_clientdata(client);
 			int status;
 			int res = 0;
 
@@ -390,9 +391,11 @@
 	.command 	= saa7110_command
 };
 static struct i2c_client client_template = {
-	.name 		= "saa7110_client",
 	.id 		= -1,
-	.driver 	= &i2c_driver_saa7110
+	.driver 	= &i2c_driver_saa7110,
+	.dev		= {
+		.name	= "saa7110_client",
+	},
 };
 
 static int saa7110_init(void)
diff -Nru a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c
--- a/drivers/media/video/saa7111.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/media/video/saa7111.c	Mon Mar 31 13:41:07 2003
@@ -120,6 +120,7 @@
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if(client == NULL) 
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 	client_template.adapter = adap;
 	client_template.addr = addr;
 	memcpy(client, &client_template, sizeof(*client));
@@ -132,9 +133,9 @@
 	}
 
 	memset(decoder, 0, sizeof(*decoder));
-	strcpy(client->name, "saa7111");
+	strncpy(client->dev.name, "saa7111", DEVICE_NAME_SIZE);
 	decoder->client = client;
-	client->data = decoder;
+	i2c_set_clientdata(client, decoder);
 	decoder->addr = addr;
 	decoder->norm = VIDEO_MODE_NTSC;
 	decoder->input = 0;
@@ -147,10 +148,10 @@
 	i = i2c_master_send(client, init, sizeof(init));
 	if (i < 0) {
 		printk(KERN_ERR "%s_attach: init status %d\n",
-		       client->name, i);
+		       client->dev.name, i);
 	} else {
 		printk(KERN_INFO "%s_attach: chip version %x\n",
-		       client->name, i2c_smbus_read_byte_data(client, 0x00) >> 4);
+		       client->dev.name, i2c_smbus_read_byte_data(client, 0x00) >> 4);
 	}
 	init_MUTEX(&decoder->lock);
 	i2c_attach_client(client);
@@ -164,7 +165,7 @@
 
 static int saa7111_detach(struct i2c_client *client)
 {
-	struct saa7111 *decoder = client->data;
+	struct saa7111 *decoder = i2c_get_clientdata(client);
 	i2c_detach_client(client);
 	kfree(decoder);
 	kfree(client);
@@ -175,7 +176,7 @@
 static int saa7111_command(struct i2c_client *client, unsigned int cmd,
 			   void *arg)
 {
-	struct saa7111 *decoder = client->data;
+	struct saa7111 *decoder = i2c_get_clientdata(client);
 
 	switch (cmd) {
 
@@ -187,7 +188,7 @@
 			for (i = 0; i < 32; i += 16) {
 				int j;
 
-				printk("KERN_DEBUG %s: %03x", client->name,
+				printk(KERN_DEBUG "%s: %03x", client->dev.name,
 				       i);
 				for (j = 0; j < 16; ++j) {
 					printk(" %02x",
@@ -407,9 +408,11 @@
 };
 
 static struct i2c_client client_template = {
-	.name 	= "saa7111_client",
 	.id 	= -1,
-	.driver = &i2c_driver_saa7111
+	.driver	= &i2c_driver_saa7111,
+	.dev	= {
+		.name	= "saa7111_client",
+	},
 };
 
 static int saa7111_init(void)
diff -Nru a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c
--- a/drivers/media/video/saa7134/saa7134-i2c.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/saa7134/saa7134-i2c.c	Mon Mar 31 13:41:06 2003
@@ -334,15 +334,19 @@
 
 static struct i2c_adapter saa7134_adap_template = {
 	.owner         = THIS_MODULE,
-	.name	       = "saa7134",
 	.id            = I2C_ALGO_SAA7134,
 	.algo          = &saa7134_algo,
 	.client_register = attach_inform,
+	.dev		= {
+		.name	= "saa7134",
+	},
 };
 
 static struct i2c_client saa7134_client_template = {
-        .name = "saa7134 internal",
         .id   = -1,
+	.dev	= {
+		.name	= "saa7134 internal",
+	},
 };
 
 /* ----------------------------------------------------------- */
@@ -410,7 +414,7 @@
 int saa7134_i2c_register(struct saa7134_dev *dev)
 {
 	dev->i2c_adap = saa7134_adap_template;
-	strcpy(dev->i2c_adap.name,dev->name);
+	strncpy(dev->i2c_adap.dev.name, dev->name, DEVICE_NAME_SIZE);
 	dev->i2c_adap.algo_data = dev;
 	i2c_add_adapter(&dev->i2c_adap);
 	
diff -Nru a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c
--- a/drivers/media/video/saa7185.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/saa7185.c	Mon Mar 31 13:41:06 2003
@@ -191,6 +191,7 @@
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if (client == NULL)
 		return -ENOMEM;
+	memset(client, 0, sizeof(*client));
 	client_template.adapter = adap;
 	client_template.addr = addr;
 	memcpy(client, &client_template, sizeof(*client));
@@ -202,9 +203,9 @@
 
 
 	memset(encoder, 0, sizeof(*encoder));
-	strcpy(client->name, "saa7185");
+	strncpy(client->dev.name, "saa7185", DEVICE_NAME_SIZE);
 	encoder->client = client;
-	client->data = encoder;
+	i2c_set_clientdata(client, encoder);
 	encoder->addr = addr;
 	encoder->norm = VIDEO_MODE_NTSC;
 	encoder->enable = 1;
@@ -215,11 +216,11 @@
 					sizeof(init_ntsc));
 	}
 	if (i < 0) {
-		printk(KERN_ERR "%s_attach: init error %d\n", client->name,
+		printk(KERN_ERR "%s_attach: init error %d\n", client->dev.name,
 		       i);
 	} else {
 		printk(KERN_INFO "%s_attach: chip version %d\n",
-		       client->name, i2c_smbus_read_byte(client) >> 5);
+		       client->dev.name, i2c_smbus_read_byte(client) >> 5);
 	}
 	init_MUTEX(&encoder->lock);
 	i2c_attach_client(client);
@@ -233,7 +234,7 @@
 
 static int saa7185_detach(struct i2c_client *client)
 {
-	struct saa7185 *encoder = client->data;
+	struct saa7185 *encoder = i2c_get_clientdata(client);
 	i2c_detach_client(client);
 	i2c_smbus_write_byte_data(client, 0x61, (encoder->reg[0x61]) | 0x40);	/* SW: output off is active */
 	//i2c_smbus_write_byte_data(client, 0x3a, (encoder->reg[0x3a]) | 0x80); /* SW: color bar */
@@ -246,7 +247,7 @@
 static int saa7185_command(struct i2c_client *client, unsigned int cmd,
 			   void *arg)
 {
-	struct saa7185 *encoder = client->data;
+	struct saa7185 *encoder = i2c_get_clientdata(client);
 
 	switch (cmd) {
 
@@ -365,9 +366,11 @@
 };
 
 static struct i2c_client client_template = {
-	.name 	= "saa7185_client",
 	.id 	= -1,
-	.driver = &i2c_driver_saa7185
+	.driver = &i2c_driver_saa7185,
+	.dev	= {
+		.name	= "saa7185_client",
+	},
 };
 
 static int saa7185_init(void)
diff -Nru a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c
--- a/drivers/media/video/tda7432.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/media/video/tda7432.c	Mon Mar 31 13:41:08 2003
@@ -260,7 +260,7 @@
 
 static int tda7432_set(struct i2c_client *client)
 {
-	struct tda7432 *t = client->data;
+	struct tda7432 *t = i2c_get_clientdata(client);
 	unsigned char buf[16];
 	d2printk("tda7432: In tda7432_set\n");
 	
@@ -287,7 +287,7 @@
 
 static void do_tda7432_init(struct i2c_client *client)
 {
-	struct tda7432 *t = client->data;
+	struct tda7432 *t = i2c_get_clientdata(client);
 	d2printk("tda7432: In tda7432_init\n");
 
 	t->input  = TDA7432_STEREO_IN |  /* Main (stereo) input   */
@@ -328,11 +328,11 @@
         memcpy(client,&client_template,sizeof(struct i2c_client));
         client->adapter = adap;
         client->addr = addr;
-	client->data = t;
+	i2c_set_clientdata(client, t);
 	
 	do_tda7432_init(client);
 	MOD_INC_USE_COUNT;
-	strcpy(client->name,"TDA7432");
+	strncpy(client->dev.name, "TDA7432", DEVICE_NAME_SIZE);
 	printk(KERN_INFO "tda7432: init\n");
 
 	i2c_attach_client(client);
@@ -348,7 +348,7 @@
 
 static int tda7432_detach(struct i2c_client *client)
 {
-	struct tda7432 *t  = client->data;
+	struct tda7432 *t = i2c_get_clientdata(client);
 
 	do_tda7432_init(client);
 	i2c_detach_client(client);
@@ -361,7 +361,7 @@
 static int tda7432_command(struct i2c_client *client,
 			   unsigned int cmd, void *arg)
 {
-	struct tda7432 *t = client->data;
+	struct tda7432 *t = i2c_get_clientdata(client);
 	d2printk("tda7432: In tda7432_command\n");
 
 	switch (cmd) {
@@ -526,9 +526,11 @@
 
 static struct i2c_client client_template =
 {
-        .name   = "tda7432",
         .id     = -1,
 	.driver = &driver, 
+        .dev	= {
+		.name	= "tda7432",
+	},
 };
 
 static int tda7432_init(void)
diff -Nru a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c
--- a/drivers/media/video/tda9875.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/media/video/tda9875.c	Mon Mar 31 13:41:07 2003
@@ -158,7 +158,7 @@
 
 static void tda9875_set(struct i2c_client *client)
 {
-	struct tda9875 *tda = client->data;
+	struct tda9875 *tda = i2c_get_clientdata(client);
 	unsigned char a;
 
 	dprintk(KERN_DEBUG "tda9875_set(%04x,%04x,%04x,%04x)\n",tda->lvol,tda->rvol,tda->bass,tda->treble);
@@ -176,7 +176,7 @@
 
 static void do_tda9875_init(struct i2c_client *client)
 {
-	struct tda9875 *t = client->data;
+	struct tda9875 *t = i2c_get_clientdata(client);
 	dprintk("In tda9875_init\n"); 
 	tda9875_write(client, TDA9875_CFG, 0xd0 ); /*reg de config 0 (reset)*/
     	tda9875_write(client, TDA9875_MSR, 0x03 );    /* Monitor 0b00000XXX*/
@@ -256,7 +256,7 @@
         memcpy(client,&client_template,sizeof(struct i2c_client));
         client->adapter = adap;
         client->addr = addr;
-	client->data = t;
+	i2c_set_clientdata(client, t);
 
 	if(!tda9875_checkit(adap,addr)) {
 		kfree(t);
@@ -265,7 +265,7 @@
 	
 	do_tda9875_init(client);
 	MOD_INC_USE_COUNT;
-	strcpy(client->name,"TDA9875");
+	strncpy(client->dev.name, "TDA9875", DEVICE_NAME_SIZE);
 	printk(KERN_INFO "tda9875: init\n");
 
 	i2c_attach_client(client);
@@ -281,7 +281,7 @@
 
 static int tda9875_detach(struct i2c_client *client)
 {
-	struct tda9875 *t  = client->data;
+	struct tda9875 *t = i2c_get_clientdata(client);
 
 	do_tda9875_init(client);
 	i2c_detach_client(client);
@@ -294,7 +294,7 @@
 static int tda9875_command(struct i2c_client *client,
 				unsigned int cmd, void *arg)
 {
-	struct tda9875 *t = client->data;
+	struct tda9875 *t = i2c_get_clientdata(client);
 
 	dprintk("In tda9875_command...\n"); 
 
@@ -396,9 +396,11 @@
 
 static struct i2c_client client_template =
 {
-        .name    = "tda9875",
         .id      = -1,
         .driver  = &driver,
+        .dev	= {
+		.name	= "tda9875",
+	},
 };
 
 static int tda9875_init(void)
diff -Nru a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
--- a/drivers/media/video/tda9887.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/tda9887.c	Mon Mar 31 13:41:06 2003
@@ -359,7 +359,7 @@
                 return -ENOMEM;
 	memset(t,0,sizeof(*t));
 	t->client = client_template;
-        t->client.data = t;
+        i2c_set_clientdata(&t->client, t);
 	t->pinnacle_id = -1;
         i2c_attach_client(&t->client);
         
@@ -376,12 +376,12 @@
 	case I2C_ALGO_BIT | I2C_HW_B_RIVA:
 	case I2C_ALGO_SAA7134:
 		printk("tda9887: probing %s i2c adapter [id=0x%x]\n",
-		       adap->name,adap->id);
+		       adap->dev.name,adap->id);
 		rc = i2c_probe(adap, &addr_data, tda9887_attach);
 		break;
 	default:
 		printk("tda9887: ignoring %s i2c adapter [id=0x%x]\n",
-		       adap->name,adap->id);
+		       adap->dev.name,adap->id);
 		rc = 0;
 		/* nothing */
 	}
@@ -390,7 +390,7 @@
 
 static int tda9887_detach(struct i2c_client *client)
 {
-	struct tda9887 *t = (struct tda9887*)client->data;
+	struct tda9887 *t = i2c_get_clientdata(client);
 
 	i2c_detach_client(client);
 	kfree(t);
@@ -401,7 +401,7 @@
 static int
 tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg)
 {
-	struct tda9887 *t = (struct tda9887*)client->data;
+	struct tda9887 *t = i2c_get_clientdata(client);
 
         switch (cmd) {
 
@@ -456,9 +456,11 @@
 };
 static struct i2c_client client_template =
 {
-        .name   = "tda9887",
 	.flags  = I2C_CLIENT_ALLOW_USE,
         .driver = &driver,
+        .dev	= {
+		.name	= "tda9887",
+	},
 };
 
 static int tda9887_init_module(void)
diff -Nru a/drivers/media/video/tuner-3036.c b/drivers/media/video/tuner-3036.c
--- a/drivers/media/video/tuner-3036.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/media/video/tuner-3036.c	Mon Mar 31 13:41:07 2003
@@ -196,9 +196,11 @@
 
 static struct i2c_client client_template =
 {
-        .name 		= "SAB3036",
         .id 		= -1,
-        .driver		= &i2c_driver_tuner
+        .driver		= &i2c_driver_tuner,
+        .dev		= {
+		.name	= "SAB3036",
+	},
 };
 
 int __init
diff -Nru a/drivers/media/video/tuner.c b/drivers/media/video/tuner.c
--- a/drivers/media/video/tuner.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/media/video/tuner.c	Mon Mar 31 13:41:08 2003
@@ -226,7 +226,7 @@
 {
 	unsigned char byte;
 
-	struct tuner *t = (struct tuner*)c->data;
+	struct tuner *t = i2c_get_clientdata(c);
 
         if (t->type == TUNER_MT2032)
 		return 0;
@@ -276,7 +276,7 @@
 {
         unsigned char buf[21];
         int ret,xogc,xok=0;
-	struct tuner *t = (struct tuner*)c->data;
+	struct tuner *t = i2c_get_clientdata(c);
 
         buf[0]=0;
         ret=i2c_master_send(c,buf,1);
@@ -517,7 +517,7 @@
 {
 	unsigned char buf[21];
 	int lint_try,ret,sel,lock=0;
-	struct tuner *t = (struct tuner*)c->data;
+	struct tuner *t = i2c_get_clientdata(c);
 
 	dprintk("mt2032_set_if_freq rfin=%d if1=%d if2=%d from=%d to=%d\n",rfin,if1,if2,from,to);
 
@@ -594,7 +594,7 @@
 	u8 config;
 	u16 div;
 	struct tunertype *tun;
-	struct tuner *t = c->data;
+	struct tuner *t = i2c_get_clientdata(c);
         unsigned char buffer[4];
 	int rc;
 
@@ -733,7 +733,7 @@
 static void set_radio_freq(struct i2c_client *c, int freq)
 {
 	struct tunertype *tun;
-	struct tuner *t = (struct tuner*)c->data;
+	struct tuner *t = i2c_get_clientdata(c);
         unsigned char buffer[4];
 	int rc,div;
 
@@ -794,16 +794,17 @@
         if (NULL == (client = kmalloc(sizeof(struct i2c_client), GFP_KERNEL)))
                 return -ENOMEM;
         memcpy(client,&client_template,sizeof(struct i2c_client));
-        client->data = t = kmalloc(sizeof(struct tuner),GFP_KERNEL);
+        t = kmalloc(sizeof(struct tuner),GFP_KERNEL);
         if (NULL == t) {
                 kfree(client);
                 return -ENOMEM;
         }
+	i2c_set_clientdata(client, t);
         memset(t,0,sizeof(struct tuner));
 	if (type >= 0 && type < TUNERS) {
 		t->type = type;
 		printk("tuner(bttv): type forced to %d (%s) [insmod]\n",t->type,tuners[t->type].name);
-		strncpy(client->name, tuners[t->type].name, sizeof(client->name));
+		strncpy(client->dev.name, tuners[t->type].name, DEVICE_NAME_SIZE);
 	} else {
 		t->type = -1;
 	}
@@ -830,12 +831,12 @@
 	case I2C_ALGO_SAA7134:
 	case I2C_ALGO_SAA7146:
 		printk("tuner: probing %s i2c adapter [id=0x%x]\n",
-		       adap->name,adap->id);
+		       adap->dev.name,adap->id);
 		rc = i2c_probe(adap, &addr_data, tuner_attach);
 		break;
 	default:
 		printk("tuner: ignoring %s i2c adapter [id=0x%x]\n",
-		       adap->name,adap->id);
+		       adap->dev.name,adap->id);
 		rc = 0;
 		/* nothing */
 	}
@@ -844,7 +845,7 @@
 
 static int tuner_detach(struct i2c_client *client)
 {
-	struct tuner *t = (struct tuner*)client->data;
+	struct tuner *t = i2c_get_clientdata(client);
 
 	i2c_detach_client(client);
 	kfree(t);
@@ -856,7 +857,7 @@
 static int
 tuner_command(struct i2c_client *client, unsigned int cmd, void *arg)
 {
-	struct tuner *t = (struct tuner*)client->data;
+	struct tuner *t = i2c_get_clientdata(client);
         int   *iarg = (int*)arg;
 #if 0
         __u16 *sarg = (__u16*)arg;
@@ -875,7 +876,7 @@
 		t->type = *iarg;
 		printk("tuner: type set to %d (%s)\n",
                         t->type,tuners[t->type].name);
-		strncpy(client->name, tuners[t->type].name, sizeof(client->name));
+		strncpy(client->dev.name, tuners[t->type].name, DEVICE_NAME_SIZE);
 		if (t->type == TUNER_MT2032)
                         mt2032_init(client);
 		break;
@@ -977,9 +978,11 @@
 };
 static struct i2c_client client_template =
 {
-        .name   = "(tuner unset)",
 	.flags  = I2C_CLIENT_ALLOW_USE,
         .driver = &driver,
+        .dev	= {
+		.name	= "(tuner unset)",
+	},
 };
 
 static int tuner_init_module(void)
diff -Nru a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
--- a/drivers/media/video/tvaudio.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/media/video/tvaudio.c	Mon Mar 31 13:41:06 2003
@@ -161,22 +161,22 @@
 	unsigned char buffer[2];
 
 	if (-1 == subaddr) {
-		dprintk("%s: chip_write: 0x%x\n", chip->c.name, val);
+		dprintk("%s: chip_write: 0x%x\n", chip->c.dev.name, val);
 		chip->shadow.bytes[1] = val;
 		buffer[0] = val;
 		if (1 != i2c_master_send(&chip->c,buffer,1)) {
 			printk(KERN_WARNING "%s: I/O error (write 0x%x)\n",
-			       chip->c.name, val);
+			       chip->c.dev.name, val);
 			return -1;
 		}
 	} else {
-		dprintk("%s: chip_write: reg%d=0x%x\n", chip->c.name, subaddr, val);
+		dprintk("%s: chip_write: reg%d=0x%x\n", chip->c.dev.name, subaddr, val);
 		chip->shadow.bytes[subaddr+1] = val;
 		buffer[0] = subaddr;
 		buffer[1] = val;
 		if (2 != i2c_master_send(&chip->c,buffer,2)) {
 			printk(KERN_WARNING "%s: I/O error (write reg%d=0x%x)\n",
-			       chip->c.name, subaddr, val);
+			       chip->c.dev.name, subaddr, val);
 			return -1;
 		}
 	}
@@ -201,10 +201,10 @@
 
 	if (1 != i2c_master_recv(&chip->c,&buffer,1)) {
 		printk(KERN_WARNING "%s: I/O error (read)\n",
-		       chip->c.name);
+		       chip->c.dev.name);
 		return -1;
 	}
-	dprintk("%s: chip_read: 0x%x\n",chip->c.name,buffer); 
+	dprintk("%s: chip_read: 0x%x\n",chip->c.dev.name,buffer); 
 	return buffer;
 }
 
@@ -220,11 +220,11 @@
 
 	if (2 != i2c_transfer(chip->c.adapter,msgs,2)) {
 		printk(KERN_WARNING "%s: I/O error (read2)\n",
-		       chip->c.name);
+		       chip->c.dev.name);
 		return -1;
 	}
 	dprintk("%s: chip_read2: reg%d=0x%x\n",
-		chip->c.name,subaddr,read[0]); 
+		chip->c.dev.name,subaddr,read[0]); 
 	return read[0];
 }
 
@@ -237,7 +237,7 @@
 
 	/* update our shadow register set; print bytes if (debug > 0) */
 	dprintk("%s: chip_cmd(%s): reg=%d, data:",
-		chip->c.name,name,cmd->bytes[0]);
+		chip->c.dev.name,name,cmd->bytes[0]);
 	for (i = 1; i < cmd->count; i++) {
 		dprintk(" 0x%x",cmd->bytes[i]);
 		chip->shadow.bytes[i+cmd->bytes[0]] = cmd->bytes[i];
@@ -246,7 +246,7 @@
 
 	/* send data to the chip */
 	if (cmd->count != i2c_master_send(&chip->c,cmd->bytes,cmd->count)) {
-		printk(KERN_WARNING "%s: I/O error (%s)\n", chip->c.name, name);
+		printk(KERN_WARNING "%s: I/O error (%s)\n", chip->c.dev.name, name);
 		return -1;
 	}
 	return 0;
@@ -273,19 +273,19 @@
 #ifdef CONFIG_SMP
 	lock_kernel();
 #endif
-	daemonize("%s", chip->c.name);
+	daemonize("%s", chip->c.dev.name);
 	chip->thread = current;
 #ifdef CONFIG_SMP
 	unlock_kernel();
 #endif
 
-	dprintk("%s: thread started\n", chip->c.name);
+	dprintk("%s: thread started\n", chip->c.dev.name);
 	if(chip->notify != NULL)
 		up(chip->notify);
 
 	for (;;) {
 		interruptible_sleep_on(&chip->wq);
-		dprintk("%s: thread wakeup\n", chip->c.name);
+		dprintk("%s: thread wakeup\n", chip->c.dev.name);
 		if (chip->done || signal_pending(current))
 			break;
 
@@ -301,7 +301,7 @@
 	}
 
 	chip->thread = NULL;
-	dprintk("%s: thread exiting\n", chip->c.name);
+	dprintk("%s: thread exiting\n", chip->c.dev.name);
 	if(chip->notify != NULL)
 		up(chip->notify);
 
@@ -316,7 +316,7 @@
 	if (mode == chip->prevmode)
 	    return;
 
-	dprintk("%s: thread checkmode\n", chip->c.name);
+	dprintk("%s: thread checkmode\n", chip->c.dev.name);
 	chip->prevmode = mode;
 
 	if (mode & VIDEO_SOUND_STEREO)
@@ -1339,7 +1339,7 @@
 	memcpy(&chip->c,&client_template,sizeof(struct i2c_client));
         chip->c.adapter = adap;
         chip->c.addr = addr;
-	chip->c.data = chip;
+	i2c_set_clientdata(&chip->c, chip);
 
 	/* find description for the chip */
 	dprintk("tvaudio: chip found @ i2c-addr=0x%x\n", addr<<1);
@@ -1364,7 +1364,7 @@
 		(desc->flags & CHIP_HAS_INPUTSEL)   ? " audiomux"    : "");
 
 	/* fill required data structures */
-	strcpy(chip->c.name,desc->name);
+	strncpy(chip->c.dev.name, desc->name, DEVICE_NAME_SIZE);
 	chip->type = desc-chiplist;
 	chip->shadow.count = desc->registers+1;
         chip->prevmode = -1;
@@ -1421,7 +1421,7 @@
 
 static int chip_detach(struct i2c_client *client)
 {
-	struct CHIPSTATE *chip = client->data;
+	struct CHIPSTATE *chip = i2c_get_clientdata(client);
 
 	del_timer(&chip->wt);
 	if (NULL != chip->thread) {
@@ -1447,10 +1447,10 @@
 			unsigned int cmd, void *arg)
 {
         __u16 *sarg = arg;
-	struct CHIPSTATE *chip = client->data;
+	struct CHIPSTATE *chip = i2c_get_clientdata(client);
 	struct CHIPDESC  *desc = chiplist + chip->type;
 
-	dprintk("%s: chip_command 0x%x\n",chip->c.name,cmd);
+	dprintk("%s: chip_command 0x%x\n",chip->c.dev.name,cmd);
 
 	switch (cmd) {
 	case AUDC_SET_INPUT:
@@ -1558,9 +1558,11 @@
 
 static struct i2c_client client_template =
 {
-        .name   = "(unset)",
 	.flags  = I2C_CLIENT_ALLOW_USE,
         .driver = &driver,
+        .dev	= {
+		.name	= "(unset)",
+	},
 };
 
 static int audiochip_init_module(void)
diff -Nru a/drivers/mtd/maps/epxa10db-flash.c b/drivers/mtd/maps/epxa10db-flash.c
--- a/drivers/mtd/maps/epxa10db-flash.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/mtd/maps/epxa10db-flash.c	Mon Mar 31 13:41:08 2003
@@ -199,12 +199,12 @@
 
 	printk("Using default partitions for %s\n",BOARD_NAME);
 	npartitions=1;
-	parts = kmalloc(npartitions*sizeof(*parts)+strlen(name), GFP_KERNEL);
-	memzero(parts,npartitions*sizeof(*parts)+strlen(name));
+	parts = kmalloc(npartitions*sizeof(*parts)+strlen(name)+1, GFP_KERNEL);
 	if (!parts) {
 		ret = -ENOMEM;
 		goto out;
 	}
+	memzero(parts,npartitions*sizeof(*parts)+strlen(name)+1);
 	i=0;
 	names = (char *)&parts[npartitions];	
 	parts[i].name = names;
@@ -218,10 +218,11 @@
 	parts[i].size = FLASH_SIZE-0x00180000;
 	parts[i].offset = 0x00180000;
 #endif
+	ret = npartitions;
 
  out:
 	*pparts = parts;
-	return npartitions;
+	return ret;
 }
 
 
diff -Nru a/drivers/net/3c509.c b/drivers/net/3c509.c
--- a/drivers/net/3c509.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/net/3c509.c	Mon Mar 31 13:41:06 2003
@@ -208,7 +208,9 @@
 static int el3_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *data);
 #endif
 /* generic device remove for all device types */
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 static int el3_device_remove (struct device *device);
+#endif
 
 #ifdef CONFIG_EISA
 struct eisa_device_id el3_eisa_ids[] = {
diff -Nru a/drivers/net/Kconfig b/drivers/net/Kconfig
--- a/drivers/net/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/net/Kconfig	Mon Mar 31 13:41:08 2003
@@ -2262,7 +2262,7 @@
 
 config PPP_FILTER
 	bool "PPP filtering"
-	depends on PPP && FILTER
+	depends on PPP
 	help
 	  Say Y here if you want to be able to filter the packets passing over
 	  PPP interfaces.  This allows you to control which packets count as
diff -Nru a/drivers/net/bonding.c b/drivers/net/bonding.c
--- a/drivers/net/bonding.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/net/bonding.c	Mon Mar 31 13:41:08 2003
@@ -258,10 +258,47 @@
  *     - correct ifr_data reference.  Update ifr_data reference
  *       to mii_ioctl_data struct values to avoid confusion.
  *
- *
  *  2002/11/22 - Bert Barbe <bert.barbe at oracle dot com>
  *      - Add support for multiple arp_ip_target
  *
+ *  2002/12/13 - Jay Vosburgh <fubar at us dot ibm dot com>
+ *	- Changed to allow text strings for mode and multicast, e.g.,
+ *	  insmod bonding mode=active-backup.  The numbers still work.
+ *	  One change: an invalid choice will cause module load failure,
+ *	  rather than the previous behavior of just picking one.
+ *	- Minor cleanups; got rid of dup ctype stuff, atoi function
+ * 
+ * 2003/02/07 - Jay Vosburgh <fubar at us dot ibm dot com>
+ *	- Added use_carrier module parameter that causes miimon to
+ *	  use netif_carrier_ok() test instead of MII/ETHTOOL ioctls.
+ *	- Minor cleanups; consolidated ioctl calls to one function.
+ *
+ * 2003/02/07 - Tony Cureington <tony.cureington * hp_com>
+ *	- Fix bond_mii_monitor() logic error that could result in
+ *	  bonding round-robin mode ignoring links after failover/recovery
+ *
+ * 2003/03/17 - Jay Vosburgh <fubar at us dot ibm dot com>
+ *	- kmalloc fix (GFP_KERNEL to GFP_ATOMIC) reported by
+ *	  Shmulik dot Hen at intel.com.
+ *	- Based on discussion on mailing list, changed use of
+ *	  update_slave_cnt(), created wrapper functions for adding/removing
+ *	  slaves, changed bond_xmit_xor() to check slave_cnt instead of
+ *	  checking slave and slave->dev (which only worked by accident).
+ *	- Misc code cleanup: get arp_send() prototype from header file,
+ *	  add max_bonds to bonding.txt.
+ *
+ * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ *		Shmulik Hen <shmulik.hen at intel dot com>
+ *	- Make sure only bond_attach_slave() and bond_detach_slave() can
+ *	  manipulate the slave list, including slave_cnt, even when in
+ *	  bond_release_all().
+ *	- Fixed hang in bond_release() while traffic is running.
+ *	  netdev_set_master() must not be called from within the bond lock.
+ *
+ * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ *		Shmulik Hen <shmulik.hen at intel dot com>
+ *	- Fixed hang in bond_enslave(): netdev_set_master() must not be
+ *	  called from within the bond lock while traffic is running.
  */
 
 #include <linux/config.h>
@@ -278,6 +315,7 @@
 #include <linux/init.h>
 #include <linux/timer.h>
 #include <linux/socket.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
@@ -288,7 +326,7 @@
 #include <linux/if_bonding.h>
 #include <linux/smp.h>
 #include <linux/if_ether.h>
-#include <linux/if_arp.h>
+#include <net/arp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 
@@ -298,8 +336,8 @@
 #include <asm/dma.h>
 #include <asm/uaccess.h>
 
-#define DRV_VERSION		"2.4.20-20021210"
-#define DRV_RELDATE		"December 10, 2002"
+#define DRV_VERSION		"2.5.65-20030320"
+#define DRV_RELDATE		"March 20, 2003"
 #define DRV_NAME		"bonding"
 #define DRV_DESCRIPTION		"Ethernet Channel Bonding Driver"
 
@@ -330,14 +368,30 @@
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int miimon	= BOND_LINK_MON_INTERV;
-static int mode		= BOND_MODE_ROUNDROBIN;
+static int use_carrier	= 1;
+static int bond_mode	= BOND_MODE_ROUNDROBIN;
 static int updelay	= 0;
 static int downdelay	= 0;
 
-#define BOND_MULTICAST_DISABLED 0
-#define BOND_MULTICAST_ACTIVE   1
-#define BOND_MULTICAST_ALL      2
-static int multicast    = BOND_MULTICAST_ALL;
+static char *mode	= NULL;
+
+static struct bond_parm_tbl bond_mode_tbl[] = {	/* names for the "mode" parm */
+{	"balance-rr",		BOND_MODE_ROUNDROBIN},
+{	"active-backup",	BOND_MODE_ACTIVEBACKUP},
+{	"balance-xor",		BOND_MODE_XOR},
+{	"broadcast",		BOND_MODE_BROADCAST},
+{	NULL,			-1},	/* NULL name ends the table */
+};
+
+static int multicast_mode	= BOND_MULTICAST_ALL;
+static char *multicast		= NULL;
+
+static struct bond_parm_tbl bond_mc_tbl[] = {	/* names for the "multicast" parm */
+{	"disabled",		BOND_MULTICAST_DISABLED},
+{	"active",		BOND_MULTICAST_ACTIVE},
+{	"all",			BOND_MULTICAST_ALL},
+{	NULL,			-1},	/* NULL name ends the table */
+};
 
 static int first_pass	= 1;
 static struct bonding *these_bonds =  NULL;
@@ -347,25 +401,23 @@
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 MODULE_PARM(miimon, "i");
 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
-MODULE_PARM(mode, "i");
+MODULE_PARM(use_carrier, "i");	/* selects how bond_check_dev_link() tests link state */
+MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 0 for off, 1 for on (default)");
+MODULE_PARM(mode, "s");
+MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");
 MODULE_PARM(arp_interval, "i");
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(MAX_ARP_IP_TARGETS) "s");
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
-MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");
 MODULE_PARM(updelay, "i");
 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
 MODULE_PARM(downdelay, "i");
 MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds");
 MODULE_PARM(primary, "s");
 MODULE_PARM_DESC(primary, "Primary network device to use");
-MODULE_PARM(multicast, "i");
+MODULE_PARM(multicast, "s");
 MODULE_PARM_DESC(multicast, "Mode for multicast support : 0 for none, 1 for active slave, 2 for all slaves (default)");
 
-extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev,
-	u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, 
-	unsigned char *target_hw);
-
 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev);
 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev);
 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev);
@@ -408,18 +460,20 @@
   
 static void arp_send_all(slave_t *slave)
 {	
-  int i; 
-  for ( i=0; (i<MAX_ARP_IP_TARGETS) && arp_target[i]; i++) { 
-    arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target[i], slave->dev, 
-	     my_ip, arp_target_hw_addr, slave->dev->dev_addr,
-	     arp_target_hw_addr); 
-  } 
+	int i; 
+
+	for (i = 0; (i<MAX_ARP_IP_TARGETS) && arp_target[i]; i++) { 
+		arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target[i], slave->dev, 
+			 my_ip, arp_target_hw_addr, slave->dev->dev_addr,
+			 arp_target_hw_addr); 
+	} 
 }
  
 
-static const char *bond_mode(void)
+static const char *
+bond_mode_name(void)
 {
-	switch (mode) {
+	switch (bond_mode) {
 	case BOND_MODE_ROUNDROBIN :
 		return "load balancing (round-robin)";
 	case BOND_MODE_ACTIVEBACKUP :
@@ -433,9 +487,10 @@
 	}
 }
 
-static const char *multicast_mode(void)
+static const char *
+multicast_mode_name(void)
 {
-	switch(multicast) {
+	switch(multicast_mode) {
 	case BOND_MULTICAST_DISABLED :
 		return "disabled";
 	case BOND_MULTICAST_ACTIVE :
@@ -464,6 +519,29 @@
 	slave->dev->flags &= ~IFF_NOARP;
 }
 
+/*
+ * Recount the slaves attached to <bond> by walking the list in the ->prev
+ * direction, store the result in bond->slave_cnt, and BUG() if it differs
+ * from the previous count plus <incr> (1 when a slave was added, -1 when
+ * one was removed).  Only bond_xmit_xor() uses the slave_cnt value, but
+ * this is still a good consistency check.
+ */
+static inline void
+update_slave_cnt(bonding_t *bond, int incr)
+{
+	slave_t *slave = NULL;
+	int expect = bond->slave_cnt + incr;	/* count the walk should find */
+
+	bond->slave_cnt = 0;
+	for (slave = bond->prev; slave != (slave_t*)bond;
+	     slave = slave->prev) {
+		bond->slave_cnt++;
+	}
+
+	if (expect != bond->slave_cnt)
+		BUG();	/* list contents and bookkeeping disagree */
+}
+
 /* 
  * This function detaches the slave <slave> from the list <bond>.
  * WARNING: no check is made to verify if the slave effectively
@@ -471,8 +549,11 @@
  * Nothing is freed on return, structures are just unchained.
  * If the bond->current_slave pointer was pointing to <slave>,
  * it's replaced with slave->next, or <bond> if not applicable.
+ *
+ * bond->lock held by caller.
  */
-static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave)
+static slave_t *
+bond_detach_slave(bonding_t *bond, slave_t *slave)
 {
 	if ((bond == NULL) || (slave == NULL) ||
 	   ((void *)bond == (void *)slave)) {
@@ -499,8 +580,7 @@
 			}
 			write_unlock(&bond->ptrlock);
 		}
-	}
-	else {
+	} else {
 		slave->prev->next = slave->next;
 		if (bond->prev == slave) {  /* is this slave the last one ? */
 			bond->prev = slave->prev;
@@ -515,9 +595,44 @@
 		write_unlock(&bond->ptrlock);
 	}
 
+	update_slave_cnt(bond, -1);
+
 	return slave;
 }
 
+static void
+bond_attach_slave(struct bonding *bond, struct slave *new_slave)
+{
+	/*
+	 * Queue new_slave at the end of the slaves list: make the first
+	 * element its successor, the last one its predecessor, and make it
+	 * the bond's predecessor; then recount via update_slave_cnt().
+	 *
+	 * Just to clarify, so future bonding driver hackers don't go through
+	 * the same confusion stage I did trying to figure this out, the
+	 * slaves are stored in a doubly linked circular list, sort of.
+	 * In the ->next direction, the last slave points to the first slave,
+	 * bypassing bond; only the slaves are in the ->next direction.
+	 * In the ->prev direction, however, the first slave points to bond
+	 * and bond points to the last slave.
+	 *
+	 * It looks like a circle with a little bubble hanging off one side
+	 * in the ->prev direction only.
+	 *
+	 * When going through the list once, it's best to start at bond->prev
+	 * and go in the ->prev direction, testing for bond.  Doing this
+	 * in the ->next direction doesn't work.  Trust me, I know this now.
+	 * :)  -mts 2002.03.14
+	 */
+	new_slave->prev       = bond->prev;
+	new_slave->prev->next = new_slave;	/* former tail now leads to us */
+	bond->prev            = new_slave;
+	new_slave->next       = bond->next;	/* successor is the first element */
+
+	update_slave_cnt(bond, 1);
+}
+
+
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -534,17 +649,31 @@
 /* 
  * if <dev> supports MII link status reporting, check its link status.
  *
+ * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
+ * depending upon the setting of the use_carrier parameter.
+ *
  * Return either BMSR_LSTATUS, meaning that the link is up (or we
  * can't tell and just pretend it is), or 0, meaning that the link is
  * down.
+ *
+ * If reporting is non-zero, instead of faking link up, return -1 if
+ * both ETHTOOL and MII ioctls fail (meaning the device does not
+ * support them).  If use_carrier is set, return whatever it says.
+ * It'd be nice if there was a good way to tell if a driver supports
+ * netif_carrier, but there really isn't.
  */
-static u16 bond_check_dev_link(struct net_device *dev)
+static int
+bond_check_dev_link(struct net_device *dev, int reporting)
 {
 	static int (* ioctl)(struct net_device *, struct ifreq *, int);
 	struct ifreq ifr;
 	struct mii_ioctl_data *mii;
 	struct ethtool_value etool;
 
+	if (use_carrier) {
+		return netif_carrier_ok(dev) ? BMSR_LSTATUS : 0;
+	}
+
 	ioctl = dev->do_ioctl;
 	if (ioctl) {
 		/* TODO: set pointer to correct ioctl on a per team member */
@@ -576,18 +705,24 @@
 		if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) {
 			if (etool.data == 1) {
 				return BMSR_LSTATUS;
-			} 
-			else { 
+			} else { 
 #ifdef BONDING_DEBUG
 				printk(KERN_INFO 
-					":: SIOCETHTOOL shows failure \n");
+					":: SIOCETHTOOL shows link down \n");
 #endif
-				return(0);
+				return 0;
 			} 
 		}
 
 	}
-	return BMSR_LSTATUS;  /* spoof link up ( we can't check it) */
+ 
+	/*
+	 * If reporting, report that either there's no dev->do_ioctl,
+	 * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we
+	 * cannot report link status).  If not reporting, pretend
+	 * we're ok.
+	 */
+	return reporting ? -1 : BMSR_LSTATUS;
 }
 
 static u16 bond_check_mii_link(bonding_t *bond)
@@ -622,7 +757,7 @@
 		init_timer(arp_timer);
 		arp_timer->expires  = jiffies + (arp_interval * HZ / 1000);
 		arp_timer->data     = (unsigned long)dev;
-		if (mode == BOND_MODE_ACTIVEBACKUP) {
+		if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 			arp_timer->function = (void *)&activebackup_arp_monitor;
 		} else {
 			arp_timer->function = (void *)&loadbalance_arp_monitor;
@@ -692,7 +827,7 @@
 static void bond_mc_add(bonding_t *bond, void *addr, int alen)
 { 
 	slave_t *slave;
-	switch (multicast) {
+	switch (multicast_mode) {
 	case BOND_MULTICAST_ACTIVE :
 		/* write lock already acquired */
 		if (bond->current_slave != NULL)
@@ -713,7 +848,7 @@
 static void bond_mc_delete(bonding_t *bond, void *addr, int alen)
 { 
 	slave_t *slave; 
-	switch (multicast) {
+	switch (multicast_mode) {
 	case BOND_MULTICAST_ACTIVE :
 		/* write lock already acquired */
 		if (bond->current_slave != NULL)
@@ -769,7 +904,7 @@
 static void bond_set_promiscuity(bonding_t *bond, int inc)
 { 
 	slave_t *slave; 
-	switch (multicast) {
+	switch (multicast_mode) {
 	case BOND_MULTICAST_ACTIVE :
 		/* write lock already acquired */
 		if (bond->current_slave != NULL)
@@ -790,7 +925,7 @@
 static void bond_set_allmulti(bonding_t *bond, int inc)
 { 
 	slave_t *slave; 
-	switch (multicast) {
+	switch (multicast_mode) {
 	case BOND_MULTICAST_ACTIVE : 
 		/* write lock already acquired */
 		if (bond->current_slave != NULL)
@@ -827,7 +962,7 @@
 	struct dev_mc_list *dmi;
 	unsigned long flags = 0;
 
-	if (multicast == BOND_MULTICAST_DISABLED)
+	if (multicast_mode == BOND_MULTICAST_DISABLED)
 		return;
 	/*
 	 * Lock the private data for the master
@@ -865,7 +1000,7 @@
 
 	/* save master's multicast list */ 
 	bond_mc_list_destroy (bond);
-	bond_mc_list_copy (master->mc_list, bond, GFP_KERNEL);
+	bond_mc_list_copy (master->mc_list, bond, GFP_ATOMIC);
 
 	write_unlock_irqrestore(&bond->lock, flags);
 }
@@ -878,7 +1013,7 @@
 {
 	struct dev_mc_list *dmi;
 
-	switch(multicast) {
+	switch(multicast_mode) {
 	case BOND_MULTICAST_ACTIVE :		
 		if (bond->device->flags & IFF_PROMISC) {
 			if (old != NULL && new != old)
@@ -907,20 +1042,6 @@
 	}
 }
 
-/*
- * This function counts the number of attached 
- * slaves for use by bond_xmit_xor.
- */
-static void update_slave_cnt(bonding_t *bond)
-{
-	slave_t *slave = NULL;
-
-	bond->slave_cnt = 0;
-	for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) {
-		bond->slave_cnt++;
-	}
-}
-
 /* enslave device <slave> to bond device <master> */
 static int bond_enslave(struct net_device *master_dev, 
                         struct net_device *slave_dev)
@@ -934,10 +1055,7 @@
 	struct dev_mc_list *dmi;
 	struct in_ifaddr **ifap;
 	struct in_ifaddr *ifa;
-	static int (* ioctl)(struct net_device *, struct ifreq *, int);
-	struct ifreq ifr;
-	struct ethtool_value etool; 
-	int link_reporting = 0;
+	int link_reporting;
 
 	if (master_dev == NULL || slave_dev == NULL) {
 		return -ENODEV;
@@ -949,14 +1067,12 @@
 			"Warning : no link monitoring support for %s\n",
 			slave_dev->name);
 	}
-	write_lock_irqsave(&bond->lock, flags);
 
 	/* not running. */
 	if ((slave_dev->flags & IFF_UP) != IFF_UP) {
 #ifdef BONDING_DEBUG
 		printk(KERN_CRIT "Error, slave_dev is not running\n");
 #endif
-		write_unlock_irqrestore(&bond->lock, flags);
 		return -EINVAL;
 	}
 
@@ -965,12 +1081,10 @@
 #ifdef BONDING_DEBUG
 		printk(KERN_CRIT "Error, Device was already enslaved\n");
 #endif
-		write_unlock_irqrestore(&bond->lock, flags);
 		return -EBUSY;
 	}
 		   
-	if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) {
-		write_unlock_irqrestore(&bond->lock, flags);
+	if ((new_slave = kmalloc(sizeof(slave_t), GFP_ATOMIC)) == NULL) {
 		return -ENOMEM;
 	}
 	memset(new_slave, 0, sizeof(slave_t));
@@ -983,14 +1097,12 @@
 #ifdef BONDING_DEBUG
 		printk(KERN_CRIT "Error %d calling netdev_set_master\n", err);
 #endif
-		kfree(new_slave);
-		write_unlock_irqrestore(&bond->lock, flags);
-		return err;      
+		goto err_free;
 	}
 
 	new_slave->dev = slave_dev;
 
-	if (multicast == BOND_MULTICAST_ALL) {
+	if (multicast_mode == BOND_MULTICAST_ALL) {
 		/* set promiscuity level to new slave */ 
 		if (master_dev->flags & IFF_PROMISC)
 			dev_set_promiscuity(slave_dev, 1); 
@@ -1004,64 +1116,24 @@
 			dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 	}
 
-	/* 
-	 * queue to the end of the slaves list, make the first element its
-	 * successor, the last one its predecessor, and make it the bond's
-	 * predecessor. 
-	 *
-	 * Just to clarify, so future bonding driver hackers don't go through
-	 * the same confusion stage I did trying to figure this out, the
-	 * slaves are stored in a double linked circular list, sortof.
-	 * In the ->next direction, the last slave points to the first slave,
-	 * bypassing bond; only the slaves are in the ->next direction.
-	 * In the ->prev direction, however, the first slave points to bond
-	 * and bond points to the last slave.
-	 *
-	 * It looks like a circle with a little bubble hanging off one side
-	 * in the ->prev direction only.
-	 *
-	 * When going through the list once, its best to start at bond->prev
-	 * and go in the ->prev direction, testing for bond.  Doing this
-	 * in the ->next direction doesn't work.  Trust me, I know this now.
-	 * :)  -mts 2002.03.14
-	 */
-	new_slave->prev       = bond->prev;
-	new_slave->prev->next = new_slave;
-	bond->prev            = new_slave;
-	new_slave->next       = bond->next;
-
+	write_lock_irqsave(&bond->lock, flags);
+	
+	bond_attach_slave(bond, new_slave);
 	new_slave->delay = 0;
 	new_slave->link_failure_count = 0;
 
-	if (miimon > 0) {
-		/* if the network driver for the slave does not support
-		 * ETHTOOL/MII link status reporting, warn the user of this
-		 */
-		if ((ioctl = slave_dev->do_ioctl) != NULL) {
-			etool.cmd = ETHTOOL_GLINK;
-	        	ifr.ifr_data = (char*)&etool;
-			if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) {
-				link_reporting = 1;
-			}
-			else {
-				if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
-		                        /* Yes, the mii is overlaid on the
-					 * ifreq.ifr_ifru
-					 */
-					((struct mii_ioctl_data*)
-					 (&ifr.ifr_data))->reg_num = 1;
-					if (IOCTL(slave_dev, &ifr, SIOCGMIIREG)
-						       			== 0) {
-	                                	link_reporting = 1;
-					}
-				}
-			}
-		}
+	if (miimon > 0 && !use_carrier) {
+		link_reporting = bond_check_dev_link(slave_dev, 1);
 
-		if ((link_reporting == 0) && (arp_interval == 0)) {
-			/* miimon is set but a bonded network driver does
-			 * not support ETHTOOL/MII and arp_interval is
-			 * not set
+		if ((link_reporting == -1) && (arp_interval == 0)) {
+			/*
+			 * miimon is set but a bonded network driver
+			 * does not support ETHTOOL/MII and
+			 * arp_interval is not set.  Note: if
+			 * use_carrier is enabled, we will never go
+			 * here (because netif_carrier is always
+			 * supported); thus, we don't need to change
+			 * the messages for netif_carrier.
 			 */ 
 			printk(KERN_ERR
 				"bond_enslave(): MII and ETHTOOL support not "
@@ -1070,8 +1142,7 @@
 		       		"not specified, thus bonding will not detect "
 				"link failures! see bonding.txt for details.\n",
 		       		slave_dev->name);
-		}
-		else if (link_reporting == 0) {
+		} else if (link_reporting == -1) {
 			/* unable  get link status using mii/ethtool */
 			printk(KERN_WARNING 
 			       "bond_enslave: can't get link status from "
@@ -1085,7 +1156,7 @@
 
 	/* check for initial state */
 	if ((miimon <= 0) ||
-	    (bond_check_dev_link(slave_dev) == BMSR_LSTATUS)) {
+	    (bond_check_dev_link(slave_dev, 0) == BMSR_LSTATUS)) {
 #ifdef BONDING_DEBUG
 		printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n");
 #endif
@@ -1106,7 +1177,7 @@
 	 * since we guarantee that current_slave always point to the last
 	 * usable interface, we just have to verify this interface's flag.
 	 */
-	if (mode == BOND_MODE_ACTIVEBACKUP) {
+	if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 		if (((bond->current_slave == NULL)
 			|| (bond->current_slave->dev->flags & IFF_NOARP))
 			&& (new_slave->link == BOND_LINK_UP)) {
@@ -1145,8 +1216,6 @@
 			bond->current_slave = new_slave;
 	}
 
-	update_slave_cnt(bond);
-
 	write_unlock_irqrestore(&bond->lock, flags);
 
 	/*
@@ -1187,7 +1256,11 @@
 		new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
 		new_slave->link == BOND_LINK_UP ? "n up" : " down");
 
+	/* enslave is successful */
 	return 0;
+err_free:
+	kfree(new_slave);
+	return err;
 }
 
 /* 
@@ -1276,13 +1349,13 @@
 		} else {
 
 			printk (" but could not find any %s interface.\n",
-				(mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
+				(bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
 			write_lock(&bond->ptrlock);
 			bond->current_slave = (slave_t *)NULL;
 			write_unlock(&bond->ptrlock);
 			return NULL; /* still no slave, return NULL */
 		}
-	} else if (mode == BOND_MODE_ACTIVEBACKUP) {
+	} else if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 		/* make sure oldslave doesn't send arps - this could
 		 * cause a ping-pong effect between interfaces since they
 		 * would be able to tx arps - in active backup only one
@@ -1311,7 +1384,7 @@
 		if (IS_UP(newslave->dev)) {
 			if (newslave->link == BOND_LINK_UP) {
 				/* this one is immediately usable */
-				if (mode == BOND_MODE_ACTIVEBACKUP) {
+				if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 					bond_set_slave_active_flags(newslave);
 					bond_mc_update(bond, newslave, oldslave);
 					printk (" and making interface %s the active one.\n",
@@ -1358,8 +1431,8 @@
 		return bestslave;
 	}
 
-	if ((mode == BOND_MODE_ACTIVEBACKUP) &&
-	    (multicast == BOND_MULTICAST_ACTIVE) &&
+	if ((bond_mode == BOND_MODE_ACTIVEBACKUP) &&
+	    (multicast_mode == BOND_MULTICAST_ACTIVE) &&
 	    (oldslave != NULL)) {
 		/* flush bonds (master's) mc_list from oldslave since it wasn't
 		 * updated (and deleted) above
@@ -1374,7 +1447,7 @@
 	}
 
 	printk (" but could not find any %s interface.\n",
-		(mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
+		(bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
 	
 	/* absolutely nothing found. let's return NULL */
 	write_lock(&bond->ptrlock);
@@ -1406,16 +1479,14 @@
 
 	bond = (struct bonding *) master->priv;
 
-	write_lock_irqsave(&bond->lock, flags);
-
 	/* master already enslaved, or slave not enslaved,
 	   or no slave for this master */
 	if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) {
 		printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name);
-		write_unlock_irqrestore(&bond->lock, flags);
 		return -EINVAL;
 	}
 
+	write_lock_irqsave(&bond->lock, flags);
 	bond->current_arp_slave = NULL;
 	our_slave = (slave_t *)bond;
 	old_current = bond->current_slave;
@@ -1434,60 +1505,62 @@
 			} else {
 				printk(".\n");
 			}
-
-			/* release the slave from its bond */
-
-			if (multicast == BOND_MULTICAST_ALL) {
-				/* flush master's mc_list from slave */ 
-				bond_mc_list_flush (slave, master); 
-				
-				/* unset promiscuity level from slave */
-				if (master->flags & IFF_PROMISC) 
-					dev_set_promiscuity(slave, -1); 
-       
-				/* unset allmulti level from slave */ 
-				if (master->flags & IFF_ALLMULTI)
-					dev_set_allmulti(slave, -1); 
-			}
-
-			netdev_set_master(slave, NULL);
-
-			/* only restore its RUNNING flag if monitoring set it down */
-			if (slave->flags & IFF_UP) {
-				slave->flags |= IFF_RUNNING;
-			}
-
-			if (slave->flags & IFF_NOARP || 
-				bond->current_slave != NULL) {
-					dev_close(slave);
-					our_slave->original_flags &= ~IFF_UP;
-			}
-
-			bond_restore_slave_flags(our_slave);
-			kfree(our_slave);
-
+			
 			if (bond->current_slave == NULL) {
 				printk(KERN_INFO
 					"%s: now running without any active interface !\n",
 					master->name);
 			}
 
-			update_slave_cnt(bond);
-
 			if (bond->primary_slave == our_slave) {
 				bond->primary_slave = NULL;
 			}
 
-			write_unlock_irqrestore(&bond->lock, flags);
-			return 0;  /* deletion OK */
+			break;
 		}
-	}
 
-	/* if we get here, it's because the device was not found */
+	}
 	write_unlock_irqrestore(&bond->lock, flags);
+	
+	if (our_slave == (slave_t *)bond) {
+		/* if we get here, it's because the device was not found */
+		printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name);
+		return -EINVAL;
+	}
 
-	printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name);
-	return -EINVAL;
+	/* undo settings and restore original values */
+	
+	if (multicast_mode == BOND_MULTICAST_ALL) {
+		/* flush master's mc_list from slave */ 
+		bond_mc_list_flush (slave, master); 
+
+		/* unset promiscuity level from slave */
+		if (master->flags & IFF_PROMISC) 
+			dev_set_promiscuity(slave, -1); 
+
+		/* unset allmulti level from slave */ 
+		if (master->flags & IFF_ALLMULTI)
+			dev_set_allmulti(slave, -1); 
+	}
+
+	netdev_set_master(slave, NULL);
+
+	/* only restore its RUNNING flag if monitoring set it down */
+	if (slave->flags & IFF_UP) {
+		slave->flags |= IFF_RUNNING;
+	}
+
+	if (slave->flags & IFF_NOARP || 
+		bond->current_slave != NULL) {
+			dev_close(slave);
+			our_slave->original_flags &= ~IFF_UP;
+	}
+
+	bond_restore_slave_flags(our_slave);
+	
+	kfree(our_slave);
+
+	return 0;  /* deletion OK */
 }
 
 /* 
@@ -1510,13 +1583,15 @@
 
 	bond = (struct bonding *) master->priv;
 	bond->current_arp_slave = NULL;
+	bond->current_slave = NULL;
+	bond->primary_slave = NULL;
 
 	while ((our_slave = bond->prev) != (slave_t *)bond) {
 		slave_dev = our_slave->dev;
-		bond->prev = our_slave->prev;
+		bond_detach_slave(bond, our_slave);
 
-		if (multicast == BOND_MULTICAST_ALL 
-		    || (multicast == BOND_MULTICAST_ACTIVE 
+		if (multicast_mode == BOND_MULTICAST_ALL 
+		    || (multicast_mode == BOND_MULTICAST_ACTIVE 
 			&& bond->current_slave == our_slave)) {
 
 			/* flush master's mc_list from slave */ 
@@ -1533,6 +1608,10 @@
 
 		kfree(our_slave);
 
+		/*
+		 * Can be safely called from inside the bond lock
+		 * since traffic and timers have already stopped
+		 */
 		netdev_set_master(slave_dev, NULL);
 
 		/* only restore its RUNNING flag if monitoring set it down */
@@ -1543,10 +1622,6 @@
 			dev_close(slave_dev);
 	}
 
-	bond->current_slave = NULL;
-	bond->next = (slave_t *)bond;
-	bond->slave_cnt = 0;
-	bond->primary_slave = NULL;
 	printk (KERN_INFO "%s: released all slaves\n", master->name);
 
 	return 0;
@@ -1579,9 +1654,9 @@
 		/* use updelay+1 to match an UP slave even when updelay is 0 */
 		int mindelay = updelay + 1;
 		struct net_device *dev = slave->dev;
-		u16 link_state;
+		int link_state;
 		
-		link_state = bond_check_dev_link(dev);
+		link_state = bond_check_dev_link(dev, 0);
 
 		switch (slave->link) {
 		case BOND_LINK_UP:	/* the link was up */
@@ -1608,7 +1683,7 @@
 						"%s, disabling it in %d ms.\n",
 						master->name,
 						IS_UP(dev)
-						? ((mode == BOND_MODE_ACTIVEBACKUP)
+						? ((bond_mode == BOND_MODE_ACTIVEBACKUP)
 						   ? ((slave == oldcurrent)
 						      ? "active " : "backup ")
 						   : "")
@@ -1628,7 +1703,7 @@
 					slave->link = BOND_LINK_DOWN;
 					/* in active/backup mode, we must
 					   completely disable this interface */
-					if (mode == BOND_MODE_ACTIVEBACKUP) {
+					if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 						bond_set_slave_inactive_flags(slave);
 					}
 					printk(KERN_INFO
@@ -1708,14 +1783,12 @@
 					slave->link = BOND_LINK_UP;
 					slave->jiffies = jiffies;
 
-					if ( (mode == BOND_MODE_ACTIVEBACKUP) 
-					 || (slave != bond->primary_slave) ) {
-						/* prevent it from being the active one */
-						slave->state = BOND_STATE_BACKUP;
-					}
-					else {
+					if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
 						/* make it immediately active */
 						slave->state = BOND_STATE_ACTIVE;
+					} else if (slave != bond->primary_slave) {
+						/* prevent it from being the active one */
+						slave->state = BOND_STATE_BACKUP;
 					}
 
 					printk(KERN_INFO
@@ -1775,7 +1848,7 @@
 				bestslave->jiffies = jiffies;
 			}
 
-			if (mode == BOND_MODE_ACTIVEBACKUP) {
+			if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 				bond_set_slave_active_flags(bestslave);
 				bond_mc_update(bond, bestslave, NULL);
 			} else {
@@ -2171,19 +2244,6 @@
 	read_unlock_irqrestore(&bond->lock, flags);
 }
 
-
-#define isdigit(c) (c >= '0' && c <= '9')
-__inline static int atoi( char **s) 
-{
-int i = 0;
-while (isdigit(**s))
-  i = i*20 + *((*s)++) - '0';
-return i;
-}
-
-#define isascii(c) (((unsigned char)(c))<=0x7f)
-#define LF 0xA
-#define isspace(c) (c==' ' || c=='	'|| c==LF)   
 typedef uint32_t in_addr_t;
 
 int
@@ -2279,7 +2339,7 @@
 	slave_t *slave;
 	unsigned long flags;
 
-	info->bond_mode = mode;
+	info->bond_mode = bond_mode;
 	info->num_slaves = 0;
 	info->miimon = miimon;
 
@@ -2416,7 +2476,7 @@
 			break;
 		case BOND_CHANGE_ACTIVE_OLD:
 		case SIOCBONDCHANGEACTIVE:
-			if (mode == BOND_MODE_ACTIVEBACKUP) {
+			if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 				ret = bond_change_active(master_dev, slave_dev);
 			}
 			else {
@@ -2567,20 +2627,13 @@
 	slave = bond->prev;
 
 	/* we're at the root, get the first slave */
-	if ((slave == NULL) || (slave->dev == NULL)) { 
+	if (bond->slave_cnt == 0) {
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 		read_unlock_irqrestore(&bond->lock, flags);
 		return 0;
 	}
 
-	if (bond->slave_cnt == 0) {
-		/* no slaves in the bond, frame not sent */
-		dev_kfree_skb(skb);
-		read_unlock_irqrestore(&bond->lock, flags);
-		return 0;
-	}
-	
 	slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt;
 
 	while ( (slave_no > 0) && (slave != (slave_t *)bond) ) {
@@ -2642,7 +2695,7 @@
 	 * receive packets to stay up, and the only ones they receive are 
 	 * broadcasts. 
 	 */
-	if ( (mode != BOND_MODE_ACTIVEBACKUP) && 
+	if ( (bond_mode != BOND_MODE_ACTIVEBACKUP) && 
              (arp_ip_count == 1) &&
 	     (arp_interval > 0) && (arp_target_hw_addr == NULL) &&
 	     (skb->protocol == __constant_htons(ETH_P_IP) ) ) {
@@ -2743,9 +2796,10 @@
 		 */
 		link = bond_check_mii_link(bond);
 
-		len += sprintf(buf + len, "Bonding Mode: %s\n", bond_mode());
+		len += sprintf(buf + len, "Bonding Mode: %s\n",
+			       bond_mode_name());
 
-		if (mode == BOND_MODE_ACTIVEBACKUP) {
+		if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
 			read_lock_irqsave(&bond->lock, flags);
 			read_lock(&bond->ptrlock);
 			if (bond->current_slave != NULL) {
@@ -2766,7 +2820,8 @@
 				updelay * miimon);
 		len += sprintf(buf + len, "Down Delay (ms): %d\n", 
 				downdelay * miimon);
-		len += sprintf(buf + len, "Multicast Mode: %s\n", multicast_mode());
+		len += sprintf(buf + len, "Multicast Mode: %s\n",
+			       multicast_mode_name());
 
 		read_lock_irqsave(&bond->lock, flags);
 		for (slave = bond->prev; slave != (slave_t *)bond; 
@@ -2884,16 +2939,21 @@
 	dev->priv = bond;
 
 	/* Initialize the device structure. */
-	if (mode == BOND_MODE_ACTIVEBACKUP) {
+	switch (bond_mode) {
+	case BOND_MODE_ACTIVEBACKUP:
 		dev->hard_start_xmit = bond_xmit_activebackup;
-	} else if (mode == BOND_MODE_ROUNDROBIN) {
+		break;
+	case BOND_MODE_ROUNDROBIN:
 		dev->hard_start_xmit = bond_xmit_roundrobin;
-	} else if (mode == BOND_MODE_XOR) {
+		break;
+	case BOND_MODE_XOR:
 		dev->hard_start_xmit = bond_xmit_xor;
-	} else if (mode == BOND_MODE_BROADCAST) {
+		break;
+	case BOND_MODE_BROADCAST:
 		dev->hard_start_xmit = bond_xmit_broadcast;
-	} else {
-		printk(KERN_ERR "Unknown bonding mode %d\n", mode);
+		break;
+	default:
+		printk(KERN_ERR "Unknown bonding mode %d\n", bond_mode);
 		kfree(bond->stats);
 		kfree(bond);
 		return -EINVAL;
@@ -2926,7 +2986,7 @@
 	} else {
 		printk("out MII link monitoring");
 	}
-	printk(", in %s mode.\n", bond_mode());
+	printk(", in %s mode.\n", bond_mode_name());
 
 	printk(KERN_INFO "%s registered with", dev->name);
 	if (arp_interval > 0) {
@@ -2986,6 +3046,28 @@
 }
  */
 
+/*
+ * Convert a string module parm: mode_arg may be the number of a mode or
+ * its exact name as listed in tbl.  Returns tbl's mode value, or -1.
+ */
+static inline int
+bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
+{
+	int i;
+
+	for (i = 0; tbl[i].modename != NULL; i++) {
+		/* match by number ("1") or by whole name ("active-backup") */
+		if ((isdigit(*mode_arg) &&
+		    tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) ||
+		    (0 == strcmp(mode_arg, tbl[i].modename))) {
+			return tbl[i].mode;
+		}
+	}
+
+	return -1;
+}
+
+
 static int __init bonding_init(void)
 {
 	int no;
@@ -2996,6 +3078,29 @@
 
 	printk(KERN_INFO "%s", version);
 
+	/*
+	 * Convert string parameters.
+	 */
+	if (mode) {
+		bond_mode = bond_parse_parm(mode, bond_mode_tbl);
+		if (bond_mode == -1) {
+			printk(KERN_WARNING
+			       "bonding_init(): Invalid bonding mode \"%s\"\n",
+			       mode == NULL ? "NULL" : mode);
+			return -EINVAL;
+		}
+	}
+
+	if (multicast) {
+		multicast_mode = bond_parse_parm(multicast, bond_mc_tbl);
+		if (multicast_mode == -1) {
+			printk(KERN_WARNING 
+		       "bonding_init(): Invalid multicast mode \"%s\"\n",
+			       multicast == NULL ? "NULL" : multicast);
+			return -EINVAL;
+		}
+	}
+
 	if (max_bonds < 1 || max_bonds > INT_MAX) {
 		printk(KERN_WARNING 
 		       "bonding_init(): max_bonds (%d) not in range %d-%d, "
@@ -3125,27 +3230,17 @@
 		       "link failures! see bonding.txt for details.\n");
 	}
 
-	if ((primary != NULL) && (mode != BOND_MODE_ACTIVEBACKUP)){
+	if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP)){
 		/* currently, using a primary only makes sence 
 		 * in active backup mode 
 		 */
 		printk(KERN_WARNING 
 		       "bonding_init(): %s primary device specified but has "
 		       " no effect in %s mode\n",
-		       primary, bond_mode());
+		       primary, bond_mode_name());
 		primary = NULL;
 	}
 
-
-        if (multicast != BOND_MULTICAST_DISABLED &&
-	    multicast != BOND_MULTICAST_ACTIVE &&
-	    multicast != BOND_MULTICAST_ALL) {
-		printk(KERN_WARNING 
-		       "bonding_init(): unknown multicast module "
-		       "parameter (%d), multicast reset to %d\n", 
-		       multicast, BOND_MULTICAST_ALL);
-		multicast = BOND_MULTICAST_ALL;
-	}
 
 	for (no = 0; no < max_bonds; no++) {
 		dev_bond->init = bond_init;
diff -Nru a/drivers/net/gt96100eth.c b/drivers/net/gt96100eth.c
--- a/drivers/net/gt96100eth.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/net/gt96100eth.c	Mon Mar 31 13:41:08 2003
@@ -758,19 +758,6 @@
 		goto free_region;
 	}
 
-	/* Initialize our private structure. */
-	if (dev->priv == NULL) {
-
-		gp = (struct gt96100_private *)kmalloc(sizeof(*gp),
-						       GFP_KERNEL);
-		if (gp == NULL) {
-			retval = -ENOMEM;
-			goto free_region;
-		}
-	
-		dev->priv = gp;
-	}
-
 	gp = dev->priv;
 
 	memset(gp, 0, sizeof(*gp)); // clear it
@@ -854,8 +841,6 @@
  free_region:
 	release_region(gtif->iobase, GT96100_ETH_IO_SIZE);
 	unregister_netdev(dev);
-	if (dev->priv != NULL)
-		kfree (dev->priv);
 	kfree (dev);
 	err("%s failed.  Returns %d\n", __FUNCTION__, retval);
 	return retval;
@@ -1601,8 +1586,6 @@
 				(struct gt96100_private *)gtif->dev->priv;
 			release_region(gtif->iobase, gp->io_size);
 			unregister_netdev(gtif->dev);
-			if (gtif->dev->priv != NULL)
-				kfree (gtif->dev->priv);
 			kfree (gtif->dev);
 		}
 	}
diff -Nru a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
--- a/drivers/net/irda/irda-usb.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/net/irda/irda-usb.c	Mon Mar 31 13:41:07 2003
@@ -402,7 +402,7 @@
 
         usb_fill_bulk_urb(urb, self->usbdev, 
 		      usb_sndbulkpipe(self->usbdev, self->bulk_out_ep),
-                      skb->data, IRDA_USB_MAX_MTU,
+                      skb->data, IRDA_SKB_MAX_MTU,
                       write_bulk_callback, skb);
 	urb->transfer_buffer_length = skb->len;
 	/* Note : unlink *must* be Asynchronous because of the code in 
@@ -442,6 +442,9 @@
 			 * would be lost in the noise - Jean II */
 			diff += IU_USB_MIN_RTT;
 #endif /* IU_USB_MIN_RTT */
+			/* If the usec counter did wraparound, the diff will
+			 * go negative (tv_usec is a long), so we need to
+			 * correct it by one second. Jean II */
 			if (diff < 0)
 				diff += 1000000;
 
@@ -701,30 +704,11 @@
 
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
-	/* Check that we have an urb */
-	if (!urb) {
-		WARNING("%s(), Bug : urb == NULL\n", __FUNCTION__);
-		return;
-	}
-
-	/* Allocate new skb if it has not been recycled */
-	if (!skb) {
-		skb = dev_alloc_skb(IRDA_USB_MAX_MTU + 1);
-		if (!skb) {
-			/* If this ever happen, we are in deep s***.
-			 * Basically, the Rx path will stop... */
-			WARNING("%s(), Failed to allocate Rx skb\n", __FUNCTION__);
-			return;
-		}
-	} else  {
-		/* Reset recycled skb */
-		skb->data = skb->tail = skb->head;
-		skb->len = 0;
-	}
-	/* Make sure IP header get aligned (IrDA header is 5 bytes ) */
-	skb_reserve(skb, 1);
+	/* This should never happen */
+	ASSERT(skb != NULL, return;);
+	ASSERT(urb != NULL, return;);
 
-	/* Save ourselves */
+	/* Save ourselves in the skb */
 	cb = (struct irda_skb_cb *) skb->cb;
 	cb->context = self;
 
@@ -758,8 +742,10 @@
 	struct sk_buff *skb = (struct sk_buff *) urb->context;
 	struct irda_usb_cb *self; 
 	struct irda_skb_cb *cb;
-	struct sk_buff *new;
-	
+	struct sk_buff *newskb;
+	struct sk_buff *dataskb;
+	int		docopy;
+
 	IRDA_DEBUG(2, "%s(), len=%d\n", __FUNCTION__, urb->actual_length);
 	
 	/* Find ourselves */
@@ -808,39 +794,56 @@
 	 */
         do_gettimeofday(&self->stamp);
 
-	/* Fix skb, and remove USB-IrDA header */
-	skb_put(skb, urb->actual_length);
-	skb_pull(skb, USB_IRDA_HEADER);
-
-	/* Don't waste a lot of memory on small IrDA frames */
-	if (skb->len < RX_COPY_THRESHOLD) {
-		new = dev_alloc_skb(skb->len+1);
-		if (!new) {
-			self->stats.rx_dropped++;
-			goto done;  
-		}
+	/* Check if we need to copy the data to a new skb or not.
+	 * For most frames, we use ZeroCopy and pass the already
+	 * allocated skb up the stack.
+	 * If the frame is small, it is more efficient to copy it
+	 * to save memory (copy will be fast anyway - that's
+	 * called Rx-copy-break). Jean II */
+	docopy = (urb->actual_length < IRDA_RX_COPY_THRESHOLD);
+
+	/* Allocate a new skb */
+	newskb = dev_alloc_skb(docopy ? urb->actual_length : IRDA_SKB_MAX_MTU);
+	if (!newskb)  {
+		self->stats.rx_dropped++;
+		/* We could deliver the current skb, but this would stall
+		 * the Rx path. Better drop the packet... Jean II */
+		goto done;  
+	}
+
+	/* Make sure IP header get aligned (IrDA header is 5 bytes) */
+	/* But IrDA-USB header is 1 byte. Jean II */
+	//skb_reserve(newskb, USB_IRDA_HEADER - 1);
 
-		/* Make sure IP header get aligned (IrDA header is 5 bytes) */
-		skb_reserve(new, 1);
-		
+	if(docopy) {
 		/* Copy packet, so we can recycle the original */
-		memcpy(skb_put(new, skb->len), skb->data, skb->len);
-		/* We will cleanup the skb in irda_usb_submit() */
+		memcpy(newskb->data, skb->data, urb->actual_length);
+		/* Deliver this new skb */
+		dataskb = newskb;
+		/* And hook the old skb to the URB
+		 * Note : we don't need to "clean up" the old skb,
+		 * as we never touched it. Jean II */
 	} else {
-		/* Deliver the original skb */
-		new = skb;
-		skb = NULL;
+		/* We are using ZeroCopy. Deliver old skb */
+		dataskb = skb;
+		/* And hook the new skb to the URB */
+		skb = newskb;
 	}
-	
-	self->stats.rx_bytes += new->len;
-	self->stats.rx_packets++;
+
+	/* Set proper length on skb & remove USB-IrDA header */
+	skb_put(dataskb, urb->actual_length);
+	skb_pull(dataskb, USB_IRDA_HEADER);
 
 	/* Ask the networking layer to queue the packet for the IrDA stack */
-        new->dev = self->netdev;
-        new->mac.raw  = new->data;
-        new->protocol = htons(ETH_P_IRDA);
-        netif_rx(new);
-        self->netdev->last_rx = jiffies;
+	dataskb->dev = self->netdev;
+	dataskb->mac.raw  = dataskb->data;
+	dataskb->protocol = htons(ETH_P_IRDA);
+	/* Keep stats up to date. Must be done before netif_rx() : once
+	 * queued, the skb belongs to the stack and may be freed. Jean II */
+	self->stats.rx_bytes += dataskb->len;
+	self->stats.rx_packets++;
+	netif_rx(dataskb);
+	self->netdev->last_rx = jiffies;
 
 done:
 	/* Note : at this point, the URB we've just received (urb)
@@ -973,8 +976,17 @@
 
 	/* Now that we can pass data to IrLAP, allow the USB layer
 	 * to send us some data... */
-	for (i = 0; i < IU_MAX_ACTIVE_RX_URBS; i++)
-		irda_usb_submit(self, NULL, self->rx_urb[i]);
+	for (i = 0; i < IU_MAX_ACTIVE_RX_URBS; i++) {
+		struct sk_buff *skb = dev_alloc_skb(IRDA_SKB_MAX_MTU);
+		if (!skb) {
+			/* If this ever happens, we are in deep s***.
+			 * We can't start the Rx path : report out of memory */
+			WARNING("%s(), Failed to allocate Rx skb\n", __FUNCTION__);
+			return -ENOMEM;
+		}
+		//skb_reserve(skb, USB_IRDA_HEADER - 1);
+		irda_usb_submit(self, skb, self->rx_urb[i]);
+	}
 
 	/* Ready to play !!! */
 	return 0;
@@ -1167,9 +1179,6 @@
 	spin_lock_init(&self->lock);
 
 	irda_usb_init_qos(self);
-	
-	/* Initialise list of skb beeing curently transmitted */
-	self->tx_list = hashbin_new(HB_NOLOCK);	/* unused */
 
 	/* Allocate the buffer for speed changes */
 	/* Don't change this buffer size and allocation without doing
@@ -1228,8 +1237,6 @@
 		self->netdev = NULL;
 		rtnl_unlock();
 	}
-	/* Delete all pending skbs */
-	hashbin_delete(self->tx_list, (FREE_FUNC) &dev_kfree_skb_any);
 	/* Remove the speed buffer */
 	if (self->speed_buff != NULL) {
 		kfree(self->speed_buff);
@@ -1492,8 +1499,10 @@
 		case 0:
 			break;
 		case -EPIPE:		/* -EPIPE = -32 */
-			usb_clear_halt(dev, usb_sndctrlpipe(dev, 0));
-			IRDA_DEBUG(0, "%s(), Clearing stall on control interface\n", __FUNCTION__);
+			/* Martin Diehl says if we get a -EPIPE we should
+			 * be fine and we don't need to do a usb_clear_halt().
+			 * - Jean II */
+			IRDA_DEBUG(0, "%s(), Received -EPIPE, ignoring...\n", __FUNCTION__);
 			break;
 		default:
 			IRDA_DEBUG(0, "%s(), Unknown error %d\n", __FUNCTION__, ret);
diff -Nru a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c
--- a/drivers/net/irda/sir_dev.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/net/irda/sir_dev.c	Mon Mar 31 13:41:07 2003
@@ -223,25 +223,24 @@
 	}
 
 	/* Read the characters into the buffer */
- 	while (count--) {
-		if (likely(atomic_read(&dev->enable_rx))) {
+	if (likely(atomic_read(&dev->enable_rx))) {
+		while (count--)
 			/* Unwrap and destuff one byte */
 			async_unwrap_char(dev->netdev, &dev->stats, 
-				  &dev->rx_buff, *cp++);
-		}
-		else {
+					  &dev->rx_buff, *cp++);
+	} else {
+		while (count--) {
 			/* rx not enabled: save the raw bytes and never
 			 * trigger any netif_rx. The received bytes are flushed
 			 * later when we re-enable rx but might be read meanwhile
 			 * by the dongle driver.
 			 */
 			dev->rx_buff.data[dev->rx_buff.len++] = *cp++;
-		}
 
-		/* What should we do when the buffer is full? */
-		if (unlikely(dev->rx_buff.len == dev->rx_buff.truesize))
-			dev->rx_buff.len = 0;
-			
+			/* What should we do when the buffer is full? */
+			if (unlikely(dev->rx_buff.len == dev->rx_buff.truesize))
+				dev->rx_buff.len = 0;
+		}
 	}
 
 	return 0;
@@ -423,19 +422,24 @@
 
 static int sirdev_alloc_buffers(struct sir_dev *dev)
 {
-	dev->rx_buff.truesize = SIRBUF_ALLOCSIZE; 
 	dev->tx_buff.truesize = SIRBUF_ALLOCSIZE;
+	dev->rx_buff.truesize = IRDA_SKB_MAX_MTU; 
 
-	dev->rx_buff.head = kmalloc(dev->rx_buff.truesize, GFP_KERNEL);
-	if (dev->rx_buff.head == NULL)
+	/* Bootstrap ZeroCopy Rx */
+	dev->rx_buff.skb = __dev_alloc_skb(dev->rx_buff.truesize, GFP_KERNEL);
+	if (dev->rx_buff.skb == NULL)
 		return -ENOMEM;
-	memset(dev->rx_buff.head, 0, dev->rx_buff.truesize);
+	skb_reserve(dev->rx_buff.skb, 1);
+	dev->rx_buff.head = dev->rx_buff.skb->data;
+	/* No need to memset the buffer, unless you are really pedantic */
 
 	dev->tx_buff.head = kmalloc(dev->tx_buff.truesize, GFP_KERNEL);
 	if (dev->tx_buff.head == NULL) {
-		kfree(dev->rx_buff.head);
+		kfree_skb(dev->rx_buff.skb);
+		dev->rx_buff.skb = NULL;
 		dev->rx_buff.head = NULL;
 		return -ENOMEM;
+		/* FIXME: dead code, we just returned. Move it after this block? Martin ? */
 		memset(dev->tx_buff.head, 0, dev->tx_buff.truesize);
 	}
 
@@ -451,11 +455,12 @@
 
 static void sirdev_free_buffers(struct sir_dev *dev)
 {
-	if (dev->rx_buff.head)
-		kfree(dev->rx_buff.head);
+	if (dev->rx_buff.skb)
+		kfree_skb(dev->rx_buff.skb);
 	if (dev->tx_buff.head)
 		kfree(dev->tx_buff.head);
 	dev->rx_buff.head = dev->tx_buff.head = NULL;
+	dev->rx_buff.skb = NULL;
 }
 
 static int sirdev_open(struct net_device *ndev)
diff -Nru a/drivers/net/mace.c b/drivers/net/mace.c
--- a/drivers/net/mace.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/net/mace.c	Mon Mar 31 13:41:06 2003
@@ -16,6 +16,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/crc32.h>
+#include <linux/spinlock.h>
 #include <asm/prom.h>
 #include <asm/dbdma.h>
 #include <asm/io.h>
@@ -63,6 +64,7 @@
     int chipid;
     struct device_node* of_node;
     struct net_device *next_mace;
+    spinlock_t lock;
 };
 
 /*
@@ -203,6 +205,7 @@
 	memset((char *) mp->tx_cmds, 0,
 	       (NCMDS_TX*N_TX_RING + N_RX_RING + 2) * sizeof(struct dbdma_cmd));
 	init_timer(&mp->tx_timeout);
+	spin_lock_init(&mp->lock);
 	mp->timeout_active = 0;
 
 	if (port_aaui >= 0)
@@ -351,14 +354,14 @@
     volatile struct mace *mb = mp->mace;
     unsigned long flags;
 
-    save_flags(flags); cli();
+    spin_lock_irqsave(&mp->lock, flags);
 
     __mace_set_address(dev, addr);
 
     /* note: setting ADDRCHG clears ENRCV */
     out_8(&mb->maccc, mp->maccc);
 
-    restore_flags(flags);
+    spin_unlock_irqrestore(&mp->lock, flags);
     return 0;
 }
 
@@ -473,10 +476,7 @@
 static inline void mace_set_timeout(struct net_device *dev)
 {
     struct mace_data *mp = (struct mace_data *) dev->priv;
-    unsigned long flags;
 
-    save_flags(flags);
-    cli();
     if (mp->timeout_active)
 	del_timer(&mp->tx_timeout);
     mp->tx_timeout.expires = jiffies + TX_TIMEOUT;
@@ -484,7 +484,6 @@
     mp->tx_timeout.data = (unsigned long) dev;
     add_timer(&mp->tx_timeout);
     mp->timeout_active = 1;
-    restore_flags(flags);
 }
 
 static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev)
@@ -496,7 +495,7 @@
     int fill, next, len;
 
     /* see if there's a free slot in the tx ring */
-    save_flags(flags); cli();
+    spin_lock_irqsave(&mp->lock, flags);
     fill = mp->tx_fill;
     next = fill + 1;
     if (next >= N_TX_RING)
@@ -504,10 +503,10 @@
     if (next == mp->tx_empty) {
 	netif_stop_queue(dev);
 	mp->tx_fullup = 1;
-	restore_flags(flags);
+	spin_unlock_irqrestore(&mp->lock, flags);
 	return 1;		/* can't take it at the moment */
     }
-    restore_flags(flags);
+    spin_unlock_irqrestore(&mp->lock, flags);
 
     /* partially fill in the dma command block */
     len = skb->len;
@@ -524,8 +523,7 @@
     out_le16(&np->command, DBDMA_STOP);
 
     /* poke the tx dma channel */
-    save_flags(flags);
-    cli();
+    spin_lock_irqsave(&mp->lock, flags);
     mp->tx_fill = next;
     if (!mp->tx_bad_runt && mp->tx_active < MAX_TX_ACTIVE) {
 	out_le16(&cp->xfer_status, 0);
@@ -538,7 +536,7 @@
 	next = 0;
     if (next == mp->tx_empty)
 	netif_stop_queue(dev);
-    restore_flags(flags);
+    spin_unlock_irqrestore(&mp->lock, flags);
 
     return 0;
 }
@@ -556,7 +554,9 @@
     volatile struct mace *mb = mp->mace;
     int i, j;
     u32 crc;
+    unsigned long flags;
 
+    spin_lock_irqsave(&mp->lock, flags);
     mp->maccc &= ~PROM;
     if (dev->flags & IFF_PROMISC) {
 	mp->maccc |= PROM;
@@ -598,6 +598,7 @@
     }
     /* reset maccc */
     out_8(&mb->maccc, mp->maccc);
+    spin_unlock_irqrestore(&mp->lock, flags);
 }
 
 static void mace_handle_misc_intrs(struct mace_data *mp, int intr)
@@ -630,8 +631,10 @@
     volatile struct dbdma_cmd *cp;
     int intr, fs, i, stat, x;
     int xcount, dstat;
+    unsigned long flags;
     /* static int mace_last_fs, mace_last_xcount; */
 
+    spin_lock_irqsave(&mp->lock, flags);
     intr = in_8(&mb->ir);		/* read interrupt register */
     in_8(&mb->xmtrc);			/* get retries */
     mace_handle_misc_intrs(mp, intr);
@@ -761,6 +764,7 @@
 	out_le32(&td->control, ((RUN|WAKE) << 16) + (RUN|WAKE));
 	mace_set_timeout(dev);
     }
+    spin_unlock_irqrestore(&mp->lock, flags);
 }
 
 static void mace_tx_timeout(unsigned long data)
@@ -774,8 +778,7 @@
     unsigned long flags;
     int i;
 
-    save_flags(flags);
-    cli();
+    spin_lock_irqsave(&mp->lock, flags);
     mp->timeout_active = 0;
     if (mp->tx_active == 0 && !mp->tx_bad_runt)
 	goto out;
@@ -827,7 +830,7 @@
     out_8(&mb->maccc, mp->maccc);
 
 out:
-    restore_flags(flags);
+    spin_unlock_irqrestore(&mp->lock, flags);
 }
 
 static void mace_txdma_intr(int irq, void *dev_id, struct pt_regs *regs)
@@ -845,7 +848,9 @@
     unsigned frame_status;
     static int mace_lost_status;
     unsigned char *data;
+    unsigned long flags;
 
+    spin_lock_irqsave(&mp->lock, flags);
     for (i = mp->rx_empty; i != mp->rx_fill; ) {
 	cp = mp->rx_cmds + i;
 	stat = ld_le16(&cp->xfer_status);
@@ -941,6 +946,7 @@
 	out_le32(&rd->control, ((RUN|WAKE) << 16) | (RUN|WAKE));
 	mp->rx_fill = i;
     }
+    spin_unlock_irqrestore(&mp->lock, flags);
 }
 
 MODULE_AUTHOR("Paul Mackerras");
diff -Nru a/drivers/net/r8169.c b/drivers/net/r8169.c
--- a/drivers/net/r8169.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/net/r8169.c	Mon Mar 31 13:41:07 2003
@@ -1110,7 +1110,7 @@
 	.name		= MODULENAME,
 	.id_table	= rtl8169_pci_tbl,
 	.probe		= rtl8169_init_one,
-	.remove		= rtl8169_remove_one,
+	.remove		= __devexit_p(rtl8169_remove_one),
 	.suspend	= NULL,
 	.resume		= NULL,
 };
diff -Nru a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
--- a/drivers/net/tulip/de4x5.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/net/tulip/de4x5.c	Mon Mar 31 13:41:06 2003
@@ -452,7 +452,6 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/version.h>
 #include <linux/spinlock.h>
 #include <linux/crc32.h>
 #include <linux/netdevice.h>
diff -Nru a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
--- a/drivers/net/tulip/dmfe.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/net/tulip/dmfe.c	Mon Mar 31 13:41:08 2003
@@ -49,6 +49,10 @@
     support.  Updated PCI resource allocation.  Do not
     forget to unmap PCI mapped skbs.
 
+    Alan Cox <alan@redhat.com>
+    Added new PCI identifiers provided by Clear Zhang at ALi 
+    for their 1563 ethernet device.
+
     TODO
 
     Implement pci_driver::suspend() and pci_driver::resume()
@@ -75,7 +79,6 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/version.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -1975,6 +1978,7 @@
 	{ 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_DM9102_ID },
 	{ 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_DM9100_ID },
 	{ 0x1282, 0x9009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_DM9009_ID },
+	{ 0x10B9, 0x5261, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PCI_DM9102_ID },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, dmfe_pci_tbl);
diff -Nru a/drivers/pci/Makefile b/drivers/pci/Makefile
--- a/drivers/pci/Makefile	Mon Mar 31 13:41:08 2003
+++ b/drivers/pci/Makefile	Mon Mar 31 13:41:08 2003
@@ -29,6 +29,9 @@
 obj-y += setup-bus.o
 endif
 
+# Hotplug (eg, cardbus) now requires setup-bus
+obj-$(CONFIG_HOTPLUG) += setup-bus.o
+
 ifndef CONFIG_X86
 obj-y += syscall.o
 endif
diff -Nru a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
--- a/drivers/pcmcia/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/Kconfig	Mon Mar 31 13:41:08 2003
@@ -82,10 +82,26 @@
 config PCMCIA_SA1100
 	tristate "SA1100 support"
 	depends on ARM && ARCH_SA1100 && PCMCIA
+	help
+	  Say Y here to include support for SA11x0-based PCMCIA or CF
+	  sockets, found on HP iPAQs, Yopy, and other StrongARM(R)/
+	  XScale(R) embedded machines.
+
+	  This driver is also available as a module called sa1100_cs.
 
 config PCMCIA_SA1111
 	tristate "SA1111 support"
-	depends on PCMCIA_SA1100 && SA1111
+	depends on ARM && ARCH_SA1100 && SA1111 && PCMCIA
+	help
+	  Say Y here to include support for SA1111-based PCMCIA or CF
+	  sockets, found on the Jornada 720, Graphicsmaster and other
+	  StrongARM(R)/XScale(R) embedded machines.
+
+	  This driver is also available as a module called sa1111_cs.
+
+config PCMCIA_PROBE
+	bool
+	default y if ISA && !ARCH_SA1100 && !ARCH_CLPS711X
 
 endmenu
 
diff -Nru a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
--- a/drivers/pcmcia/Makefile	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/Makefile	Mon Mar 31 13:41:08 2003
@@ -2,46 +2,43 @@
 # Makefile for the kernel pcmcia subsystem (c/o David Hinds)
 #
 
-obj-$(CONFIG_PCMCIA)			+= pcmcia_core.o ds.o
+obj-$(CONFIG_PCMCIA)				+= pcmcia_core.o ds.o
 ifeq ($(CONFIG_CARDBUS),y)
-  obj-$(CONFIG_PCMCIA) 			+= yenta_socket.o
+  obj-$(CONFIG_PCMCIA) 				+= yenta_socket.o
 endif
 
-obj-$(CONFIG_I82365)			+= i82365.o
-obj-$(CONFIG_I82092)			+= i82092.o
-obj-$(CONFIG_TCIC)			+= tcic.o
-obj-$(CONFIG_HD64465_PCMCIA)		+= hd64465_ss.o
-obj-$(CONFIG_PCMCIA_SA1100)		+= sa1100_cs.o
-obj-$(CONFIG_PCMCIA_SA1111)		+= sa1111_cs.o
+obj-$(CONFIG_I82365)				+= i82365.o
+obj-$(CONFIG_I82092)				+= i82092.o
+obj-$(CONFIG_TCIC)				+= tcic.o
+obj-$(CONFIG_HD64465_PCMCIA)			+= hd64465_ss.o
+obj-$(CONFIG_PCMCIA_SA1100)			+= sa11xx_core.o sa1100_cs.o
+obj-$(CONFIG_PCMCIA_SA1111)			+= sa11xx_core.o sa1111_cs.o
 
-yenta_socket-objs				:= pci_socket.o yenta.o
+yenta_socket-y					+= pci_socket.o yenta.o
 
-pcmcia_core-objs-y				:= cistpl.o rsrc_mgr.o bulkmem.o cs.o
-pcmcia_core-objs-$(CONFIG_CARDBUS)		+= cardbus.o
-pcmcia_core-objs				:= $(pcmcia_core-objs-y)
+pcmcia_core-y					+= cistpl.o rsrc_mgr.o bulkmem.o cs.o
+pcmcia_core-$(CONFIG_CARDBUS)			+= cardbus.o
 
-sa1111_cs-objs-y				:= sa1111_generic.o
-sa1111_cs-objs-$(CONFIG_SA1100_ADSBITSY)	+= sa1100_adsbitsy.o
-sa1111_cs-objs-$(CONFIG_ASSABET_NEPONSET)	+= sa1100_neponset.o
-sa1111_cs-objs-$(CONFIG_SA1100_BADGE4)		+= sa1100_badge4.o
-sa1111_cs-objs-$(CONFIG_SA1100_GRAPHICSMASTER)	+= sa1100_graphicsmaster.o
-sa1111_cs-objs-$(CONFIG_SA1100_JORNADA720)	+= sa1100_jornada720.o
-sa1111_cs-objs-$(CONFIG_SA1100_PFS168)		+= sa1100_pfs168.o
-sa1111_cs-objs-$(CONFIG_SA1100_PT_SYSTEM3)	+= sa1100_system3.o
-sa1111_cs-objs-$(CONFIG_SA1100_XP860)		+= sa1100_xp860.o
-sa1111_cs-objs					:= $(sa1111_cs-objs-y)
+sa1111_cs-y					+= sa1111_generic.o
+sa1111_cs-$(CONFIG_SA1100_ADSBITSY)		+= sa1100_adsbitsy.o
+sa1111_cs-$(CONFIG_ASSABET_NEPONSET)		+= sa1100_neponset.o
+sa1111_cs-$(CONFIG_SA1100_BADGE4)		+= sa1100_badge4.o
+sa1111_cs-$(CONFIG_SA1100_GRAPHICSMASTER)	+= sa1100_graphicsmaster.o
+sa1111_cs-$(CONFIG_SA1100_JORNADA720)		+= sa1100_jornada720.o
+sa1111_cs-$(CONFIG_SA1100_PFS168)		+= sa1100_pfs168.o
+sa1111_cs-$(CONFIG_SA1100_PT_SYSTEM3)		+= sa1100_system3.o
+sa1111_cs-$(CONFIG_SA1100_XP860)		+= sa1100_xp860.o
 
-sa1100_cs-objs-y				:= sa1100_generic.o
-sa1100_cs-objs-$(CONFIG_SA1100_ASSABET)		+= sa1100_assabet.o
-sa1100_cs-objs-$(CONFIG_SA1100_CERF)		+= sa1100_cerf.o
-sa1100_cs-objs-$(CONFIG_SA1100_FLEXANET)	+= sa1100_flexanet.o
-sa1100_cs-objs-$(CONFIG_SA1100_FREEBIRD)	+= sa1100_freebird.o
-sa1100_cs-objs-$(CONFIG_SA1100_GRAPHICSCLIENT)	+= sa1100_graphicsclient.o
-sa1100_cs-objs-$(CONFIG_SA1100_H3600)		+= sa1100_h3600.o
-sa1100_cs-objs-$(CONFIG_SA1100_PANGOLIN)	+= sa1100_pangolin.o
-sa1100_cs-objs-$(CONFIG_SA1100_SHANNON)		+= sa1100_shannon.o
-sa1100_cs-objs-$(CONFIG_SA1100_SIMPAD)		+= sa1100_simpad.o
-sa1100_cs-objs-$(CONFIG_SA1100_STORK)		+= sa1100_stork.o
-sa1100_cs-objs-$(CONFIG_SA1100_TRIZEPS) 	+= sa1100_trizeps.o
-sa1100_cs-objs-$(CONFIG_SA1100_YOPY)		+= sa1100_yopy.o
-sa1100_cs-objs					:= $(sa1100_cs-objs-y)
+sa1100_cs-y					+= sa1100_generic.o
+sa1100_cs-$(CONFIG_SA1100_ASSABET)		+= sa1100_assabet.o
+sa1100_cs-$(CONFIG_SA1100_CERF)			+= sa1100_cerf.o
+sa1100_cs-$(CONFIG_SA1100_FLEXANET)		+= sa1100_flexanet.o
+sa1100_cs-$(CONFIG_SA1100_FREEBIRD)		+= sa1100_freebird.o
+sa1100_cs-$(CONFIG_SA1100_GRAPHICSCLIENT)	+= sa1100_graphicsclient.o
+sa1100_cs-$(CONFIG_SA1100_H3600)		+= sa1100_h3600.o
+sa1100_cs-$(CONFIG_SA1100_PANGOLIN)		+= sa1100_pangolin.o
+sa1100_cs-$(CONFIG_SA1100_SHANNON)		+= sa1100_shannon.o
+sa1100_cs-$(CONFIG_SA1100_SIMPAD)		+= sa1100_simpad.o
+sa1100_cs-$(CONFIG_SA1100_STORK)		+= sa1100_stork.o
+sa1100_cs-$(CONFIG_SA1100_TRIZEPS) 		+= sa1100_trizeps.o
+sa1100_cs-$(CONFIG_SA1100_YOPY)			+= sa1100_yopy.o
diff -Nru a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
--- a/drivers/pcmcia/cardbus.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/cardbus.c	Mon Mar 31 13:41:06 2003
@@ -87,10 +87,6 @@
 #define PCDATA_CODE_TYPE	0x0014
 #define PCDATA_INDICATOR	0x0015
 
-typedef struct cb_config_t {
-	struct pci_dev *dev[8];
-} cb_config_t;
-
 /*=====================================================================
 
     Expansion ROM's have a special layout, and pointers specify an
@@ -173,11 +169,10 @@
 
 	DEBUG(3, "cs: read_cb_mem(%d, %#x, %u)\n", space, addr, len);
 
-	if (!s->cb_config)
+	dev = pci_find_slot(s->cap.cb_dev->subordinate->number, 0);
+	if (!dev)
 		goto fail;
 
-	dev = s->cb_config->dev[0];
-
 	/* Config space? */
 	if (space == 0) {
 		if (addr + len > 0x100)
@@ -219,171 +214,61 @@
     
 =====================================================================*/
 
-int cb_alloc(socket_info_t * s)
+/*
+ * Since there is only one interrupt available to CardBus
+ * devices, all devices downstream of this device must
+ * be using this IRQ.
+ */
+static void cardbus_assign_irqs(struct pci_bus *bus, int irq)
 {
-	struct pci_bus *bus;
-	u_short vend, v, dev;
-	u_char i, hdr, fn;
-	cb_config_t *c;
-	int irq;
-
-	bus = s->cap.cb_dev->subordinate;
-
-	pci_bus_read_config_word(bus, 0, PCI_VENDOR_ID, &vend);
-	pci_bus_read_config_word(bus, 0, PCI_DEVICE_ID, &dev);
-	printk(KERN_INFO "cs: cb_alloc(bus %d): vendor 0x%04x, "
-	       "device 0x%04x\n", bus->number, vend, dev);
-
-	pci_bus_read_config_byte(bus, 0, PCI_HEADER_TYPE, &hdr);
-	fn = 1;
-	if (hdr & 0x80) {
-		do {
-			if (pci_bus_read_config_word(bus, fn, PCI_VENDOR_ID, &v) ||
-			    !v || v == 0xffff)
-				break;
-			fn++;
-		} while (fn < 8);
-	}
-	s->functions = fn;
-
-	c = kmalloc(sizeof(struct cb_config_t), GFP_ATOMIC);
-	if (!c)
-		return CS_OUT_OF_RESOURCE;
- 	memset(c, 0, sizeof(struct cb_config_t));
-
-	for (i = 0; i < fn; i++) {
-		c->dev[i] = kmalloc(sizeof(struct pci_dev), GFP_ATOMIC);
-		if (!c->dev[i]) {
-			for (; i--; )
-				kfree(c->dev[i]);
-			kfree(c);
-			return CS_OUT_OF_RESOURCE;
-		}
-		memset(c->dev[i], 0, sizeof(struct pci_dev));
-	}
+	struct pci_dev *dev;
 
-	irq = s->cap.pci_irq;
-	for (i = 0; i < fn; i++) {
-		struct pci_dev *dev = c->dev[i];
+	list_for_each_entry(dev, &bus->devices, bus_list) {
 		u8 irq_pin;
-		int r;
-
-		dev->bus = bus;
-		dev->sysdata = bus->sysdata;
-		dev->dev.parent = bus->dev;
-		dev->dev.bus = &pci_bus_type;
-		dev->devfn = i;
-
-		pci_read_config_word(dev, PCI_VENDOR_ID, &dev->vendor);
-		pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
-		dev->hdr_type = hdr & 0x7f;
-		dev->dma_mask = 0xffffffff;
-		dev->dev.dma_mask = &dev->dma_mask;
-
-		pci_setup_device(dev);
-
-		strcpy(dev->dev.bus_id, dev->slot_name);
-
-		/* We need to assign resources for expansion ROM. */
-		for (r = 0; r < 7; r++) {
-			struct resource *res = dev->resource + r;
-			if (res->flags)
-				pci_assign_resource(dev, r);
-		}
 
-		/* Does this function have an interrupt at all? */
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin);
-		if (irq_pin)
+		if (irq_pin) {
 			dev->irq = irq;
-		
-		/* pci_enable_device needs to be called after pci_assign_resource */
-		/* because it returns an error if (!res->start && res->end).      */
-		if (pci_enable_device(dev))
-			continue;
-
-		if (irq_pin)
-			pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-		
-		device_register(&dev->dev);
-		pci_insert_device(dev, bus);
-	}
-
-	s->cb_config = c;
-	s->irq.AssignedIRQ = irq;
-	return CS_SUCCESS;
-}
-
-void cb_free(socket_info_t * s)
-{
-	cb_config_t *c = s->cb_config;
-
-	if (c) {
-		s->cb_config = NULL;
-		pci_remove_behind_bridge(s->cap.cb_dev);
+			pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+		}
 
-		kfree(c);
-		printk(KERN_INFO "cs: cb_free(bus %d)\n", s->cap.cb_dev->subordinate->number);
+		if (dev->subordinate)
+			cardbus_assign_irqs(dev->subordinate, irq);
 	}
 }
 
-/*=====================================================================
-
-    cb_enable() has the job of configuring a socket for a Cardbus
-    card, and initializing the card's PCI configuration registers.
-
-    It first sets up the Cardbus bridge windows, for IO and memory
-    accesses.  Then, it initializes each card function's base address
-    registers, interrupt line register, and command register.
-
-    It is called as part of the RequestConfiguration card service.
-    It should be called after a previous call to cb_config() (via the
-    RequestIO service).
-    
-======================================================================*/
-
-void cb_enable(socket_info_t * s)
+int cb_alloc(socket_info_t * s)
 {
+	struct pci_bus *bus = s->cap.cb_dev->subordinate;
 	struct pci_dev *dev;
-	u_char i;
+	unsigned int max, pass;
 
-	DEBUG(0, "cs: cb_enable(bus %d)\n", s->cap.cb_dev->subordinate->number);
+	s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0));
+//	pcibios_fixup_bus(bus);
 
-	/* Configure bridge */
-	cb_release_cis_mem(s);
-
-	/* Set up PCI interrupt and command registers */
-	for (i = 0; i < s->functions; i++) {
-		dev = s->cb_config->dev[i];
-		pci_write_config_byte(dev, PCI_COMMAND, PCI_COMMAND_MASTER |
-				      PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
-		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
-				      L1_CACHE_BYTES / 4);
-	}
+	max = bus->secondary;
+	for (pass = 0; pass < 2; pass++)
+		list_for_each_entry(dev, &bus->devices, bus_list)
+			if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+			    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+				max = pci_scan_bridge(bus, dev, max, pass);
+
+	/*
+	 * Size all resources below the CardBus controller.
+	 */
+	pci_bus_size_bridges(bus);
+	pci_bus_assign_resources(bus);
+	cardbus_assign_irqs(bus, s->cap.pci_irq);
+	pci_enable_bridges(bus);
+	pci_bus_add_devices(bus);
 
-	if (s->irq.AssignedIRQ) {
-		for (i = 0; i < s->functions; i++) {
-			dev = s->cb_config->dev[i];
-			pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
-					      s->irq.AssignedIRQ);
-		}
-		s->socket.io_irq = s->irq.AssignedIRQ;
-		s->ss_entry->set_socket(s->sock, &s->socket);
-	}
+	s->irq.AssignedIRQ = s->cap.pci_irq;
+	return CS_SUCCESS;
 }
 
-/*======================================================================
-
-    cb_disable() unconfigures a Cardbus card previously set up by
-    cb_enable().
-
-    It is called from the ReleaseConfiguration service.
-    
-======================================================================*/
-
-void cb_disable(socket_info_t * s)
+void cb_free(socket_info_t * s)
 {
-	DEBUG(0, "cs: cb_disable(bus %d)\n", s->cap.cb_dev->subordinate->number);
+	struct pci_dev *bridge = s->cap.cb_dev;
 
-	/* Turn off bridge windows */
-	cb_release_cis_mem(s);
+	pci_remove_behind_bridge(bridge);
 }
diff -Nru a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
--- a/drivers/pcmcia/cistpl.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/cistpl.c	Mon Mar 31 13:41:06 2003
@@ -47,7 +47,6 @@
 #include <asm/byteorder.h>
 
 #include <pcmcia/cs_types.h>
-#include <pcmcia/bus_ops.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/bulkmem.h>
@@ -83,6 +82,52 @@
 
 INT_MODULE_PARM(cis_width,	0);		/* 16-bit CIS? */
 
+void release_cis_mem(socket_info_t *s)
+{
+    if (s->cis_mem.sys_start != 0) {
+	s->cis_mem.flags &= ~MAP_ACTIVE;
+	s->ss_entry->set_mem_map(s->sock, &s->cis_mem);
+	if (!(s->cap.features & SS_CAP_STATIC_MAP))
+	    release_mem_region(s->cis_mem.sys_start, s->cap.map_size);
+	iounmap(s->cis_virt);
+	s->cis_mem.sys_start = 0;
+	s->cis_virt = NULL;
+    }
+}
+
+/*
+ * Map the card memory at "card_offset" into virtual space.
+ * If flags & MAP_ATTRIB, map the attribute space, otherwise
+ * map the memory space.
+ */
+static unsigned char *
+set_cis_map(socket_info_t *s, unsigned int card_offset, unsigned int flags)
+{
+    pccard_mem_map *mem = &s->cis_mem;
+    if (!(s->cap.features & SS_CAP_STATIC_MAP) &&
+	mem->sys_start == 0) {
+	int low = !(s->cap.features & SS_CAP_PAGE_REGS);
+	validate_mem(s);
+	mem->sys_start = 0;
+	if (find_mem_region(&mem->sys_start, s->cap.map_size,
+			    s->cap.map_size, low, "card services", s)) {
+	    printk(KERN_NOTICE "cs: unable to map card memory!\n");
+	    return NULL;
+	}
+	mem->sys_stop = mem->sys_start+s->cap.map_size-1;
+	s->cis_virt = ioremap(mem->sys_start, s->cap.map_size);
+    }
+    mem->card_start = card_offset;
+    mem->flags = flags;
+    s->ss_entry->set_mem_map(s->sock, mem);
+    if (s->cap.features & SS_CAP_STATIC_MAP) {
+	if (s->cis_virt)
+	    iounmap(s->cis_virt);
+	s->cis_virt = ioremap(mem->sys_start, s->cap.map_size);
+    }
+    return s->cis_virt;
+}
+
 /*======================================================================
 
     Low-level functions to read and write CIS memory.  I think the
@@ -94,60 +139,60 @@
 #define IS_ATTR		1
 #define IS_INDIRECT	8
 
-static int setup_cis_mem(socket_info_t *s);
-
-static void set_cis_map(socket_info_t *s, pccard_mem_map *mem)
-{
-    s->ss_entry->set_mem_map(s->sock, mem);
-    if (s->cap.features & SS_CAP_STATIC_MAP) {
-	if (s->cis_virt)
-	    bus_iounmap(s->cap.bus, s->cis_virt);
-	s->cis_virt = bus_ioremap(s->cap.bus, mem->sys_start,
-				  s->cap.map_size);
-    }
-}
-
 int read_cis_mem(socket_info_t *s, int attr, u_int addr,
 		 u_int len, void *ptr)
 {
-    pccard_mem_map *mem = &s->cis_mem;
-    u_char *sys, *buf = ptr;
+    u_char *sys, *end, *buf = ptr;
     
     DEBUG(3, "cs: read_cis_mem(%d, %#x, %u)\n", attr, addr, len);
-    if (setup_cis_mem(s) != 0) {
-	memset(ptr, 0xff, len);
-	return -1;
-    }
-    mem->flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
 
     if (attr & IS_INDIRECT) {
 	/* Indirect accesses use a bunch of special registers at fixed
 	   locations in common memory */
 	u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
-	if (attr & IS_ATTR) { addr *= 2; flags = ICTRL0_AUTOINC; }
-	mem->card_start = 0; mem->flags = MAP_ACTIVE;
-	set_cis_map(s, mem);
-	sys = s->cis_virt;
-	bus_writeb(s->cap.bus, flags, sys+CISREG_ICTRL0);
-	bus_writeb(s->cap.bus, addr & 0xff, sys+CISREG_IADDR0);
-	bus_writeb(s->cap.bus, (addr>>8) & 0xff, sys+CISREG_IADDR1);
-	bus_writeb(s->cap.bus, (addr>>16) & 0xff, sys+CISREG_IADDR2);
-	bus_writeb(s->cap.bus, (addr>>24) & 0xff, sys+CISREG_IADDR3);
+	if (attr & IS_ATTR) {
+	    addr *= 2;
+	    flags = ICTRL0_AUTOINC;
+	}
+
+	sys = set_cis_map(s, 0, MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0));
+	if (!sys) {
+	    memset(ptr, 0xff, len);
+	    return -1;
+	}
+
+	writeb(flags, sys+CISREG_ICTRL0);
+	writeb(addr & 0xff, sys+CISREG_IADDR0);
+	writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
+	writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
+	writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
 	for ( ; len > 0; len--, buf++)
-	    *buf = bus_readb(s->cap.bus, sys+CISREG_IDATA0);
+	    *buf = readb(sys+CISREG_IDATA0);
     } else {
-	u_int inc = 1;
-	if (attr) { mem->flags |= MAP_ATTRIB; inc++; addr *= 2; }
-	sys += (addr & (s->cap.map_size-1));
-	mem->card_start = addr & ~(s->cap.map_size-1);
+	u_int inc = 1, card_offset, flags;
+
+	flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
+	if (attr) {
+	    flags |= MAP_ATTRIB;
+	    inc++;
+	    addr *= 2;
+	}
+
+	card_offset = addr & ~(s->cap.map_size-1);
 	while (len) {
-	    set_cis_map(s, mem);
-	    sys = s->cis_virt + (addr & (s->cap.map_size-1));
+	    sys = set_cis_map(s, card_offset, flags);
+	    if (!sys) {
+		memset(ptr, 0xff, (buf - (u_char *)ptr) + len); /* whole buffer: bytes already read + remainder */
+		return -1;
+	    }
+	    end = sys + s->cap.map_size;
+	    sys = sys + (addr & (s->cap.map_size-1));
 	    for ( ; len > 0; len--, buf++, sys += inc) {
-		if (sys == s->cis_virt+s->cap.map_size) break;
-		*buf = bus_readb(s->cap.bus, sys);
+		if (sys == end)
+		    break;
+		*buf = readb(sys);
 	    }
-	    mem->card_start += s->cap.map_size;
+	    card_offset += s->cap.map_size;
 	    addr = 0;
 	}
     }
@@ -160,40 +205,54 @@
 void write_cis_mem(socket_info_t *s, int attr, u_int addr,
 		   u_int len, void *ptr)
 {
-    pccard_mem_map *mem = &s->cis_mem;
-    u_char *sys, *buf = ptr;
+    u_char *sys, *end, *buf = ptr;
     
     DEBUG(3, "cs: write_cis_mem(%d, %#x, %u)\n", attr, addr, len);
-    if (setup_cis_mem(s) != 0) return;
-    mem->flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
 
     if (attr & IS_INDIRECT) {
 	/* Indirect accesses use a bunch of special registers at fixed
 	   locations in common memory */
 	u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
-	if (attr & IS_ATTR) { addr *= 2; flags = ICTRL0_AUTOINC; }
-	mem->card_start = 0; mem->flags = MAP_ACTIVE;
-	set_cis_map(s, mem);
-	sys = s->cis_virt;
-	bus_writeb(s->cap.bus, flags, sys+CISREG_ICTRL0);
-	bus_writeb(s->cap.bus, addr & 0xff, sys+CISREG_IADDR0);
-	bus_writeb(s->cap.bus, (addr>>8) & 0xff, sys+CISREG_IADDR1);
-	bus_writeb(s->cap.bus, (addr>>16) & 0xff, sys+CISREG_IADDR2);
-	bus_writeb(s->cap.bus, (addr>>24) & 0xff, sys+CISREG_IADDR3);
+	if (attr & IS_ATTR) {
+	    addr *= 2;
+	    flags = ICTRL0_AUTOINC;
+	}
+
+	sys = set_cis_map(s, 0, MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0));
+	if (!sys)
+		return; /* FIXME: Error */
+
+	writeb(flags, sys+CISREG_ICTRL0);
+	writeb(addr & 0xff, sys+CISREG_IADDR0);
+	writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
+	writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
+	writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
 	for ( ; len > 0; len--, buf++)
-	    bus_writeb(s->cap.bus, *buf, sys+CISREG_IDATA0);
+	    writeb(*buf, sys+CISREG_IDATA0);
     } else {
-	int inc = 1;
-	if (attr & IS_ATTR) { mem->flags |= MAP_ATTRIB; inc++; addr *= 2; }
-	mem->card_start = addr & ~(s->cap.map_size-1);
+	u_int inc = 1, card_offset, flags;
+
+	flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
+	if (attr & IS_ATTR) {
+	    flags |= MAP_ATTRIB;
+	    inc++;
+	    addr *= 2;
+	}
+
+	card_offset = addr & ~(s->cap.map_size-1);
 	while (len) {
-	    set_cis_map(s, mem);
-	    sys = s->cis_virt + (addr & (s->cap.map_size-1));
+	    sys = set_cis_map(s, card_offset, flags);
+	    if (!sys)
+		return; /* FIXME: error */
+
+	    end = sys + s->cap.map_size;
+	    sys = sys + (addr & (s->cap.map_size-1));
 	    for ( ; len > 0; len--, buf++, sys += inc) {
-		if (sys == s->cis_virt+s->cap.map_size) break;
-		bus_writeb(s->cap.bus, *buf, sys);
+		if (sys == end)
+		    break;
+		writeb(*buf, sys);
 	    }
-	    mem->card_start += s->cap.map_size;
+	    card_offset += s->cap.map_size;
 	    addr = 0;
 	}
     }
@@ -201,98 +260,6 @@
 
 /*======================================================================
 
-    This is tricky... when we set up CIS memory, we try to validate
-    the memory window space allocations.
-    
-======================================================================*/
-
-/* Scratch pointer to the socket we use for validation */
-static socket_info_t *vs = NULL;
-
-/* Validation function for cards with a valid CIS */
-static int cis_readable(u_long base)
-{
-    cisinfo_t info1, info2;
-    int ret;
-    vs->cis_mem.sys_start = base;
-    vs->cis_mem.sys_stop = base+vs->cap.map_size-1;
-    vs->cis_virt = bus_ioremap(vs->cap.bus, base, vs->cap.map_size);
-    ret = pcmcia_validate_cis(vs->clients, &info1);
-    /* invalidate mapping and CIS cache */
-    bus_iounmap(vs->cap.bus, vs->cis_virt); vs->cis_used = 0;
-    if ((ret != 0) || (info1.Chains == 0))
-	return 0;
-    vs->cis_mem.sys_start = base+vs->cap.map_size;
-    vs->cis_mem.sys_stop = base+2*vs->cap.map_size-1;
-    vs->cis_virt = bus_ioremap(vs->cap.bus, base+vs->cap.map_size,
-			       vs->cap.map_size);
-    ret = pcmcia_validate_cis(vs->clients, &info2);
-    bus_iounmap(vs->cap.bus, vs->cis_virt); vs->cis_used = 0;
-    return ((ret == 0) && (info1.Chains == info2.Chains));
-}
-
-/* Validation function for simple memory cards */
-static int checksum(u_long base)
-{
-    int i, a, b, d;
-    vs->cis_mem.sys_start = base;
-    vs->cis_mem.sys_stop = base+vs->cap.map_size-1;
-    vs->cis_virt = bus_ioremap(vs->cap.bus, base, vs->cap.map_size);
-    vs->cis_mem.card_start = 0;
-    vs->cis_mem.flags = MAP_ACTIVE;
-    vs->ss_entry->set_mem_map(vs->sock, &vs->cis_mem);
-    /* Don't bother checking every word... */
-    a = 0; b = -1;
-    for (i = 0; i < vs->cap.map_size; i += 44) {
-	d = bus_readl(vs->cap.bus, vs->cis_virt+i);
-	a += d; b &= d;
-    }
-    bus_iounmap(vs->cap.bus, vs->cis_virt);
-    return (b == -1) ? -1 : (a>>1);
-}
-
-static int checksum_match(u_long base)
-{
-    int a = checksum(base), b = checksum(base+vs->cap.map_size);
-    return ((a == b) && (a >= 0));
-}
-
-static int setup_cis_mem(socket_info_t *s)
-{
-    if (!(s->cap.features & SS_CAP_STATIC_MAP) &&
-	(s->cis_mem.sys_start == 0)) {
-	int low = !(s->cap.features & SS_CAP_PAGE_REGS);
-	vs = s;
-	validate_mem(cis_readable, checksum_match, low, s);
-	s->cis_mem.sys_start = 0;
-	vs = NULL;
-	if (find_mem_region(&s->cis_mem.sys_start, s->cap.map_size,
-			    s->cap.map_size, low, "card services", s)) {
-	    printk(KERN_NOTICE "cs: unable to map card memory!\n");
-	    return -1;
-	}
-	s->cis_mem.sys_stop = s->cis_mem.sys_start+s->cap.map_size-1;
-	s->cis_virt = bus_ioremap(s->cap.bus, s->cis_mem.sys_start,
-				  s->cap.map_size);
-    }
-    return 0;
-}
-
-void release_cis_mem(socket_info_t *s)
-{
-    if (s->cis_mem.sys_start != 0) {
-	s->cis_mem.flags &= ~MAP_ACTIVE;
-	s->ss_entry->set_mem_map(s->sock, &s->cis_mem);
-	if (!(s->cap.features & SS_CAP_STATIC_MAP))
-	    release_mem_region(s->cis_mem.sys_start, s->cap.map_size);
-	bus_iounmap(s->cap.bus, s->cis_virt);
-	s->cis_mem.sys_start = 0;
-	s->cis_virt = NULL;
-    }
-}
-
-/*======================================================================
-
     This is a wrapper around read_cis_mem, with the same interface,
     but which caches information, for cards whose CIS may not be
     readable all the time.
@@ -427,11 +394,9 @@
     tuple->TupleLink = tuple->Flags = 0;
 #ifdef CONFIG_CARDBUS
     if (s->state & SOCKET_CARDBUS) {
+	struct pci_dev *dev = s->cap.cb_dev;
 	u_int ptr;
-	struct pci_dev *dev = pci_find_slot (s->cap.cb_dev->subordinate->number, 0);
-	if (!dev)
-	    return CS_BAD_HANDLE;
-	pci_read_config_dword(dev, 0x28, &ptr);
+	pci_bus_read_config_dword(dev->subordinate, 0, PCI_CARDBUS_CIS, &ptr);
 	tuple->CISOffset = ptr & ~7;
 	SPACE(tuple->Flags) = (ptr & 7);
     } else
diff -Nru a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
--- a/drivers/pcmcia/cs.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/cs.c	Mon Mar 31 13:41:06 2003
@@ -59,7 +59,6 @@
 #include <pcmcia/bulkmem.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
-#include <pcmcia/bus_ops.h>
 #include "cs_internal.h"
 
 #ifdef CONFIG_PCI
@@ -112,13 +111,6 @@
 /* Access speed for IO windows */
 INT_MODULE_PARM(io_speed,	0);		/* ns */
 
-/* Optional features */
-#ifdef CONFIG_PM
-INT_MODULE_PARM(do_apm,		1);
-#else
-INT_MODULE_PARM(do_apm,		0);
-#endif
-
 #ifdef PCMCIA_DEBUG
 INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG);
 static const char *version =
@@ -349,7 +341,6 @@
 		/* base address = 0, map = 0 */
 		s->cis_mem.flags = 0;
 		s->cis_mem.speed = cis_speed;
-		s->use_bus_pm = cls_d->use_bus_pm;
 		s->erase_busy.next = s->erase_busy.prev = &s->erase_busy;
 		spin_lock_init(&s->lock);
     
@@ -622,8 +613,10 @@
 		send_event(s, CS_EVENT_PM_RESUME, CS_EVENT_PRI_LOW);
 	} else if (s->state & SOCKET_SETUP_PENDING) {
 #ifdef CONFIG_CARDBUS
-	    if (s->state & SOCKET_CARDBUS)
+	    if (s->state & SOCKET_CARDBUS) {
 		cb_alloc(s);
+		s->state |= SOCKET_CARDBUS_CONFIG;
+	    }
 #endif
 	    send_event(s, CS_EVENT_CARD_INSERTION, CS_EVENT_PRI_LOW);
 	    s->state &= ~SOCKET_SETUP_PENDING;
@@ -654,6 +647,8 @@
     DEBUG(1, "cs: send_event(sock %d, event %d, pri %d)\n",
 	  s->sock, event, priority);
     ret = 0;
+    if (s->state & SOCKET_CARDBUS)
+	    return 0;
     for (; client; client = client->next) { 
 	if (client->state & (CLIENT_UNBOUND|CLIENT_STALE))
 	    continue;
@@ -755,33 +750,47 @@
 	parse_events(s, SS_DETECT);
 }
 
-static int handle_pm_event(struct pm_dev *dev, pm_request_t rqst, void *data)
+
+int pcmcia_socket_dev_suspend(struct device * dev, u32 state, u32 level)
 {
-    int i;
-    socket_info_t *s;
+	struct pcmcia_socket_class_data *cls_d = to_class_data(dev);
+	socket_info_t *s;
+	int i;
 
-    /* only for busses that don't suspend/resume slots directly */
+	if ((!cls_d) || (level != SUSPEND_SAVE_STATE))
+		return 0;
 
-    switch (rqst) {
-    case PM_SUSPEND:
-	DEBUG(1, "cs: received suspend notification\n");
-	for (i = 0; i < sockets; i++) {
-	    s = socket_table [i];
-	    if (!s->use_bus_pm)
-		pcmcia_suspend_socket (socket_table [i]);
+	s = (socket_info_t *) cls_d->s_info;
+
+	for (i = 0; i < cls_d->nsock; i++) {
+		pcmcia_suspend_socket(s);
+		s++;
 	}
-	break;
-    case PM_RESUME:
-	DEBUG(1, "cs: received resume notification\n");
-	for (i = 0; i < sockets; i++) {
-	    s = socket_table [i];
-	    if (!s->use_bus_pm)
-		pcmcia_resume_socket (socket_table [i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(pcmcia_socket_dev_suspend);
+
+int pcmcia_socket_dev_resume(struct device * dev, u32 level)
+{
+	struct pcmcia_socket_class_data *cls_d = to_class_data(dev);
+	socket_info_t *s;
+	int i;
+
+	if ((!cls_d) || (level != RESUME_RESTORE_STATE))
+		return 0;
+
+	s = (socket_info_t *) cls_d->s_info;
+
+	for (i = 0; i < cls_d->nsock; i++) {
+		pcmcia_resume_socket(s);
+		s++;
 	}
-	break;
-    }
-    return 0;
-} /* handle_pm_event */
+
+	return 0;
+}
+EXPORT_SYMBOL(pcmcia_socket_dev_resume);
+
 
 /*======================================================================
 
@@ -1077,7 +1086,7 @@
 	config->Vcc = s->socket.Vcc;
 	config->Vpp1 = config->Vpp2 = s->socket.Vpp;
 	config->Option = s->cap.cb_dev->subordinate->number;
-	if (s->cb_config) {
+	if (s->state & SOCKET_CARDBUS_CONFIG) {
 	    config->Attributes = CONF_VALID_CLIENT;
 	    config->IntType = INT_CARDBUS;
 	    config->AssignedIRQ = s->irq.AssignedIRQ;
@@ -1473,7 +1482,6 @@
     client->event_handler = req->event_handler;
     client->event_callback_args = req->event_callback_args;
     client->event_callback_args.client_handle = client;
-    client->event_callback_args.bus = s->cap.bus;
 
     if (s->state & SOCKET_CARDBUS)
 	client->state |= CLIENT_CARDBUS;
@@ -1522,11 +1530,8 @@
     s = SOCKET(handle);
     
 #ifdef CONFIG_CARDBUS
-    if (handle->state & CLIENT_CARDBUS) {
-	cb_disable(s);
-	s->lock_count = 0;
+    if (handle->state & CLIENT_CARDBUS)
 	return CS_SUCCESS;
-    }
 #endif
     
     if (!(handle->state & CLIENT_STALE)) {
@@ -1573,9 +1578,8 @@
     s = SOCKET(handle);
     
 #ifdef CONFIG_CARDBUS
-    if (handle->state & CLIENT_CARDBUS) {
+    if (handle->state & CLIENT_CARDBUS)
 	return CS_SUCCESS;
-    }
 #endif
     
     if (!(handle->state & CLIENT_STALE)) {
@@ -1622,10 +1626,10 @@
     }
     
     if (req->Attributes & IRQ_HANDLE_PRESENT) {
-	bus_free_irq(s->cap.bus, req->AssignedIRQ, req->Instance);
+	free_irq(req->AssignedIRQ, req->Instance);
     }
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
     if (req->AssignedIRQ != s->cap.pci_irq)
 	undo_irq(req->Attributes, req->AssignedIRQ);
 #endif
@@ -1678,16 +1682,8 @@
 	return CS_NO_CARD;
     
 #ifdef CONFIG_CARDBUS
-    if (handle->state & CLIENT_CARDBUS) {
-	if (!(req->IntType & INT_CARDBUS))
-	    return CS_UNSUPPORTED_MODE;
-	if (s->lock_count != 0)
-	    return CS_CONFIGURATION_LOCKED;
-	cb_enable(s);
-	handle->state |= CLIENT_CONFIG_LOCKED;
-	s->lock_count++;
-	return CS_SUCCESS;
-    }
+    if (handle->state & CLIENT_CARDBUS)
+	return CS_UNSUPPORTED_MODE;
 #endif
     
     if (req->IntType & INT_CARDBUS)
@@ -1887,7 +1883,7 @@
     if (!s->cap.irq_mask) {
 	irq = s->cap.pci_irq;
 	ret = (irq) ? 0 : CS_IN_USE;
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
     } else if (s->irq.AssignedIRQ != 0) {
 	/* If the interrupt is already assigned, it must match */
 	irq = s->irq.AssignedIRQ;
@@ -1917,7 +1913,7 @@
     if (ret != 0) return ret;
 
     if (req->Attributes & IRQ_HANDLE_PRESENT) {
-	if (bus_request_irq(s->cap.bus, irq, req->Handler,
+	if (request_irq(irq, req->Handler,
 			    ((req->Attributes & IRQ_TYPE_DYNAMIC_SHARING) || 
 			     (s->functions > 1) ||
 			     (irq == s->cap.pci_irq)) ? SA_SHIRQ : 0,
@@ -2441,8 +2437,6 @@
     printk(KERN_INFO "  %s\n", options);
     DEBUG(0, "%s\n", version);
     devclass_register(&pcmcia_socket_class);
-    if (do_apm)
-	pm_register(PM_SYS_DEV, PM_SYS_PCMCIA, handle_pm_event);
 #ifdef CONFIG_PROC_FS
     proc_pccard = proc_mkdir("pccard", proc_bus);
 #endif
@@ -2458,8 +2452,6 @@
 	remove_proc_entry("pccard", proc_bus);
     }
 #endif
-    if (do_apm)
-	pm_unregister_all(handle_pm_event);
     release_resource_db();
     devclass_unregister(&pcmcia_socket_class);
 }
diff -Nru a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
--- a/drivers/pcmcia/cs_internal.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/cs_internal.h	Mon Mar 31 13:41:06 2003
@@ -136,7 +136,6 @@
 #ifdef CONFIG_CARDBUS
     struct resource *		cb_cis_res;
     u_char			*cb_cis_virt;
-    struct cb_config_t		*cb_config;
 #endif
     struct {
 	u_int			AssignedIRQ;
@@ -158,7 +157,6 @@
 #ifdef CONFIG_PROC_FS
     struct proc_dir_entry	*proc;
 #endif
-    int				use_bus_pm;
 } socket_info_t;
 
 /* Flags in config state */
@@ -176,6 +174,7 @@
 #define SOCKET_IO_REQ(i)	(0x1000<<(i))
 #define SOCKET_REGION_INFO	0x4000
 #define SOCKET_CARDBUS		0x8000
+#define SOCKET_CARDBUS_CONFIG	0x10000
 
 #define CHECK_HANDLE(h) \
     (((h) == NULL) || ((h)->client_magic != CLIENT_MAGIC))
@@ -198,8 +197,6 @@
 /* In cardbus.c */
 int cb_alloc(socket_info_t *s);
 void cb_free(socket_info_t *s);
-void cb_enable(socket_info_t *s);
-void cb_disable(socket_info_t *s);
 int read_cb_mem(socket_info_t *s, int space, u_int addr, u_int len, void *ptr);
 void cb_release_cis_mem(socket_info_t *s);
 
@@ -234,8 +231,7 @@
 int copy_memory(memory_handle_t handle, copy_op_t *req);
 
 /* In rsrc_mgr */
-void validate_mem(int (*is_valid)(u_long), int (*do_cksum)(u_long),
-		  int force_low, socket_info_t *s);
+void validate_mem(socket_info_t *s);
 int find_io_region(ioaddr_t *base, ioaddr_t num, ioaddr_t align,
 		   char *name, socket_info_t *s);
 int find_mem_region(u_long *base, u_long num, u_long align,
diff -Nru a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
--- a/drivers/pcmcia/ds.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/ds.c	Mon Mar 31 13:41:08 2003
@@ -48,6 +48,8 @@
 #include <linux/proc_fs.h>
 #include <linux/poll.h>
 #include <linux/pci.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
 
 #include <pcmcia/version.h>
 #include <pcmcia/cs_types.h>
@@ -55,6 +57,7 @@
 #include <pcmcia/bulkmem.h>
 #include <pcmcia/cistpl.h>
 #include <pcmcia/ds.h>
+#include <pcmcia/ss.h>
 
 /*====================================================================*/
 
@@ -69,8 +72,6 @@
 #ifdef PCMCIA_DEBUG
 INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG);
 #define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args)
-static const char *version =
-"ds.c 1.112 2001/10/13 00:08:28 (David Hinds)";
 #else
 #define DEBUG(n, args...)
 #endif
@@ -97,15 +98,18 @@
 } user_info_t;
 
 /* Socket state information */
-typedef struct socket_info_t {
-    client_handle_t	handle;
-    int			state;
-    user_info_t		*user;
-    int			req_pending, req_result;
-    wait_queue_head_t	queue, request;
-    struct timer_list	removal;
-    socket_bind_t	*bind;
-} socket_info_t;
+struct pcmcia_bus_socket {
+	client_handle_t		handle;
+	int			state;
+	user_info_t		*user;
+	int			req_pending, req_result;
+	wait_queue_head_t	queue, request;
+	struct work_struct	removal;
+	socket_bind_t		*bind;
+	struct device		*socket_dev;
+	struct list_head	socket_list;
+	unsigned int		socket_no; /* deprecated */
+};
 
 #define SOCKET_PRESENT		0x01
 #define SOCKET_BUSY		0x02
@@ -116,13 +120,13 @@
 /* Device driver ID passed to Card Services */
 static dev_info_t dev_info = "Driver Services";
 
-static int sockets = 0, major_dev = -1;
-static socket_info_t *socket_table = NULL;
+static int major_dev = -1;
 
-extern struct proc_dir_entry *proc_pccard;
+/* list of all sockets registered with the pcmcia bus driver */
+static DECLARE_RWSEM(bus_socket_list_rwsem);
+static LIST_HEAD(bus_socket_list);
 
-/* We use this to distinguish in-kernel from modular drivers */
-static int init_status = 1;
+extern struct proc_dir_entry *proc_pccard;
 
 /*====================================================================*/
 
@@ -135,6 +139,7 @@
 /*======================================================================*/
 
 static struct pcmcia_driver * get_pcmcia_driver (dev_info_t *dev_info);
+static struct pcmcia_bus_socket * get_socket_info_by_nr(unsigned int nr);
 
 /**
  * pcmcia_register_driver - register a PCMCIA driver with the bus core
@@ -147,7 +152,6 @@
 		return -EINVAL;
 
  	driver->use_count = 0;
- 	driver->status = init_status;
 	driver->drv.bus = &pcmcia_bus_type;
 
 	return driver_register(&driver->drv);
@@ -160,17 +164,19 @@
 void pcmcia_unregister_driver(struct pcmcia_driver *driver)
 {
 	socket_bind_t *b;
-	int i;
+	struct pcmcia_bus_socket *bus_sock;
 
 	if (driver->use_count > 0) {
 		/* Blank out any left-over device instances */
 		driver->attach = NULL; driver->detach = NULL;
-		for (i = 0; i < sockets; i++)
-			for (b = socket_table[i].bind; b; b = b->next)
+		down_read(&bus_socket_list_rwsem);
+		list_for_each_entry(bus_sock, &bus_socket_list, socket_list) {
+			for (b = bus_sock->bind; b; b = b->next)
 				if (b->driver == driver) 
 					b->instance = NULL;
- 	}
-
+		}
+		up_read(&bus_socket_list_rwsem);
+	}
 	driver_unregister(&driver->drv);
 }
 EXPORT_SYMBOL(pcmcia_unregister_driver);
@@ -181,33 +187,21 @@
 			   void (*detach)(dev_link_t *))
 {
     struct pcmcia_driver *driver;
-    socket_bind_t *b;
-    int i;
 
     DEBUG(0, "ds: register_pccard_driver('%s')\n", (char *)dev_info);
     driver = get_pcmcia_driver(dev_info);
-    if (!driver) {
-	driver = kmalloc(sizeof(struct pcmcia_driver), GFP_KERNEL);
-	if (!driver) return -ENOMEM;
-	memset(driver, 0, sizeof(struct pcmcia_driver));
-	driver->drv.name = (char *)dev_info;
-	pcmcia_register_driver(driver);
-    }
+    if (driver)
+	    return -EBUSY;
+
+    driver = kmalloc(sizeof(struct pcmcia_driver), GFP_KERNEL);
+    if (!driver) return -ENOMEM;
+    memset(driver, 0, sizeof(struct pcmcia_driver));
+    driver->drv.name = (char *)dev_info;
+    pcmcia_register_driver(driver);
 
     driver->attach = attach;
     driver->detach = detach;
-    if (driver->use_count == 0) return 0;
-    
-    /* Instantiate any already-bound devices */
-    for (i = 0; i < sockets; i++)
-	for (b = socket_table[i].bind; b; b = b->next) {
-	    if (b->driver != driver) continue;
-	    b->instance = driver->attach();
-	    if (b->instance == NULL)
-		printk(KERN_NOTICE "ds: unable to create instance "
-		       "of '%s'!\n", driver->drv.name);
-	}
-    
+
     return 0;
 } /* register_pccard_driver */
 
@@ -238,8 +232,7 @@
 	struct pcmcia_driver *p_dev = container_of(driver, 
 						   struct pcmcia_driver, drv);
 
-	*p += sprintf(*p, "%-24.24s %d %d\n", driver->name, p_dev->status,
-		     p_dev->use_count);
+	*p += sprintf(*p, "%-24.24s 1 %d\n", driver->name, p_dev->use_count);
 	d = (void *) p;
 
 	return 0;
@@ -282,7 +275,7 @@
     user->event[user->event_head] = event;
 }
 
-static void handle_event(socket_info_t *s, event_t event)
+static void handle_event(struct pcmcia_bus_socket *s, event_t event)
 {
     user_info_t *user;
     for (user = s->user; user; user = user->next)
@@ -290,7 +283,7 @@
     wake_up_interruptible(&s->queue);
 }
 
-static int handle_request(socket_info_t *s, event_t event)
+static int handle_request(struct pcmcia_bus_socket *s, event_t event)
 {
     if (s->req_pending != 0)
 	return CS_IN_USE;
@@ -307,9 +300,9 @@
     return CS_SUCCESS;
 }
 
-static void handle_removal(u_long sn)
+static void handle_removal(void *data)
 {
-    socket_info_t *s = &socket_table[sn];
+    struct pcmcia_bus_socket *s = data;
     handle_event(s, CS_EVENT_CARD_REMOVAL);
     s->state &= ~SOCKET_REMOVAL_PENDING;
 }
@@ -323,23 +316,19 @@
 static int ds_event(event_t event, int priority,
 		    event_callback_args_t *args)
 {
-    socket_info_t *s;
-    int i;
+    struct pcmcia_bus_socket *s;
 
     DEBUG(1, "ds: ds_event(0x%06x, %d, 0x%p)\n",
 	  event, priority, args->client_handle);
     s = args->client_data;
-    i = s - socket_table;
     
     switch (event) {
 	
     case CS_EVENT_CARD_REMOVAL:
 	s->state &= ~SOCKET_PRESENT;
 	if (!(s->state & SOCKET_REMOVAL_PENDING)) {
-	    s->state |= SOCKET_REMOVAL_PENDING;
-	    init_timer(&s->removal);
-	    s->removal.expires = jiffies + HZ/10;
-	    add_timer(&s->removal);
+		s->state |= SOCKET_REMOVAL_PENDING;
+		schedule_delayed_work(&s->removal,  HZ/10);
 	}
 	break;
 	
@@ -366,21 +355,21 @@
     
 ======================================================================*/
 
-static int bind_mtd(int i, mtd_info_t *mtd_info)
+static int bind_mtd(struct pcmcia_bus_socket *bus_sock, mtd_info_t *mtd_info)
 {
     mtd_bind_t bind_req;
     int ret;
 
     bind_req.dev_info = &mtd_info->dev_info;
     bind_req.Attributes = mtd_info->Attributes;
-    bind_req.Socket = i;
+    bind_req.Socket = bus_sock->socket_no;
     bind_req.CardOffset = mtd_info->CardOffset;
     ret = pcmcia_bind_mtd(&bind_req);
     if (ret != CS_SUCCESS) {
 	cs_error(NULL, BindMTD, ret);
 	printk(KERN_NOTICE "ds: unable to bind MTD '%s' to socket %d"
 	       " offset 0x%x\n",
-	       (char *)bind_req.dev_info, i, bind_req.CardOffset);
+	       (char *)bind_req.dev_info, bus_sock->socket_no, bind_req.CardOffset);
 	return -ENODEV;
     }
     return 0;
@@ -395,24 +384,21 @@
     
 ======================================================================*/
 
-static int bind_request(int i, bind_info_t *bind_info)
+static int bind_request(struct pcmcia_bus_socket *s, bind_info_t *bind_info)
 {
     struct pcmcia_driver *driver;
     socket_bind_t *b;
     bind_req_t bind_req;
-    socket_info_t *s = &socket_table[i];
     int ret;
 
-    DEBUG(2, "bind_request(%d, '%s')\n", i,
+    if (!s)
+	    return -EINVAL;
+
+    DEBUG(2, "bind_request(%d, '%s')\n", s->socket_no,
 	  (char *)bind_info->dev_info);
     driver = get_pcmcia_driver(&bind_info->dev_info);
-    if (driver == NULL) {
-	driver = kmalloc(sizeof(struct pcmcia_driver), GFP_KERNEL);
-	if (!driver) return -ENOMEM;
-	memset(driver, 0, sizeof(struct pcmcia_driver));
-	driver->drv.name = bind_info->dev_info;
-	pcmcia_register_driver(driver);
-    }
+    if (!driver)
+	    return -EINVAL;
 
     for (b = s->bind; b; b = b->next)
 	if ((driver == b->driver) &&
@@ -423,14 +409,14 @@
 	return -EBUSY;
     }
 
-    bind_req.Socket = i;
+    bind_req.Socket = s->socket_no;
     bind_req.Function = bind_info->function;
     bind_req.dev_info = (dev_info_t *) driver->drv.name;
     ret = pcmcia_bind_device(&bind_req);
     if (ret != CS_SUCCESS) {
 	cs_error(NULL, BindDevice, ret);
 	printk(KERN_NOTICE "ds: unable to bind '%s' to socket %d\n",
-	       (char *)dev_info, i);
+	       (char *)dev_info, s->socket_no);
 	return -ENODEV;
     }
 
@@ -462,9 +448,8 @@
 
 /*====================================================================*/
 
-static int get_device_info(int i, bind_info_t *bind_info, int first)
+static int get_device_info(struct pcmcia_bus_socket *s, bind_info_t *bind_info, int first)
 {
-    socket_info_t *s = &socket_table[i];
     socket_bind_t *b;
     dev_node_t *node;
 
@@ -532,12 +517,11 @@
 
 /*====================================================================*/
 
-static int unbind_request(int i, bind_info_t *bind_info)
+static int unbind_request(struct pcmcia_bus_socket *s, bind_info_t *bind_info)
 {
-    socket_info_t *s = &socket_table[i];
     socket_bind_t **b, *c;
 
-    DEBUG(2, "unbind_request(%d, '%s')\n", i,
+    DEBUG(2, "unbind_request(%d, '%s')\n", s->socket_no,
 	  (char *)bind_info->dev_info);
     for (b = &s->bind; *b; b = &(*b)->next)
 	if ((strcmp((char *)(*b)->driver->drv.name,
@@ -568,13 +552,15 @@
 static int ds_open(struct inode *inode, struct file *file)
 {
     socket_t i = minor(inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     user_info_t *user;
 
     DEBUG(0, "ds_open(socket %d)\n", i);
-    if ((i >= sockets) || (sockets == 0))
-	return -ENODEV;
-    s = &socket_table[i];
+
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return -ENODEV;
+
     if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
 	if (s->state & SOCKET_BUSY)
 	    return -EBUSY;
@@ -600,13 +586,15 @@
 static int ds_release(struct inode *inode, struct file *file)
 {
     socket_t i = minor(inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     user_info_t *user, **link;
 
     DEBUG(0, "ds_release(socket %d)\n", i);
-    if ((i >= sockets) || (sockets == 0))
-	return 0;
-    s = &socket_table[i];
+
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return 0;
+
     user = file->private_data;
     if (CHECK_USER(user))
 	goto out;
@@ -632,16 +620,18 @@
 		       size_t count, loff_t *ppos)
 {
     socket_t i = minor(file->f_dentry->d_inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     user_info_t *user;
 
     DEBUG(2, "ds_read(socket %d)\n", i);
     
-    if ((i >= sockets) || (sockets == 0))
-	return -ENODEV;
     if (count < 4)
 	return -EINVAL;
-    s = &socket_table[i];
+
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return -ENODEV;
+
     user = file->private_data;
     if (CHECK_USER(user))
 	return -EIO;
@@ -661,18 +651,20 @@
 			size_t count, loff_t *ppos)
 {
     socket_t i = minor(file->f_dentry->d_inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     user_info_t *user;
 
     DEBUG(2, "ds_write(socket %d)\n", i);
     
-    if ((i >= sockets) || (sockets == 0))
-	return -ENODEV;
     if (count != 4)
 	return -EINVAL;
     if ((file->f_flags & O_ACCMODE) == O_RDONLY)
 	return -EBADF;
-    s = &socket_table[i];
+
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return -ENODEV;
+
     user = file->private_data;
     if (CHECK_USER(user))
 	return -EIO;
@@ -694,14 +686,15 @@
 static u_int ds_poll(struct file *file, poll_table *wait)
 {
     socket_t i = minor(file->f_dentry->d_inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     user_info_t *user;
 
     DEBUG(2, "ds_poll(socket %d)\n", i);
     
-    if ((i >= sockets) || (sockets == 0))
-	return POLLERR;
-    s = &socket_table[i];
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return POLLERR;
+
     user = file->private_data;
     if (CHECK_USER(user))
 	return POLLERR;
@@ -717,16 +710,16 @@
 		    u_int cmd, u_long arg)
 {
     socket_t i = minor(inode->i_rdev);
-    socket_info_t *s;
+    struct pcmcia_bus_socket *s;
     u_int size;
     int ret, err;
     ds_ioctl_arg_t buf;
 
     DEBUG(2, "ds_ioctl(socket %d, %#x, %#lx)\n", i, cmd, arg);
     
-    if ((i >= sockets) || (sockets == 0))
-	return -ENODEV;
-    s = &socket_table[i];
+    s = get_socket_info_by_nr(i);
+    if (!s)
+	    return -ENODEV;
     
     size = (cmd & IOCSIZE_MASK) >> IOCSIZE_SHIFT;
     if (size > sizeof(ds_ioctl_arg_t)) return -EINVAL;
@@ -827,20 +820,20 @@
 	break;
     case DS_BIND_REQUEST:
 	if (!capable(CAP_SYS_ADMIN)) return -EPERM;
-	err = bind_request(i, &buf.bind_info);
+	err = bind_request(s, &buf.bind_info);
 	break;
     case DS_GET_DEVICE_INFO:
-	err = get_device_info(i, &buf.bind_info, 1);
+	err = get_device_info(s, &buf.bind_info, 1);
 	break;
     case DS_GET_NEXT_DEVICE:
-	err = get_device_info(i, &buf.bind_info, 0);
+	err = get_device_info(s, &buf.bind_info, 0);
 	break;
     case DS_UNBIND_REQUEST:
-	err = unbind_request(i, &buf.bind_info);
+	err = unbind_request(s, &buf.bind_info);
 	break;
     case DS_BIND_MTD:
 	if (!capable(CAP_SYS_ADMIN)) return -EPERM;
-	err = bind_mtd(i, &buf.mtd_info);
+	err = bind_mtd(s, &buf.mtd_info);
 	break;
     default:
 	err = -EINVAL;
@@ -889,140 +882,198 @@
 
 /*====================================================================*/
 
-struct bus_type pcmcia_bus_type = {
-	.name = "pcmcia",
-};
-EXPORT_SYMBOL(pcmcia_bus_type);
-
-static int __init init_pcmcia_bus(void)
+static int __devinit pcmcia_bus_add_socket(struct device *dev, unsigned int socket_nr)
 {
-	bus_register(&pcmcia_bus_type);
-	return 0;
-}
+	client_reg_t client_reg;
+	bind_req_t bind;
+	struct pcmcia_bus_socket *s, *tmp_s;
+	int ret;
+	int i;
 
-static int __init init_pcmcia_ds(void)
-{
-    client_reg_t client_reg;
-    servinfo_t serv;
-    bind_req_t bind;
-    socket_info_t *s;
-    int i, ret;
-    
-    DEBUG(0, "%s\n", version);
- 
-    /*
-     * Ugly. But we want to wait for the socket threads to have started up.
-     * We really should let the drivers themselves drive some of this..
-     */
-    current->state = TASK_INTERRUPTIBLE;
-    schedule_timeout(HZ/4);
+	s = kmalloc(sizeof(struct pcmcia_bus_socket), GFP_KERNEL);
+	if(!s)
+		return -ENOMEM;
+	memset(s, 0, sizeof(struct pcmcia_bus_socket));
+    
+	/*
+	 * Ugly. But we want to wait for the socket threads to have started up.
+	 * We really should let the drivers themselves drive some of this..
+	 */
+	current->state = TASK_INTERRUPTIBLE;
+	schedule_timeout(HZ/4);
 
-    pcmcia_get_card_services_info(&serv);
-    if (serv.Revision != CS_RELEASE_CODE) {
-	printk(KERN_NOTICE "ds: Card Services release does not match!\n");
-	return -1;
-    }
-    if (serv.Count == 0) {
-	printk(KERN_NOTICE "ds: no socket drivers loaded!\n");
-	return -1;
-    }
-    
-    sockets = serv.Count;
-    socket_table = kmalloc(sockets*sizeof(socket_info_t), GFP_KERNEL);
-    if (!socket_table) return -1;
-    for (i = 0, s = socket_table; i < sockets; i++, s++) {
-	s->state = 0;
-	s->user = NULL;
-	s->req_pending = 0;
 	init_waitqueue_head(&s->queue);
 	init_waitqueue_head(&s->request);
-	s->handle = NULL;
-	init_timer(&s->removal);
-	s->removal.data = i;
-	s->removal.function = &handle_removal;
-	s->bind = NULL;
-    }
-    
-    /* Set up hotline to Card Services */
-    client_reg.dev_info = bind.dev_info = &dev_info;
-    client_reg.Attributes = INFO_MASTER_CLIENT;
-    client_reg.EventMask =
-	CS_EVENT_CARD_INSERTION | CS_EVENT_CARD_REMOVAL |
-	CS_EVENT_RESET_PHYSICAL | CS_EVENT_CARD_RESET |
-	CS_EVENT_EJECTION_REQUEST | CS_EVENT_INSERTION_REQUEST |
-        CS_EVENT_PM_SUSPEND | CS_EVENT_PM_RESUME;
-    client_reg.event_handler = &ds_event;
-    client_reg.Version = 0x0210;
-    for (i = 0; i < sockets; i++) {
-	bind.Socket = i;
+
+	/* find the lowest, unused socket no. Please note that this is a
+	 * temporary workaround until "struct pcmcia_socket" is introduced
+	 * into cs.c which will include this number, and which will be
+	 * accessible to ds.c directly */
+	i = 0;
+ next_try:
+	list_for_each_entry(tmp_s, &bus_socket_list, socket_list) {
+		if (tmp_s->socket_no == i) {
+			i++;
+			goto next_try;
+		}
+	}
+	s->socket_no = i;
+
+	/* initialize data */
+	s->socket_dev = dev;
+	INIT_WORK(&s->removal, handle_removal, s);
+    
+	/* Set up hotline to Card Services */
+	client_reg.dev_info = bind.dev_info = &dev_info;
+
+	bind.Socket = s->socket_no;
 	bind.Function = BIND_FN_ALL;
 	ret = pcmcia_bind_device(&bind);
 	if (ret != CS_SUCCESS) {
-	    cs_error(NULL, BindDevice, ret);
-	    break;
+		cs_error(NULL, BindDevice, ret);
+		kfree(s);
+		return -EINVAL;
 	}
-	client_reg.event_callback_args.client_data = &socket_table[i];
-	ret = pcmcia_register_client(&socket_table[i].handle,
-			   &client_reg);
+
+	client_reg.Attributes = INFO_MASTER_CLIENT;
+	client_reg.EventMask =
+		CS_EVENT_CARD_INSERTION | CS_EVENT_CARD_REMOVAL |
+		CS_EVENT_RESET_PHYSICAL | CS_EVENT_CARD_RESET |
+		CS_EVENT_EJECTION_REQUEST | CS_EVENT_INSERTION_REQUEST |
+		CS_EVENT_PM_SUSPEND | CS_EVENT_PM_RESUME;
+	client_reg.event_handler = &ds_event;
+	client_reg.Version = 0x0210;
+	client_reg.event_callback_args.client_data = s;
+	ret = pcmcia_register_client(&s->handle, &client_reg);
 	if (ret != CS_SUCCESS) {
-	    cs_error(NULL, RegisterClient, ret);
-	    break;
+		cs_error(NULL, RegisterClient, ret);
+		kfree(s);
+		return -EINVAL;
 	}
-    }
-    
-    /* Set up character device for user mode clients */
-    i = register_chrdev(0, "pcmcia", &ds_fops);
-    if (i == -EBUSY)
-	printk(KERN_NOTICE "unable to find a free device # for "
-	       "Driver Services\n");
-    else
-	major_dev = i;
 
-#ifdef CONFIG_PROC_FS
-    if (proc_pccard)
-	create_proc_read_entry("drivers",0,proc_pccard,proc_read_drivers,NULL);
-    init_status = 0;
-#endif
-    return 0;
+	list_add(&s->socket_list, &bus_socket_list);
+
+	return 0;
 }
 
 
-static void __exit exit_pcmcia_ds(void)
+static int __devinit pcmcia_bus_add_socket_dev(struct device *dev)
+{
+	struct pcmcia_socket_class_data *cls_d = dev->class_data;
+	unsigned int i;
+	unsigned int ret = 0;
+
+	if (!cls_d)
+		return -ENODEV;
+
+	down_write(&bus_socket_list_rwsem);
+        for (i = 0; i < cls_d->nsock; i++)
+		ret += pcmcia_bus_add_socket(dev, i);
+	up_write(&bus_socket_list_rwsem);
+
+	return ret;
+}
+
+static int __devexit pcmcia_bus_remove_socket_dev(struct device *dev)
+{
+	struct pcmcia_socket_class_data *cls_d = dev->class_data;
+	struct list_head *list_loop;
+	struct list_head *tmp_storage;
+
+	if (!cls_d)
+		return -ENODEV;
+
+	flush_scheduled_work();
+
+	down_write(&bus_socket_list_rwsem);
+	list_for_each_safe(list_loop, tmp_storage, &bus_socket_list) {
+		struct pcmcia_bus_socket *bus_sock = container_of(list_loop, struct pcmcia_bus_socket, socket_list);
+		if (bus_sock->socket_dev == dev) {
+			pcmcia_deregister_client(bus_sock->handle);
+			list_del(&bus_sock->socket_list);
+			kfree(bus_sock);
+		}
+	}
+	up_write(&bus_socket_list_rwsem);
+	return 0;
+}
+
+
+/* the pcmcia_bus_interface is used to handle pcmcia socket devices */
+static struct device_interface pcmcia_bus_interface = {
+	.name = "pcmcia-bus",
+	.devclass = &pcmcia_socket_class,
+	.add_device = &pcmcia_bus_add_socket_dev,
+	.remove_device = __devexit_p(&pcmcia_bus_remove_socket_dev),
+	.kset = { .subsys = &pcmcia_socket_class.subsys, },
+	.devnum = 0,
+};
+
+
+struct bus_type pcmcia_bus_type = {
+	.name = "pcmcia",
+};
+EXPORT_SYMBOL(pcmcia_bus_type);
+
+
+static int __init init_pcmcia_bus(void)
 {
-    int i;
+	int i;
+
+	bus_register(&pcmcia_bus_type);
+	interface_register(&pcmcia_bus_interface);
+
+	/* Set up character device for user mode clients */
+	i = register_chrdev(0, "pcmcia", &ds_fops);
+	if (i == -EBUSY)
+		printk(KERN_NOTICE "unable to find a free device # for "
+		       "Driver Services\n");
+	else
+		major_dev = i;
+
 #ifdef CONFIG_PROC_FS
-    if (proc_pccard)
-	remove_proc_entry("drivers", proc_pccard);
+	if (proc_pccard)
+		create_proc_read_entry("drivers",0,proc_pccard,proc_read_drivers,NULL);
 #endif
-    if (major_dev != -1)
-	unregister_chrdev(major_dev, "pcmcia");
-    for (i = 0; i < sockets; i++)
-	pcmcia_deregister_client(socket_table[i].handle);
-    sockets = 0;
-    kfree(socket_table);
-    bus_unregister(&pcmcia_bus_type);
+
+	return 0;
 }
+fs_initcall(init_pcmcia_bus); /* one level after subsys_initcall so that 
+			       * pcmcia_socket_class is already registered */
 
-#ifdef MODULE
 
-/* init_pcmcia_bus must be done early, init_pcmcia_ds late. If we load this 
- * as a module, we can only specify one initcall, though... 
- */
-static int __init init_pcmcia_module(void) {
-	init_pcmcia_bus();
-	return init_pcmcia_ds();
-}
-module_init(init_pcmcia_module);
-
-#else /* !MODULE */
-subsys_initcall(init_pcmcia_bus);
-late_initcall(init_pcmcia_ds);
+static void __exit exit_pcmcia_bus(void)
+{
+	interface_unregister(&pcmcia_bus_interface);
+
+#ifdef CONFIG_PROC_FS
+	if (proc_pccard)
+		remove_proc_entry("drivers", proc_pccard);
 #endif
+	if (major_dev != -1)
+		unregister_chrdev(major_dev, "pcmcia");
+
+	bus_unregister(&pcmcia_bus_type);
+}
+module_exit(exit_pcmcia_bus);
 
-module_exit(exit_pcmcia_ds);
 
 
 /* helpers for backwards-compatible functions */
+
+
+static struct pcmcia_bus_socket * get_socket_info_by_nr(unsigned int nr)
+{
+	struct pcmcia_bus_socket * s;
+	down_read(&bus_socket_list_rwsem);
+	list_for_each_entry(s, &bus_socket_list, socket_list)
+		if (s->socket_no == nr) {
+			up_read(&bus_socket_list_rwsem);
+			return s;
+		}
+	up_read(&bus_socket_list_rwsem);
+	return NULL;
+}
 
 /* backwards-compatible accessing of driver --- by name! */
 
diff -Nru a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
--- a/drivers/pcmcia/hd64465_ss.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/hd64465_ss.c	Mon Mar 31 13:41:07 2003
@@ -599,21 +599,6 @@
 
 /*============================================================*/
 
-static int hs_get_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-    	hs_socket_t *sp = &hs_sockets[sock];
-	int map = io->map;
-
-    	DPRINTK("hs_get_io_map(%d, %d)\n", sock, map);
-	if (map >= MAX_IO_WIN)
-	    return -EINVAL;
-	
-	*io = sp->io_maps[map];
-	return 0;
-}
-
-/*============================================================*/
-
 static int hs_set_io_map(unsigned int sock, struct pccard_io_map *io)
 {
     	hs_socket_t *sp = &hs_sockets[sock];
@@ -696,21 +681,6 @@
 
 /*============================================================*/
 
-static int hs_get_mem_map(unsigned int sock, struct pccard_mem_map *mem)
-{
-    	hs_socket_t *sp = &hs_sockets[sock];
-	int map = mem->map;
-
-    	DPRINTK("hs_get_mem_map(%d, %d)\n", sock, map);
-	if (map >= MAX_WIN)
-	    return -EINVAL;
-	
-	*mem = sp->mem_maps[map];
-	return 0;
-}
-
-/*============================================================*/
-
 static int hs_set_mem_map(unsigned int sock, struct pccard_mem_map *mem)
 {
     	hs_socket_t *sp = &hs_sockets[sock];
@@ -894,9 +864,7 @@
 	.get_status		= hs_get_status,
 	.get_socket		= hs_get_socket,
 	.set_socket		= hs_set_socket,
-	.get_io_map		= hs_get_io_map,
 	.set_io_map		= hs_set_io_map,
-	.get_mem_map		= hs_get_mem_map,
 	.set_mem_map		= hs_set_mem_map,
 	.proc_setup		= hs_proc_setup,
 };
@@ -1000,6 +968,8 @@
 	.name = "hd64465-pcmcia",
 	.bus = &platform_bus_type,
 	.devclass = &pcmcia_socket_class,
+	.suspend = pcmcia_socket_dev_suspend,
+	.resume = pcmcia_socket_dev_resume,
 };
 
 static struct platform_device hd64465_device = {
diff -Nru a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c
--- a/drivers/pcmcia/i82092.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/i82092.c	Mon Mar 31 13:41:08 2003
@@ -42,11 +42,23 @@
 };
 MODULE_DEVICE_TABLE(pci, i82092aa_pci_ids);
 
+static int i82092aa_socket_suspend (struct pci_dev *dev, u32 state)
+{
+	return pcmcia_socket_dev_suspend(&dev->dev, state, 0);
+}
+
+static int i82092aa_socket_resume (struct pci_dev *dev)
+{
+	return pcmcia_socket_dev_resume(&dev->dev, RESUME_RESTORE_STATE);
+}
+
 static struct pci_driver i82092aa_pci_drv = {
 	.name           = "i82092aa",
 	.id_table       = i82092aa_pci_ids,
 	.probe          = i82092aa_pci_probe,
 	.remove         = __devexit_p(i82092aa_pci_remove),
+	.suspend        = i82092aa_socket_suspend,
+	.resume         = i82092aa_socket_resume,
 	.driver		= {
 		.devclass = &pcmcia_socket_class,
 	},
@@ -63,9 +75,7 @@
 	.get_status		= i82092aa_get_status,
 	.get_socket		= i82092aa_get_socket,
 	.set_socket		= i82092aa_set_socket,
-	.get_io_map		= i82092aa_get_io_map,
 	.set_io_map		= i82092aa_set_io_map,
-	.get_mem_map		= i82092aa_get_mem_map,
 	.set_mem_map		= i82092aa_set_mem_map,
 	.proc_setup		= i82092aa_proc_setup,
 };
@@ -304,11 +314,6 @@
 		return 0;
 }
     
-static int to_ns(int cycles)
-{
-	return cycle_time*cycles;
-}
-
 
 /* Interrupt handler functionality */
 
@@ -688,34 +693,6 @@
 	return 0;
 }
 
-static int i82092aa_get_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-	unsigned char map, ioctl, addr;
-	
-	enter("i82092aa_get_io_map");
-	map = io->map;
-	if (map > 1) {
-		leave("i82092aa_get_io_map with -EINVAL");
-		return -EINVAL;
-	}
-	
-	/* FIXME: How does this fit in with the PCI resource (re)allocation */
-	io->start = indirect_read16(sock, I365_IO(map)+I365_W_START);
-	io->stop  = indirect_read16(sock, I365_IO(map)+I365_W_START);
-	
-	ioctl = indirect_read(sock,I365_IOCTL); /* IOCREG: I/O Control Register */
-	addr  = indirect_read(sock,I365_ADDRWIN); /* */
-	
-	io->speed = to_ns(ioctl & I365_IOCTL_WAIT(map)) ? 1 : 0; /* check this out later */
-	io->flags = 0;
-	
-	if (addr & I365_IOCTL_16BIT(map))
-		io->flags |= MAP_AUTOSZ;
-		
-	leave("i82092aa_get_io_map");
-	return 0;
-}
-
 static int i82092aa_set_io_map(unsigned sock, struct pccard_io_map *io)
 {
 	unsigned char map, ioctl;
@@ -757,64 +734,6 @@
 			
 	leave("i82092aa_set_io_map");	
 	return 0;
-}
-
-static int i82092aa_get_mem_map(unsigned sock, struct pccard_mem_map *mem)
-{
-	unsigned short base, i;
-	unsigned char map, addr;
-	
-	enter("i82092aa_get_mem_map");
-	
-	mem->flags = 0;
-	mem->speed = 0;
-	map = mem->map;
-	if (map > 4) {
-		leave("i82092aa_get_mem_map: -EINVAL");
-		return -EINVAL;
-	}
-	
-	addr = indirect_read(sock, I365_ADDRWIN);
-		
-	if (addr & I365_ENA_MEM(map))
-		mem->flags |= MAP_ACTIVE;		/* yes this mapping is active */
-	
-	base = I365_MEM(map); 
-	
-	/* Find the start address - this register also has mapping info */
-	
-	i = indirect_read16(sock,base+I365_W_START);
-	if (i & I365_MEM_16BIT)
-		mem->flags |= MAP_16BIT;
-	if (i & I365_MEM_0WS)
-		mem->flags |= MAP_0WS;
-	
-	mem->sys_start = ((unsigned long)(i & 0x0fff) << 12);
-	
-	/* Find the end address - this register also has speed info */
-	i = indirect_read16(sock,base+I365_W_STOP);
-	if (i & I365_MEM_WS0)
-		mem->speed = 1;
-	if (i & I365_MEM_WS1)
-		mem->speed += 2;
-	mem->speed = to_ns(mem->speed);
-	mem->sys_stop = ( (unsigned long)(i & 0x0fff) << 12) + 0x0fff;
-	
-	/* Find the card start address, also some more MAP attributes */
-	
-	i = indirect_read16(sock, base+I365_W_OFF);
-	if (i & I365_MEM_WRPROT)
-		mem->flags |= MAP_WRPROT;
-	if (i & I365_MEM_REG)
-		mem->flags |= MAP_ATTRIB;
-	mem->card_start = ( (unsigned long)(i & 0x3fff)<12) + mem->sys_start;
-	mem->card_start &=  0x3ffffff;
-	
-	printk("Card %i is from %lx to %lx \n",sock,mem->sys_start,mem->sys_stop);
-	
-	leave("i82092aa_get_mem_map");
-	return 0;
-	
 }
 
 static int i82092aa_set_mem_map(unsigned sock, struct pccard_mem_map *mem)
diff -Nru a/drivers/pcmcia/i82092aa.h b/drivers/pcmcia/i82092aa.h
--- a/drivers/pcmcia/i82092aa.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/i82092aa.h	Mon Mar 31 13:41:06 2003
@@ -29,9 +29,7 @@
 static int i82092aa_get_status(unsigned int sock, u_int *value);
 static int i82092aa_get_socket(unsigned int sock, socket_state_t *state);
 static int i82092aa_set_socket(unsigned int sock, socket_state_t *state);
-static int i82092aa_get_io_map(unsigned int sock, struct pccard_io_map *io);
 static int i82092aa_set_io_map(unsigned int sock, struct pccard_io_map *io);
-static int i82092aa_get_mem_map(unsigned int sock, struct pccard_mem_map *mem);
 static int i82092aa_set_mem_map(unsigned int sock, struct pccard_mem_map *mem);
 static int i82092aa_init(unsigned int s);
 static int i82092aa_suspend(unsigned int sock);
diff -Nru a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
--- a/drivers/pcmcia/i82365.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/i82365.c	Mon Mar 31 13:41:08 2003
@@ -1250,29 +1250,6 @@
 
 /*====================================================================*/
 
-static int i365_get_io_map(u_short sock, struct pccard_io_map *io)
-{
-    u_char map, ioctl, addr;
-    
-    map = io->map;
-    if (map > 1) return -EINVAL;
-    io->start = i365_get_pair(sock, I365_IO(map)+I365_W_START);
-    io->stop = i365_get_pair(sock, I365_IO(map)+I365_W_STOP);
-    ioctl = i365_get(sock, I365_IOCTL);
-    addr = i365_get(sock, I365_ADDRWIN);
-    io->speed = to_ns(ioctl & I365_IOCTL_WAIT(map)) ? 1 : 0;
-    io->flags  = (addr & I365_ENA_IO(map)) ? MAP_ACTIVE : 0;
-    io->flags |= (ioctl & I365_IOCTL_0WS(map)) ? MAP_0WS : 0;
-    io->flags |= (ioctl & I365_IOCTL_16BIT(map)) ? MAP_16BIT : 0;
-    io->flags |= (ioctl & I365_IOCTL_IOCS16(map)) ? MAP_AUTOSZ : 0;
-    DEBUG(1, "i82365: GetIOMap(%d, %d) = %#2.2x, %d ns, "
-	  "%#4.4x-%#4.4x\n", sock, map, io->flags, io->speed,
-	  io->start, io->stop);
-    return 0;
-} /* i365_get_io_map */
-
-/*====================================================================*/
-
 static int i365_set_io_map(u_short sock, struct pccard_io_map *io)
 {
     u_char map, ioctl;
@@ -1302,42 +1279,6 @@
 
 /*====================================================================*/
 
-static int i365_get_mem_map(u_short sock, struct pccard_mem_map *mem)
-{
-    u_short base, i;
-    u_char map, addr;
-    
-    map = mem->map;
-    if (map > 4) return -EINVAL;
-    addr = i365_get(sock, I365_ADDRWIN);
-    mem->flags = (addr & I365_ENA_MEM(map)) ? MAP_ACTIVE : 0;
-    base = I365_MEM(map);
-    
-    i = i365_get_pair(sock, base+I365_W_START);
-    mem->flags |= (i & I365_MEM_16BIT) ? MAP_16BIT : 0;
-    mem->flags |= (i & I365_MEM_0WS) ? MAP_0WS : 0;
-    mem->sys_start = ((u_long)(i & 0x0fff) << 12);
-    
-    i = i365_get_pair(sock, base+I365_W_STOP);
-    mem->speed  = (i & I365_MEM_WS0) ? 1 : 0;
-    mem->speed += (i & I365_MEM_WS1) ? 2 : 0;
-    mem->speed = to_ns(mem->speed);
-    mem->sys_stop = ((u_long)(i & 0x0fff) << 12) + 0x0fff;
-    
-    i = i365_get_pair(sock, base+I365_W_OFF);
-    mem->flags |= (i & I365_MEM_WRPROT) ? MAP_WRPROT : 0;
-    mem->flags |= (i & I365_MEM_REG) ? MAP_ATTRIB : 0;
-    mem->card_start = ((u_int)(i & 0x3fff) << 12) + mem->sys_start;
-    mem->card_start &= 0x3ffffff;
-    
-    DEBUG(1, "i82365: GetMemMap(%d, %d) = %#2.2x, %d ns, %#5.5lx-%#5."
-	  "5lx, %#5.5x\n", sock, mem->map, mem->flags, mem->speed,
-	  mem->sys_start, mem->sys_stop, mem->card_start);
-    return 0;
-} /* i365_get_mem_map */
-
-/*====================================================================*/
-  
 static int i365_set_mem_map(u_short sock, struct pccard_mem_map *mem)
 {
     u_short base, i;
@@ -1506,14 +1447,6 @@
 	LOCKED(i365_set_socket(sock, state));
 }
 
-static int pcic_get_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-	if (socket[sock].flags & IS_ALIVE)
-		return -EINVAL;
-
-	LOCKED(i365_get_io_map(sock, io));
-}
-
 static int pcic_set_io_map(unsigned int sock, struct pccard_io_map *io)
 {
 	if (socket[sock].flags & IS_ALIVE)
@@ -1522,14 +1455,6 @@
 	LOCKED(i365_set_io_map(sock, io));
 }
 
-static int pcic_get_mem_map(unsigned int sock, struct pccard_mem_map *mem)
-{
-	if (socket[sock].flags & IS_ALIVE)
-		return -EINVAL;
-
-	LOCKED(i365_get_mem_map(sock, mem));
-}
-
 static int pcic_set_mem_map(unsigned int sock, struct pccard_mem_map *mem)
 {
 	if (socket[sock].flags & IS_ALIVE)
@@ -1571,9 +1496,7 @@
 	.get_status		= pcic_get_status,
 	.get_socket		= pcic_get_socket,
 	.set_socket		= pcic_set_socket,
-	.get_io_map		= pcic_get_io_map,
 	.set_io_map		= pcic_set_io_map,
-	.get_mem_map		= pcic_get_mem_map,
 	.set_mem_map		= pcic_set_mem_map,
 	.proc_setup		= pcic_proc_setup,
 };
@@ -1588,6 +1511,8 @@
 	.name = "i82365",
 	.bus = &platform_bus_type,
 	.devclass = &pcmcia_socket_class,
+	.suspend = pcmcia_socket_dev_suspend,
+	.resume = pcmcia_socket_dev_resume,
 };
 
 static struct platform_device i82365_device = {
diff -Nru a/drivers/pcmcia/pci_socket.c b/drivers/pcmcia/pci_socket.c
--- a/drivers/pcmcia/pci_socket.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/pci_socket.c	Mon Mar 31 13:41:08 2003
@@ -31,10 +31,6 @@
 #include "pci_socket.h"
 
 
-extern void pcmcia_suspend_socket (struct socket_info_t *socket);
-extern void pcmcia_resume_socket (struct socket_info_t *socket);
-
-
 /*
  * Arbitrary define. This is the array of active cardbus
  * entries.
@@ -105,15 +101,6 @@
 	return -EINVAL;
 }
 
-static int pci_get_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-	pci_socket_t *socket = pci_socket_array + sock;
-
-	if (socket->op && socket->op->get_io_map)
-		return socket->op->get_io_map(socket, io);
-	return -EINVAL;
-}
-
 static int pci_set_io_map(unsigned int sock, struct pccard_io_map *io)
 {
 	pci_socket_t *socket = pci_socket_array + sock;
@@ -123,15 +110,6 @@
 	return -EINVAL;
 }
 
-static int pci_get_mem_map(unsigned int sock, struct pccard_mem_map *mem)
-{
-	pci_socket_t *socket = pci_socket_array + sock;
-
-	if (socket->op && socket->op->get_mem_map)
-		return socket->op->get_mem_map(socket, mem);
-	return -EINVAL;
-}
-
 static int pci_set_mem_map(unsigned int sock, struct pccard_mem_map *mem)
 {
 	pci_socket_t *socket = pci_socket_array + sock;
@@ -158,9 +136,7 @@
 	.get_status		= pci_get_status,
 	.get_socket		= pci_get_socket,
 	.set_socket		= pci_set_socket,
-	.get_io_map		= pci_get_io_map,
 	.set_io_map		= pci_set_io_map,
-	.get_mem_map		= pci_get_mem_map,
 	.set_mem_map		= pci_set_mem_map,
 	.proc_setup		= pci_proc_setup,
 };
@@ -177,7 +153,6 @@
 	socket->cls_d.nsock = 1; /* yenta is 1, no other low-level driver uses
 			     this yet */
 	socket->cls_d.ops = &pci_socket_operations;
-	socket->cls_d.use_bus_pm = 1;
 	dev->dev.class_data = &socket->cls_d;
 
 	/* prepare pci_socket_t */
@@ -224,18 +199,12 @@
 
 static int cardbus_suspend (struct pci_dev *dev, u32 state)
 {
-	pci_socket_t *socket = pci_get_drvdata(dev);
-	if (socket && socket->cls_d.s_info)
-		pcmcia_suspend_socket (socket->cls_d.s_info);
-	return 0;
+	return pcmcia_socket_dev_suspend(&dev->dev, state, 0);
 }
 
 static int cardbus_resume (struct pci_dev *dev)
 {
-	pci_socket_t *socket = pci_get_drvdata(dev);
-	if (socket && socket->cls_d.s_info)
-		pcmcia_resume_socket (socket->cls_d.s_info);
-	return 0;
+	return pcmcia_socket_dev_resume(&dev->dev, RESUME_RESTORE_STATE);
 }
 
 
diff -Nru a/drivers/pcmcia/pci_socket.h b/drivers/pcmcia/pci_socket.h
--- a/drivers/pcmcia/pci_socket.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/pci_socket.h	Mon Mar 31 13:41:06 2003
@@ -37,9 +37,7 @@
 	int (*get_status)(struct pci_socket *, unsigned int *);
 	int (*get_socket)(struct pci_socket *, socket_state_t *);
 	int (*set_socket)(struct pci_socket *, socket_state_t *);
-	int (*get_io_map)(struct pci_socket *, struct pccard_io_map *);
 	int (*set_io_map)(struct pci_socket *, struct pccard_io_map *);
-	int (*get_mem_map)(struct pci_socket *, struct pccard_mem_map *);
 	int (*set_mem_map)(struct pci_socket *, struct pccard_mem_map *);
 	void (*proc_setup)(struct pci_socket *, struct proc_dir_entry *base);
 };
diff -Nru a/drivers/pcmcia/ricoh.h b/drivers/pcmcia/ricoh.h
--- a/drivers/pcmcia/ricoh.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/ricoh.h	Mon Mar 31 13:41:07 2003
@@ -170,9 +170,7 @@
 	yenta_get_status,
 	yenta_get_socket,
 	yenta_set_socket,
-	yenta_get_io_map,
 	yenta_set_io_map,
-	yenta_get_mem_map,
 	yenta_set_mem_map,
 	yenta_proc_setup
 };
diff -Nru a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
--- a/drivers/pcmcia/rsrc_mgr.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/rsrc_mgr.c	Mon Mar 31 13:41:07 2003
@@ -60,7 +60,7 @@
 #define INT_MODULE_PARM(n, v) static int n = v; MODULE_PARM(n, "i")
 
 INT_MODULE_PARM(probe_mem,	1);		/* memory probe? */
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
 INT_MODULE_PARM(probe_io,	1);		/* IO port probe? */
 INT_MODULE_PARM(mem_limit,	0x10000);
 #endif
@@ -83,7 +83,9 @@
 /* IO port resource database */
 static resource_map_t io_db = { 0, 0, &io_db };
 
-#ifdef CONFIG_ISA
+static DECLARE_MUTEX(rsrc_sem);
+
+#ifdef CONFIG_PCMCIA_PROBE
 
 typedef struct irq_info_t {
     u_int			Attributes;
@@ -269,7 +271,7 @@
     
 ======================================================================*/
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
 static void do_io_probe(ioaddr_t base, ioaddr_t num)
 {
     
@@ -333,15 +335,69 @@
 
 /*======================================================================
 
+    This is tricky... when we set up CIS memory, we try to validate
+    the memory window space allocations.
+    
+======================================================================*/
+
+/* Validation function for cards with a valid CIS */
+static int cis_readable(socket_info_t *s, u_long base)
+{
+    cisinfo_t info1, info2;
+    int ret;
+    s->cis_mem.sys_start = base;
+    s->cis_mem.sys_stop = base+s->cap.map_size-1;
+    s->cis_virt = ioremap(base, s->cap.map_size);
+    ret = pcmcia_validate_cis(s->clients, &info1);
+    /* invalidate mapping and CIS cache */
+    iounmap(s->cis_virt);
+    s->cis_used = 0;
+    if ((ret != 0) || (info1.Chains == 0))
+	return 0;
+    s->cis_mem.sys_start = base+s->cap.map_size;
+    s->cis_mem.sys_stop = base+2*s->cap.map_size-1;
+    s->cis_virt = ioremap(base+s->cap.map_size, s->cap.map_size);
+    ret = pcmcia_validate_cis(s->clients, &info2);
+    iounmap(s->cis_virt);
+    s->cis_used = 0;
+    return ((ret == 0) && (info1.Chains == info2.Chains));
+}
+
+/* Validation function for simple memory cards */
+static int checksum(socket_info_t *s, u_long base)
+{
+    int i, a, b, d;
+    s->cis_mem.sys_start = base;
+    s->cis_mem.sys_stop = base+s->cap.map_size-1;
+    s->cis_virt = ioremap(base, s->cap.map_size);
+    s->cis_mem.card_start = 0;
+    s->cis_mem.flags = MAP_ACTIVE;
+    s->ss_entry->set_mem_map(s->sock, &s->cis_mem);
+    /* Don't bother checking every word... */
+    a = 0; b = -1;
+    for (i = 0; i < s->cap.map_size; i += 44) {
+	d = readl(s->cis_virt+i);
+	a += d; b &= d;
+    }
+    iounmap(s->cis_virt);
+    return (b == -1) ? -1 : (a>>1);
+}
+
+static int checksum_match(socket_info_t *s, u_long base)
+{
+    int a = checksum(s, base), b = checksum(s, base+s->cap.map_size);
+    return ((a == b) && (a >= 0));
+}
+
+/*======================================================================
+
     The memory probe.  If the memory list includes a 64K-aligned block
     below 1MB, we probe in 64K chunks, and as soon as we accumulate at
     least mem_limit free space, we quit.
     
 ======================================================================*/
 
-static int do_mem_probe(u_long base, u_long num,
-			int (*is_valid)(u_long), int (*do_cksum)(u_long),
-			socket_info_t *s)
+static int do_mem_probe(u_long base, u_long num, socket_info_t *s)
 {
     u_long i, j, bad, fail, step;
 
@@ -349,18 +405,21 @@
 	   base, base+num-1);
     bad = fail = 0;
     step = (num < 0x20000) ? 0x2000 : ((num>>4) & ~0x1fff);
+    /* cis_readable wants to map 2x map_size */
+    if (step < 2 * s->cap.map_size)
+	step = 2 * s->cap.map_size;
     for (i = j = base; i < base+num; i = j + step) {
 	if (!fail) {	
 	    for (j = i; j < base+num; j += step)
 		if ((check_mem_resource(j, step, s->cap.cb_dev) == 0) &&
-		    is_valid(j))
+		    cis_readable(s, j))
 		    break;
 	    fail = ((i == base) && (j == base+num));
 	}
 	if (fail) {
 	    for (j = i; j < base+num; j += 2*step)
 		if ((check_mem_resource(j, 2*step, s->cap.cb_dev) == 0) &&
-		    do_cksum(j) && do_cksum(j+step))
+		    checksum_match(s, j) && checksum_match(s, j + step))
 		    break;
 	}
 	if (i != j) {
@@ -374,16 +433,14 @@
     return (num - bad);
 }
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
 
-static u_long inv_probe(int (*is_valid)(u_long),
-			int (*do_cksum)(u_long),
-			resource_map_t *m, socket_info_t *s)
+static u_long inv_probe(resource_map_t *m, socket_info_t *s)
 {
     u_long ok;
     if (m == &mem_db)
 	return 0;
-    ok = inv_probe(is_valid, do_cksum, m->next, s);
+    ok = inv_probe(m->next, s);
     if (ok) {
 	if (m->base >= 0x100000)
 	    sub_interval(&mem_db, m->base, m->num);
@@ -391,32 +448,36 @@
     }
     if (m->base < 0x100000)
 	return 0;
-    return do_mem_probe(m->base, m->num, is_valid, do_cksum, s);
+    return do_mem_probe(m->base, m->num, s);
 }
 
-void validate_mem(int (*is_valid)(u_long), int (*do_cksum)(u_long),
-		  int force_low, socket_info_t *s)
+void validate_mem(socket_info_t *s)
 {
     resource_map_t *m, *n;
     static u_char order[] = { 0xd0, 0xe0, 0xc0, 0xf0 };
     static int hi = 0, lo = 0;
     u_long b, i, ok = 0;
-    
-    if (!probe_mem) return;
+    int force_low = !(s->cap.features & SS_CAP_PAGE_REGS);
+
+    if (!probe_mem)
+	return;
+
+    down(&rsrc_sem);
     /* We do up to four passes through the list */
     if (!force_low) {
-	if (hi++ || (inv_probe(is_valid, do_cksum, mem_db.next, s) > 0))
-	    return;
+	if (hi++ || (inv_probe(mem_db.next, s) > 0))
+	    goto out;
 	printk(KERN_NOTICE "cs: warning: no high memory space "
 	       "available!\n");
     }
-    if (lo++) return;
+    if (lo++)
+	goto out;
     for (m = mem_db.next; m != &mem_db; m = n) {
 	n = m->next;
 	/* Only probe < 1 MB */
 	if (m->base >= 0x100000) continue;
 	if ((m->base | m->num) & 0xffff) {
-	    ok += do_mem_probe(m->base, m->num, is_valid, do_cksum, s);
+	    ok += do_mem_probe(m->base, m->num, s);
 	    continue;
 	}
 	/* Special probe for 64K-aligned block */
@@ -426,28 +487,31 @@
 		if (ok >= mem_limit)
 		    sub_interval(&mem_db, b, 0x10000);
 		else
-		    ok += do_mem_probe(b, 0x10000, is_valid, do_cksum, s);
+		    ok += do_mem_probe(b, 0x10000, s);
 	    }
 	}
     }
+ out:
+    up(&rsrc_sem);
 }
 
-#else /* CONFIG_ISA */
+#else /* CONFIG_PCMCIA_PROBE */
 
-void validate_mem(int (*is_valid)(u_long), int (*do_cksum)(u_long),
-		  int force_low, socket_info_t *s)
+void validate_mem(socket_info_t *s)
 {
     resource_map_t *m;
     static int done = 0;
     
-    if (!probe_mem || done++)
-	return;
-    for (m = mem_db.next; m != &mem_db; m = m->next)
-	if (do_mem_probe(m->base, m->num, is_valid, do_cksum, s))
-	    return;
+    if (probe_mem && done++ == 0) {
+	down(&rsrc_sem);
+	for (m = mem_db.next; m != &mem_db; m = m->next)
+	    if (do_mem_probe(m->base, m->num, s))
+		break;
+	up(&rsrc_sem);
+    }
 }
 
-#endif /* CONFIG_ISA */
+#endif /* CONFIG_PCMCIA_PROBE */
 
 /*======================================================================
 
@@ -467,7 +531,9 @@
 {
     ioaddr_t try;
     resource_map_t *m;
-    
+    int ret = -1;
+
+    down(&rsrc_sem);
     for (m = io_db.next; m != &io_db; m = m->next) {
 	try = (m->base & ~(align-1)) + *base;
 	for (try = (try >= m->base) ? try : try+align;
@@ -475,12 +541,16 @@
 	     try += align) {
 	    if (request_io_resource(try, num, name, s->cap.cb_dev) == 0) {
 		*base = try;
-		return 0;
+		ret = 0;
+		goto out;
 	    }
-	    if (!align) break;
+	    if (!align)
+		break;
 	}
     }
-    return -1;
+ out:
+    up(&rsrc_sem);
+    return ret;
 }
 
 int find_mem_region(u_long *base, u_long num, u_long align,
@@ -488,26 +558,35 @@
 {
     u_long try;
     resource_map_t *m;
+    int ret = -1;
 
+    down(&rsrc_sem);
     while (1) {
 	for (m = mem_db.next; m != &mem_db; m = m->next) {
 	    /* first pass >1MB, second pass <1MB */
-	    if ((force_low != 0) ^ (m->base < 0x100000)) continue;
+	    if ((force_low != 0) ^ (m->base < 0x100000))
+		continue;
+
 	    try = (m->base & ~(align-1)) + *base;
 	    for (try = (try >= m->base) ? try : try+align;
 		 (try >= m->base) && (try+num <= m->base+m->num);
 		 try += align) {
 		if (request_mem_resource(try, num, name, s->cap.cb_dev) == 0) {
 		    *base = try;
-		    return 0;
+		    ret = 0;
+		    goto out;
 		}
-		if (!align) break;
+		if (!align)
+		    break;
 	    }
 	}
-	if (force_low) break;
+	if (force_low)
+	    break;
 	force_low++;
     }
-    return -1;
+ out:
+    up(&rsrc_sem);
+    return ret;
 }
 
 /*======================================================================
@@ -518,7 +597,7 @@
     
 ======================================================================*/
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
 
 static void fake_irq(int i, void *d, struct pt_regs *r) { }
 static inline int check_irq(int irq)
@@ -532,66 +611,89 @@
 int try_irq(u_int Attributes, int irq, int specific)
 {
     irq_info_t *info = &irq_table[irq];
+    int ret = 0;
+
+    down(&rsrc_sem);
     if (info->Attributes & RES_ALLOCATED) {
 	switch (Attributes & IRQ_TYPE) {
 	case IRQ_TYPE_EXCLUSIVE:
-	    return CS_IN_USE;
+	    ret = CS_IN_USE;
+	    break;
 	case IRQ_TYPE_TIME:
 	    if ((info->Attributes & RES_IRQ_TYPE)
-		!= RES_IRQ_TYPE_TIME)
-		return CS_IN_USE;
-	    if (Attributes & IRQ_FIRST_SHARED)
-		return CS_BAD_ATTRIBUTE;
+		!= RES_IRQ_TYPE_TIME) {
+		ret = CS_IN_USE;
+		break;
+	    }
+	    if (Attributes & IRQ_FIRST_SHARED) {
+		ret = CS_BAD_ATTRIBUTE;
+		break;
+	    }
 	    info->Attributes |= RES_IRQ_TYPE_TIME | RES_ALLOCATED;
 	    info->time_share++;
 	    break;
 	case IRQ_TYPE_DYNAMIC_SHARING:
 	    if ((info->Attributes & RES_IRQ_TYPE)
-		!= RES_IRQ_TYPE_DYNAMIC)
-		return CS_IN_USE;
-	    if (Attributes & IRQ_FIRST_SHARED)
-		return CS_BAD_ATTRIBUTE;
+		!= RES_IRQ_TYPE_DYNAMIC) {
+		ret = CS_IN_USE;
+		break;
+	    }
+	    if (Attributes & IRQ_FIRST_SHARED) {
+		ret = CS_BAD_ATTRIBUTE;
+		break;
+	    }
 	    info->Attributes |= RES_IRQ_TYPE_DYNAMIC | RES_ALLOCATED;
 	    info->dyn_share++;
 	    break;
 	}
     } else {
-	if ((info->Attributes & RES_RESERVED) && !specific)
-	    return CS_IN_USE;
-	if (check_irq(irq) != 0)
-	    return CS_IN_USE;
+	if ((info->Attributes & RES_RESERVED) && !specific) {
+	    ret = CS_IN_USE;
+	    goto out;
+	}
+	if (check_irq(irq) != 0) {
+	    ret = CS_IN_USE;
+	    goto out;
+	}
 	switch (Attributes & IRQ_TYPE) {
 	case IRQ_TYPE_EXCLUSIVE:
 	    info->Attributes |= RES_ALLOCATED;
 	    break;
 	case IRQ_TYPE_TIME:
-	    if (!(Attributes & IRQ_FIRST_SHARED))
-		return CS_BAD_ATTRIBUTE;
+	    if (!(Attributes & IRQ_FIRST_SHARED)) {
+		ret = CS_BAD_ATTRIBUTE;
+		break;
+	    }
 	    info->Attributes |= RES_IRQ_TYPE_TIME | RES_ALLOCATED;
 	    info->time_share = 1;
 	    break;
 	case IRQ_TYPE_DYNAMIC_SHARING:
-	    if (!(Attributes & IRQ_FIRST_SHARED))
-		return CS_BAD_ATTRIBUTE;
+	    if (!(Attributes & IRQ_FIRST_SHARED)) {
+		ret = CS_BAD_ATTRIBUTE;
+		break;
+	    }
 	    info->Attributes |= RES_IRQ_TYPE_DYNAMIC | RES_ALLOCATED;
 	    info->dyn_share = 1;
 	    break;
 	}
     }
-    return 0;
+ out:
+    up(&rsrc_sem);
+    return ret;
 }
 
 #endif
 
 /*====================================================================*/
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_PCMCIA_PROBE
 
 void undo_irq(u_int Attributes, int irq)
 {
     irq_info_t *info;
 
     info = &irq_table[irq];
+    down(&rsrc_sem);
     switch (Attributes & IRQ_TYPE) {
     case IRQ_TYPE_EXCLUSIVE:
 	info->Attributes &= RES_RESERVED;
@@ -607,6 +709,7 @@
 	    info->Attributes &= RES_RESERVED;
 	break;
     }
+    up(&rsrc_sem);
 }
 
 #endif
@@ -629,6 +732,8 @@
 	return CS_BAD_SIZE;
 
     ret = CS_SUCCESS;
+
+    down(&rsrc_sem);
     switch (adj->Action) {
     case ADD_MANAGED_RESOURCE:
 	ret = add_interval(&mem_db, base, num);
@@ -647,6 +752,7 @@
     default:
 	ret = CS_UNSUPPORTED_FUNCTION;
     }
+    up(&rsrc_sem);
     
     return ret;
 }
@@ -655,7 +761,7 @@
 
 static int adjust_io(adjust_t *adj)
 {
-    int base, num;
+    int base, num, ret = CS_SUCCESS;
     
     base = adj->resource.io.BasePort;
     num = adj->resource.io.NumPorts;
@@ -664,11 +770,14 @@
     if ((num <= 0) || (base+num > 0x10000) || (base+num <= base))
 	return CS_BAD_SIZE;
 
+    down(&rsrc_sem);
     switch (adj->Action) {
     case ADD_MANAGED_RESOURCE:
-	if (add_interval(&io_db, base, num) != 0)
-	    return CS_IN_USE;
-#ifdef CONFIG_ISA
+	if (add_interval(&io_db, base, num) != 0) {
+	    ret = CS_IN_USE;
+	    break;
+	}
+#ifdef CONFIG_PCMCIA_PROBE
 	if (probe_io)
 	    do_io_probe(base, num);
 #endif
@@ -677,18 +786,20 @@
 	sub_interval(&io_db, base, num);
 	break;
     default:
-	return CS_UNSUPPORTED_FUNCTION;
+	ret = CS_UNSUPPORTED_FUNCTION;
 	break;
     }
+    up(&rsrc_sem);
 
-    return CS_SUCCESS;
+    return ret;
 }
 
 /*====================================================================*/
 
 static int adjust_irq(adjust_t *adj)
 {
-#ifdef CONFIG_ISA
+    int ret = CS_SUCCESS;
+#ifdef CONFIG_PCMCIA_PROBE
     int irq;
     irq_info_t *info;
     
@@ -696,33 +807,41 @@
     if ((irq < 0) || (irq > 15))
 	return CS_BAD_IRQ;
     info = &irq_table[irq];
-    
+
+    down(&rsrc_sem);
     switch (adj->Action) {
     case ADD_MANAGED_RESOURCE:
 	if (info->Attributes & RES_REMOVED)
 	    info->Attributes &= ~(RES_REMOVED|RES_ALLOCATED);
 	else
-	    if (adj->Attributes & RES_ALLOCATED)
-		return CS_IN_USE;
+	    if (adj->Attributes & RES_ALLOCATED) {
+		ret = CS_IN_USE;
+		break;
+	    }
 	if (adj->Attributes & RES_RESERVED)
 	    info->Attributes |= RES_RESERVED;
 	else
 	    info->Attributes &= ~RES_RESERVED;
 	break;
     case REMOVE_MANAGED_RESOURCE:
-	if (info->Attributes & RES_REMOVED)
-	    return 0;
-	if (info->Attributes & RES_ALLOCATED)
-	    return CS_IN_USE;
+	if (info->Attributes & RES_REMOVED) {
+	    ret = 0;
+	    break;
+	}
+	if (info->Attributes & RES_ALLOCATED) {
+	    ret = CS_IN_USE;
+	    break;
+	}
 	info->Attributes |= RES_ALLOCATED|RES_REMOVED;
 	info->Attributes &= ~RES_RESERVED;
 	break;
     default:
-	return CS_UNSUPPORTED_FUNCTION;
+	ret = CS_UNSUPPORTED_FUNCTION;
 	break;
     }
+    up(&rsrc_sem);
 #endif
-    return CS_SUCCESS;
+    return ret;
 }
 
 /*====================================================================*/
diff -Nru a/drivers/pcmcia/sa1100.h b/drivers/pcmcia/sa1100.h
--- a/drivers/pcmcia/sa1100.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100.h	Mon Mar 31 13:41:07 2003
@@ -152,36 +152,6 @@
 
 struct pcmcia_low_level;
 
-/* This structure encapsulates per-socket state which we might need to
- * use when responding to a Card Services query of some kind.
- */
-struct sa1100_pcmcia_socket {
-  /*
-   * Core PCMCIA state
-   */
-  int			nr;
-  struct resource	res;
-  socket_state_t        cs_state;
-  pccard_io_map         io_map[MAX_IO_WIN];
-  pccard_mem_map        pc_mem_map[MAX_WIN];
-  void                  (*handler)(void *, unsigned int);
-  void                  *handler_info;
-
-  struct pcmcia_state   k_state;
-  ioaddr_t              phys_attr, phys_mem;
-  void			*virt_io;
-  unsigned short        speed_io, speed_attr, speed_mem;
-
-  /*
-   * Info from low level handler
-   */
-  unsigned int          irq;
-  unsigned int		irq_state;
-
-  struct pcmcia_low_level *ops;
-};
-
-
 /* I/O pins replacing memory pins
  * (PCMCIA System Architecture, 2nd ed., by Don Anderson, p.75)
  *
@@ -190,60 +160,5 @@
  */
 #define iostschg bvd1
 #define iospkr   bvd2
-
-
-/*
- * Declaration for all machine specific init/exit functions.
- */
-extern int pcmcia_adsbitsy_init(struct device *);
-extern void pcmcia_adsbitsy_exit(struct device *);
-
-extern int pcmcia_assabet_init(struct device *);
-extern void pcmcia_assabet_exit(struct device *);
-
-extern int pcmcia_badge4_init(struct device *);
-extern void pcmcia_badge4_exit(struct device *);
-
-extern int pcmcia_cerf_init(struct device *);
-extern void pcmcia_cerf_exit(struct device *);
-
-extern int pcmcia_flexanet_init(struct device *);
-extern void pcmcia_flexanet_exit(struct device *);
-
-extern int pcmcia_freebird_init(struct device *);
-extern void pcmcia_freebird_exit(struct device *);
-
-extern int pcmcia_gcplus_init(struct device *);
-extern void pcmcia_gcplus_exit(struct device *);
-
-extern int pcmcia_graphicsmaster_init(struct device *);
-extern void pcmcia_graphicsmaster_exit(struct device *);
-
-extern int pcmcia_pangolin_init(struct device *);
-extern void pcmcia_pangolin_exit(struct device *);
-
-extern int pcmcia_pfs168_init(struct device *);
-extern void pcmcia_pfs168_exit(struct device *);
-
-extern int pcmcia_shannon_init(struct device *);
-extern void pcmcia_shannon_exit(struct device *);
-
-extern int pcmcia_simpad_init(struct device *);
-extern void pcmcia_simpad_exit(struct device *);
-
-extern int pcmcia_stork_init(struct device *);
-extern void pcmcia_stork_exit(struct device *);
-
-extern int pcmcia_system3_init(struct device *);
-extern void pcmcia_system3_exit(struct device *);
-
-extern int pcmcia_trizeps_init(struct device *);
-extern void pcmcia_trizeps_exit(struct device *);
-
-extern int pcmcia_xp860_init(struct device *);
-extern void pcmcia_xp860_exit(struct device *);
-
-extern int pcmcia_yopy_init(struct device *);
-extern void pcmcia_yopy_exit(struct device *);
 
 #endif  /* !defined(_PCMCIA_SA1100_H) */
diff -Nru a/drivers/pcmcia/sa1100_adsbitsy.c b/drivers/pcmcia/sa1100_adsbitsy.c
--- a/drivers/pcmcia/sa1100_adsbitsy.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_adsbitsy.c	Mon Mar 31 13:41:08 2003
@@ -18,93 +18,86 @@
 #include <asm/hardware.h>
 #include <asm/mach-types.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
-static int adsbitsy_pcmcia_init(struct pcmcia_init *init)
+static int adsbitsy_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-  /* Set GPIO_A<3:0> to be outputs for PCMCIA/CF power controller: */
-  PA_DDR &= ~(GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3);
+	/* Set GPIO_A<3:0> to be outputs for PCMCIA/CF power controller: */
+	PA_DDR &= ~(GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3);
 
-  /* Disable Power 3.3V/5V for PCMCIA/CF */
-  PA_DWR |= GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3;
+	/* Disable Power 3.3V/5V for PCMCIA/CF */
+	PA_DWR |= GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3;
 
-  /* Why? */			 
-  MECR = 0x09430943;
+	/* Why? */			 
+	MECR = 0x09430943;
 
-  return sa1111_pcmcia_init(init);
+	return sa1111_pcmcia_init(skt);
 }
 
 static int
-adsbitsy_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+adsbitsy_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
-  unsigned int pa_dwr_mask, pa_dwr_set;
-  int ret;
+	unsigned int pa_dwr_mask, pa_dwr_set;
+	int ret;
 
-  switch (sock) {
-  case 0:
-    pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
-
-    switch (conf->vcc) {
-    default:
-    case 0:	pa_dwr_set = GPIO_GPIO0 | GPIO_GPIO1;	break;
-    case 33:	pa_dwr_set = GPIO_GPIO1;		break;
-    case 50:	pa_dwr_set = GPIO_GPIO0;		break;
-    }
-    break;
-
-  case 1:
-    pa_dwr_mask = GPIO_GPIO2 | GPIO_GPIO3;
-
-    switch (conf->vcc) {
-    default:
-    case 0:	pa_dwr_set = 0;				break;
-    case 33:	pa_dwr_set = GPIO_GPIO2;		break;
-    case 50:	pa_dwr_set = GPIO_GPIO3;		break;
-    }
-
-  default:
-    return -1;
-  }
-
-  if (conf->vpp != conf->vcc && conf->vpp != 0) {
-    printk(KERN_ERR "%s(): CF slot cannot support VPP %u\n",
-		__FUNCTION__, conf->vpp);
-    return -1;
-  }
-
-  ret = sa1111_pcmcia_configure_socket(sock, conf);
-  if (ret == 0) {
-    unsigned long flags;
-
-    local_irq_save(flags);
-    PA_DWR = (PA_DWR & ~pa_dwr_mask) | pa_dwr_set;
-    local_irq_restore(flags);
-  }
+	switch (skt->nr) {
+	case 0:
+		pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
+
+		switch (state->Vcc) {
+		default:
+		case 0:  pa_dwr_set = GPIO_GPIO0 | GPIO_GPIO1;	break;
+		case 33: pa_dwr_set = GPIO_GPIO1;		break;
+		case 50: pa_dwr_set = GPIO_GPIO0;		break;
+		}
+		break;
+
+	case 1:
+		pa_dwr_mask = GPIO_GPIO2 | GPIO_GPIO3;
+
+		switch (state->Vcc) {
+		default:
+		case 0:  pa_dwr_set = 0;			break;
+		case 33: pa_dwr_set = GPIO_GPIO2;		break;
+		case 50: pa_dwr_set = GPIO_GPIO3;		break;
+		}
+		break;	/* without this, socket 1 fell through to "return -1" */
+	default:
+		return -1;
+	}
+
+	if (state->Vpp != state->Vcc && state->Vpp != 0) {
+		printk(KERN_ERR "%s(): CF slot cannot support VPP %u\n",
+			__FUNCTION__, state->Vpp);
+		return -1;
+	}
+
+	ret = sa1111_pcmcia_configure_socket(skt, state);
+	if (ret == 0) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		PA_DWR = (PA_DWR & ~pa_dwr_mask) | pa_dwr_set;
+		local_irq_restore(flags);
+	}
 
-  return ret;
+	return ret;
 }
 
 static struct pcmcia_low_level adsbitsy_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .init			= adsbitsy_pcmcia_init,
-  .shutdown		= sa1111_pcmcia_shutdown,
-  .socket_state		= sa1111_pcmcia_socket_state,
-  .configure_socket	= adsbitsy_pcmcia_configure_socket,
-
-  .socket_init		= sa1111_pcmcia_socket_init,
-  .socket_suspend	= sa1111_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= adsbitsy_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= adsbitsy_pcmcia_configure_socket,
+	.socket_init		= sa1111_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_adsbitsy_init(struct device *dev)
 {
 	int ret = -ENODEV;
 	if (machine_is_adsbitsy())
-		ret = sa1100_register_pcmcia(&adsbitsy_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &adsbitsy_pcmcia_ops, 0, 2);
 	return ret;
-}
-
-void __exit pcmcia_adsbitsy_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&adsbitsy_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_assabet.c b/drivers/pcmcia/sa1100_assabet.c
--- a/drivers/pcmcia/sa1100_assabet.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_assabet.c	Mon Mar 31 13:41:08 2003
@@ -20,85 +20,47 @@
 
 #include "sa1100_generic.h"
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ ASSABET_IRQ_GPIO_CF_CD,   "CF_CD"   },
-	{ ASSABET_IRQ_GPIO_CF_BVD2, "CF_BVD2" },
-	{ ASSABET_IRQ_GPIO_CF_BVD1, "CF_BVD1" },
+static struct pcmcia_irqs irqs[] = {
+	{ 1, ASSABET_IRQ_GPIO_CF_CD,   "CF CD"   },
+	{ 1, ASSABET_IRQ_GPIO_CF_BVD2, "CF BVD2" },
+	{ 1, ASSABET_IRQ_GPIO_CF_BVD1, "CF BVD1" },
 };
 
-static int assabet_pcmcia_init(struct pcmcia_init *init)
+static int assabet_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i, res;
+	skt->irq = ASSABET_IRQ_GPIO_CF_IRQ;
 
-	/* Register interrupts */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (res)
-			goto irq_err;
-		set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-	}
-
-	init->socket_irq[0] = NO_IRQ;
-	init->socket_irq[1] = ASSABET_IRQ_GPIO_CF_IRQ;
-
-	/* There's only one slot, but it's "Slot 1": */
-	return 2;
-
- irq_err:
-	printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-		__FUNCTION__, irqs[i].irq, res);
-
-	while (i--)
-		free_irq(irqs[i].irq, NULL);
-
-	return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 /*
  * Release all resources.
  */
-static int assabet_pcmcia_shutdown(void)
+static void assabet_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		free_irq(irqs[i].irq, NULL);
-  
-	return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static void
-assabet_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+assabet_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
 	unsigned long levels = GPLR;
 
-	if (sock == 1) {
-		state->detect = (levels & ASSABET_GPIO_CF_CD) ? 0 : 1;
-		state->ready  = (levels & ASSABET_GPIO_CF_IRQ) ? 1 : 0;
-		state->bvd1   = (levels & ASSABET_GPIO_CF_BVD1) ? 1 : 0;
-		state->bvd2   = (levels & ASSABET_GPIO_CF_BVD2) ? 1 : 0;
-		state->wrprot = 0; /* Not available on Assabet. */
-		state->vs_3v  = 1; /* Can only apply 3.3V on Assabet. */
-		state->vs_Xv  = 0;
-	}
+	state->detect = (levels & ASSABET_GPIO_CF_CD) ? 0 : 1;
+	state->ready  = (levels & ASSABET_GPIO_CF_IRQ) ? 1 : 0;
+	state->bvd1   = (levels & ASSABET_GPIO_CF_BVD1) ? 1 : 0;
+	state->bvd2   = (levels & ASSABET_GPIO_CF_BVD2) ? 1 : 0;
+	state->wrprot = 0; /* Not available on Assabet. */
+	state->vs_3v  = 1; /* Can only apply 3.3V on Assabet. */
+	state->vs_Xv  = 0;
 }
 
 static int
-assabet_pcmcia_configure_socket(int sock, const struct pcmcia_configure *configure)
+assabet_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
 	unsigned int mask;
 
-	if (sock > 1)
-		return -1;
-
-	if (sock == 0)
-		return 0;
-
-	switch (configure->vcc) {
+	switch (state->Vcc) {
 	case 0:
 		mask = 0;
 		break;
@@ -113,13 +75,13 @@
 
 	default:
 		printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-			configure->vcc);
+			state->Vcc);
 		return -1;
 	}
 
 	/* Silently ignore Vpp, output enable, speaker enable. */
 
-	if (configure->reset)
+	if (state->flags & SS_RESET)
 		mask |= ASSABET_BCR_CF_RST;
 
 	ASSABET_BCR_frob(ASSABET_BCR_CF_RST | ASSABET_BCR_CF_PWR, mask);
@@ -132,48 +94,36 @@
  * be called at initialisation, power management event, or
  * pcmcia event.
  */
-static int assabet_pcmcia_socket_init(int sock)
+static void assabet_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	if (sock == 1) {
-		/*
-		 * Enable CF bus
-		 */
-		ASSABET_BCR_clear(ASSABET_BCR_CF_BUS_OFF);
+	/*
+	 * Enable CF bus
+	 */
+	ASSABET_BCR_clear(ASSABET_BCR_CF_BUS_OFF);
 
-		for (i = 0; i < ARRAY_SIZE(irqs); i++)
-			set_irq_type(irqs[i].irq, IRQT_BOTHEDGE);
-	}
-
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 /*
  * Disable card status IRQs on suspend.
  */
-static int assabet_pcmcia_socket_suspend(int sock)
+static void assabet_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	if (sock == 1) {
-		for (i = 0; i < ARRAY_SIZE(irqs); i++)
-			set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-
-		/*
-		 * Tristate the CF bus signals.  Also assert CF
-		 * reset as per user guide page 4-11.
-		 */
-		ASSABET_BCR_set(ASSABET_BCR_CF_BUS_OFF | ASSABET_BCR_CF_RST);
-	}
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 
-	return 0;
+	/*
+	 * Tristate the CF bus signals.  Also assert CF
+	 * reset as per user guide page 4-11.
+	 */
+	ASSABET_BCR_set(ASSABET_BCR_CF_BUS_OFF | ASSABET_BCR_CF_RST);
 }
 
 static struct pcmcia_low_level assabet_pcmcia_ops = { 
 	.owner			= THIS_MODULE,
-	.init			= assabet_pcmcia_init,
-	.shutdown		= assabet_pcmcia_shutdown,
+
+	.hw_init		= assabet_pcmcia_hw_init,
+	.hw_shutdown		= assabet_pcmcia_hw_shutdown,
+
 	.socket_state		= assabet_pcmcia_socket_state,
 	.configure_socket	= assabet_pcmcia_configure_socket,
 
@@ -185,20 +135,8 @@
 {
 	int ret = -ENODEV;
 
-	if (machine_is_assabet()) {
-		if (!machine_has_neponset())
-			ret = sa1100_register_pcmcia(&assabet_pcmcia_ops, dev);
-#ifndef CONFIG_ASSABET_NEPONSET
-		else
-			printk(KERN_ERR "Card Services disabled: missing "
-				"Neponset support\n");
-#endif
-	}
-	return ret;
-}
+	if (machine_is_assabet() && !machine_has_neponset())
+		ret = sa11xx_drv_pcmcia_probe(dev, &assabet_pcmcia_ops, 1, 1);
 
-void __exit pcmcia_assabet_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&assabet_pcmcia_ops, dev);
+	return ret;
 }
-
diff -Nru a/drivers/pcmcia/sa1100_badge4.c b/drivers/pcmcia/sa1100_badge4.c
--- a/drivers/pcmcia/sa1100_badge4.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/sa1100_badge4.c	Mon Mar 31 13:41:06 2003
@@ -24,7 +24,6 @@
 #include <asm/arch/badge4.h>
 #include <asm/hardware/sa1111.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
 /*
@@ -62,27 +61,6 @@
 static int badge4_pcmvpp = 50;  /* pins 2 and 4 jumpered on JP6 */
 static int badge4_cfvcc = 33;   /* pins 1 and 2 jumpered on JP10 */
 
-static int badge4_pcmcia_init(struct pcmcia_init *init)
-{
-	printk(KERN_INFO
-	       "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
-	       __FUNCTION__,
-	       badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
-
-	return sa1111_pcmcia_init(init);
-}
-
-static int badge4_pcmcia_shutdown(void)
-{
-	int rc = sa1111_pcmcia_shutdown();
-
-	/* be sure to disable 5v0 use */
-	badge4_set_5V(BADGE4_5V_PCMCIA_SOCK0, 0);
-	badge4_set_5V(BADGE4_5V_PCMCIA_SOCK1, 0);
-
-	return rc;
-}
-
 static void complain_about_jumpering(const char *whom,
 				     const char *supply,
 				     int given, int wanted)
@@ -97,32 +75,32 @@
 }
 
 static int
-badge4_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+badge4_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
 	int ret;
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
-		if ((conf->vcc != 0) &&
-		    (conf->vcc != badge4_pcmvcc)) {
+		if ((state->Vcc != 0) &&
+		    (state->Vcc != badge4_pcmvcc)) {
 			complain_about_jumpering(__FUNCTION__, "pcmvcc",
-						 badge4_pcmvcc, conf->vcc);
+						 badge4_pcmvcc, state->Vcc);
 			// Apply power regardless of the jumpering.
 			// return -1;
 		}
-		if ((conf->vpp != 0) &&
-		    (conf->vpp != badge4_pcmvpp)) {
+		if ((state->Vpp != 0) &&
+		    (state->Vpp != badge4_pcmvpp)) {
 			complain_about_jumpering(__FUNCTION__, "pcmvpp",
-						 badge4_pcmvpp, conf->vpp);
+						 badge4_pcmvpp, state->Vpp);
 			return -1;
 		}
 		break;
 
 	case 1:
-		if ((conf->vcc != 0) &&
-		    (conf->vcc != badge4_cfvcc)) {
+		if ((state->Vcc != 0) &&
+		    (state->Vcc != badge4_cfvcc)) {
 			complain_about_jumpering(__FUNCTION__, "cfvcc",
-						 badge4_cfvcc, conf->vcc);
+						 badge4_cfvcc, state->Vcc);
 			return -1;
 		}
 		break;
@@ -131,16 +109,16 @@
 		return -1;
 	}
 
-	ret = sa1111_pcmcia_configure_socket(sock, conf);
+	ret = sa1111_pcmcia_configure_socket(skt, state);
 	if (ret == 0) {
 		unsigned long flags;
 		int need5V;
 
 		local_irq_save(flags);
 
-		need5V = ((conf->vcc == 50) || (conf->vpp == 50));
+		need5V = ((state->Vcc == 50) || (state->Vpp == 50));
 
-		badge4_set_5V(BADGE4_5V_PCMCIA_SOCK(conf->sock), need5V);
+		badge4_set_5V(BADGE4_5V_PCMCIA_SOCK(skt->nr), need5V);
 
 		local_irq_restore(flags);
 	}
@@ -150,8 +128,8 @@
 
 static struct pcmcia_low_level badge4_pcmcia_ops = {
 	.owner			= THIS_MODULE,
-	.init			= badge4_pcmcia_init,
-	.shutdown		= badge4_pcmcia_shutdown,
+	.hw_init		= sa1111_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
 	.socket_state		= sa1111_pcmcia_socket_state,
 	.configure_socket	= badge4_pcmcia_configure_socket,
 
@@ -163,15 +141,16 @@
 {
 	int ret = -ENODEV;
 
-	if (machine_is_badge4())
-		ret = sa1100_register_pcmcia(&badge4_pcmcia_ops, dev);
+	if (machine_is_badge4()) {
+		printk(KERN_INFO
+		       "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
+		       __FUNCTION__,
+		       badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
 
-	return ret;
-}
+		ret = sa11xx_drv_pcmcia_probe(dev, &badge4_pcmcia_ops, 0, 2);
+	}
 
-void __devexit pcmcia_badge4_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&badge4_pcmcia_ops, dev);
+	return ret;
 }
 
 static int __init pcmv_setup(char *s)
diff -Nru a/drivers/pcmcia/sa1100_cerf.c b/drivers/pcmcia/sa1100_cerf.c
--- a/drivers/pcmcia/sa1100_cerf.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_cerf.c	Mon Mar 31 13:41:08 2003
@@ -23,139 +23,91 @@
 #define CERF_SOCKET	1
 #endif
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_GPIO_CF_CD,   "CF_CD"   },
-	{ IRQ_GPIO_CF_BVD2, "CF_BVD2" },
-	{ IRQ_GPIO_CF_BVD1, "CF_BVD1" }
+static struct pcmcia_irqs irqs[] = {
+	{ CERF_SOCKET, IRQ_GPIO_CF_CD,   "CF_CD"   },
+	{ CERF_SOCKET, IRQ_GPIO_CF_BVD2, "CF_BVD2" },
+	{ CERF_SOCKET, IRQ_GPIO_CF_BVD1, "CF_BVD1" }
 };
 
-static int cerf_pcmcia_init(struct pcmcia_init *init)
+static int cerf_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-  int i, res;
+	skt->irq = IRQ_GPIO_CF_IRQ;
 
-  for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-    res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt, SA_INTERRUPT,
-		      irqs[i].str, NULL);
-    if (res)
-      goto irq_err;
-    set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-  }
-
-  init->socket_irq[CERF_SOCKET] = IRQ_GPIO_CF_IRQ;
-
-  return 2;
-
- irq_err:
-  printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	 __FUNCTION__, irqs[i].irq, res);
-
-  while (i--)
-    free_irq(irqs[i].irq, NULL);
-
-  return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int cerf_pcmcia_shutdown(void)
+static void cerf_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  int i;
-
-  for (i = 0; i < ARRAY_SIZE(irqs); i++)
-    free_irq(irqs[i].irq, NULL);
-
-  return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static void cerf_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+cerf_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
-  unsigned long levels=GPLR;
+	unsigned long levels = GPLR;
 
-  if (sock == CERF_SOCKET) {
-    state->detect=((levels & GPIO_CF_CD)==0)?1:0;
-    state->ready=(levels & GPIO_CF_IRQ)?1:0;
-    state->bvd1=(levels & GPIO_CF_BVD1)?1:0;
-    state->bvd2=(levels & GPIO_CF_BVD2)?1:0;
-    state->wrprot=0;
-    state->vs_3v=1;
-    state->vs_Xv=0;
-  }
+	state->detect=((levels & GPIO_CF_CD)==0)?1:0;
+	state->ready=(levels & GPIO_CF_IRQ)?1:0;
+	state->bvd1=(levels & GPIO_CF_BVD1)?1:0;
+	state->bvd2=(levels & GPIO_CF_BVD2)?1:0;
+	state->wrprot=0;
+	state->vs_3v=1;
+	state->vs_Xv=0;
 }
 
-static int cerf_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+cerf_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			     const socket_state_t *state)
 {
-  if (sock>1)
-    return -1;
-
-  if (sock != CERF_SOCKET)
-    return 0;
+	switch (state->Vcc) {
+	case 0:
+		break;
 
-  switch(configure->vcc){
-  case 0:
-    break;
-
-  case 50:
-  case 33:
+	case 50:
+	case 33:
 #ifdef CONFIG_SA1100_CERF_CPLD
-     GPCR = GPIO_PWR_SHUTDOWN;
+		GPCR = GPIO_PWR_SHUTDOWN;
 #endif
-     break;
+		break;
 
-  default:
-    printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-	   configure->vcc);
-    return -1;
-  }
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
+			__FUNCTION__, state->Vcc);
+		return -1;
+	}
 
-  if(configure->reset)
-  {
+	if (state->flags & SS_RESET) {
 #ifdef CONFIG_SA1100_CERF_CPLD
-    GPSR = GPIO_CF_RESET;
+		GPSR = GPIO_CF_RESET;
 #endif
-  }
-  else
-  {
+	} else {
 #ifdef CONFIG_SA1100_CERF_CPLD
-    GPCR = GPIO_CF_RESET;
+		GPCR = GPIO_CF_RESET;
 #endif
-  }
+	}
 
-  return 0;
+	return 0;
 }
 
-static int cerf_pcmcia_socket_init(int sock)
+static void cerf_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  int i;
-
-  if (sock == CERF_SOCKET)
-    for (i = 0; i < ARRAY_SIZE(irqs); i++)
-      set_irq_type(irqs[i].irq, IRQT_BOTHEDGE);
-
-  return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int cerf_pcmcia_socket_suspend(int sock)
+static void cerf_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  int i;
-
-  if (sock == CERF_SOCKET)
-    for (i = 0; i < ARRAY_SIZE(irqs); i++)
-      set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-
-  return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level cerf_pcmcia_ops = { 
-  .owner		= THIS_MODULE,
-  .init			= cerf_pcmcia_init,
-  .shutdown		= cerf_pcmcia_shutdown,
-  .socket_state		= cerf_pcmcia_socket_state,
-  .configure_socket	= cerf_pcmcia_configure_socket,
+	.owner			= THIS_MODULE,
+	.hw_init		= cerf_pcmcia_hw_init,
+	.hw_shutdown		= cerf_pcmcia_hw_shutdown,
+	.socket_state		= cerf_pcmcia_socket_state,
+	.configure_socket	= cerf_pcmcia_configure_socket,
 
-  .socket_init		= cerf_pcmcia_socket_init,
-  .socket_suspend	= cerf_pcmcia_socket_suspend,
+	.socket_init		= cerf_pcmcia_socket_init,
+	.socket_suspend		= cerf_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_cerf_init(struct device *dev)
@@ -163,12 +115,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_cerf())
-		ret = sa1100_register_pcmcia(&cerf_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &cerf_pcmcia_ops, CERF_SOCKET, 1);
 
 	return ret;
-}
-
-void __exit pcmcia_cerf_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&cerf_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_flexanet.c b/drivers/pcmcia/sa1100_flexanet.c
--- a/drivers/pcmcia/sa1100_flexanet.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_flexanet.c	Mon Mar 31 13:41:07 2003
@@ -16,69 +16,32 @@
 #include <asm/irq.h>
 #include "sa1100_generic.h"
 
-static struct {
-  int irq;
-  const char *name;
-} irqs[] = {
-  { IRQ_GPIO_CF1_CD,   "CF1_CD"   },
-  { IRQ_GPIO_CF1_BVD1, "CF1_BVD1" },
-  { IRQ_GPIO_CF2_CD,   "CF2_CD"   },
-  { IRQ_GPIO_CF2_BVD1, "CF2_BVD1" }
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_GPIO_CF1_CD,   "CF1_CD"   },
+	{ 0, IRQ_GPIO_CF1_BVD1, "CF1_BVD1" },
+	{ 1, IRQ_GPIO_CF2_CD,   "CF2_CD"   },
+	{ 1, IRQ_GPIO_CF2_BVD1, "CF2_BVD1" }
 };
 
 /*
  * Socket initialization.
  *
  * Called by sa1100_pcmcia_driver_init on startup.
- * Must return the number of slots.
- *
  */
-static int flexanet_pcmcia_init(struct pcmcia_init *init)
+static int flexanet_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-  int i, res;
-
-  /* Configure the GPIOs as inputs (BVD2 is not implemented) */
-  GPDR &= ~(GPIO_CF1_NCD | GPIO_CF1_BVD1 | GPIO_CF1_IRQ |
-            GPIO_CF2_NCD | GPIO_CF2_BVD1 | GPIO_CF2_IRQ );
-
-  /* Register the socket interrupts (not the card interrupts) */
-  for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-    res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt, SA_INTERRUPT,
-		      irqs[i].name, NULL);
-    if (res < 0)
-      break;
-    set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-  }
-
-  init->socket_irq[0] = IRQ_GPIO_CF1_IRQ;
-  init->socket_irq[1] = IRQ_GPIO_CF2_IRQ;
-
-  /* If we failed, then free all interrupts requested thus far. */
-  if (res < 0) {
-    printk(KERN_ERR "%s: request for IRQ%d failed: %d\n",
-	   __FUNCTION__, irqs[i].irq, res);
-    while (i--)
-      free_irq(irqs[i].irq, NULL);
-    return res;
-  }
+	skt->irq = skt->nr ? IRQ_GPIO_CF2_IRQ : IRQ_GPIO_CF1_IRQ;
 
-  return 2;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 
 /*
  * Socket shutdown
- *
  */
-static int flexanet_pcmcia_shutdown(void)
+static void flexanet_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  int i;
-
-  /* disable IRQs */
-  for (i = 0; i < ARRAY_SIZE(irqs); i++)
-    free_irq(irqs[i].irq, NULL);
-
-  return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 
@@ -88,109 +51,96 @@
  *  Sockets in Flexanet are 3.3V only, without BVD2.
  *
  */
-static void flexanet_pcmcia_socket_state(int sock, struct pcmcia_state *state)
-{
-  unsigned long levels = GPLR; /* Sense the GPIOs, asynchronously */
-
-  switch (sock) {
-  case 0: /* Socket 0 */
-    state->detect = ((levels & GPIO_CF1_NCD)==0)?1:0;
-    state->ready  = (levels & GPIO_CF1_IRQ)?1:0;
-    state->bvd1   = (levels & GPIO_CF1_BVD1)?1:0;
-    state->bvd2   = 1;
-    state->wrprot = 0;
-    state->vs_3v  = 1;
-    state->vs_Xv  = 0;
-    break;
-
-  case 1: /* Socket 1 */
-    state->detect = ((levels & GPIO_CF2_NCD)==0)?1:0;
-    state->ready  = (levels & GPIO_CF2_IRQ)?1:0;
-    state->bvd1   = (levels & GPIO_CF2_BVD1)?1:0;
-    state->bvd2   = 1;
-    state->wrprot = 0;
-    state->vs_3v  = 1;
-    state->vs_Xv  = 0;
-    break;
-  }
+static void
+flexanet_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			     struct pcmcia_state *state)
+{
+	unsigned long levels = GPLR; /* Sense the GPIOs, asynchronously */
+
+	switch (skt->nr) {
+	case 0: /* Socket 0 */
+		state->detect = ((levels & GPIO_CF1_NCD)==0)?1:0;
+		state->ready  = (levels & GPIO_CF1_IRQ)?1:0;
+		state->bvd1   = (levels & GPIO_CF1_BVD1)?1:0;
+		state->bvd2   = 1;
+		state->wrprot = 0;
+		state->vs_3v  = 1;
+		state->vs_Xv  = 0;
+		break;
+
+	case 1: /* Socket 1 */
+		state->detect = ((levels & GPIO_CF2_NCD)==0)?1:0;
+		state->ready  = (levels & GPIO_CF2_IRQ)?1:0;
+		state->bvd1   = (levels & GPIO_CF2_BVD1)?1:0;
+		state->bvd2   = 1;
+		state->wrprot = 0;
+		state->vs_3v  = 1;
+		state->vs_Xv  = 0;
+		break;
+	}
 }
 
 
 /*
  *
  */
-static int flexanet_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+flexanet_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				 const socket_state_t *state)
 {
-  unsigned long value, flags, mask;
-
-  if (sock > 1)
-    return -1;
+	unsigned long value, flags, mask;
 
-  /* Ignore the VCC level since it is 3.3V and always on */
-  switch (configure->vcc)
-  {
-    case 0:
-      printk(KERN_WARNING "%s(): CS asked to power off.\n", __FUNCTION__);
-      break;
+	/* Ignore the VCC level since it is 3.3V and always on */
+	switch (state->Vcc) {
+	case 0:
+		printk(KERN_WARNING "%s(): CS asked to power off.\n",
+			__FUNCTION__);
+		break;
 
-    case 50:
-      printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
-  	   __FUNCTION__);
+	case 50:
+		printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
+			__FUNCTION__);
 
-    case 33:
-      break;
+	case 33:
+		break;
 
-    default:
-      printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-  	   configure->vcc);
-      return -1;
-  }
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
+		       state->Vcc);
+		return -1;
+	}
 
-  /* Reset the slot(s) using the controls in the BCR */
-  mask = 0;
+	/* Reset the slot(s) using the controls in the BCR */
+	mask = 0;
 
-  switch (sock)
-  {
-    case 0 : mask = FHH_BCR_CF1_RST; break;
-    case 1 : mask = FHH_BCR_CF2_RST; break;
-  }
+	switch (skt->nr) {
+	case 0:
+		mask = FHH_BCR_CF1_RST;
+		break;
+	case 1:
+		mask = FHH_BCR_CF2_RST;
+		break;
+	}
 
-  local_irq_save(flags);
+	local_irq_save(flags);
 
-  value = flexanet_BCR;
-  value = (configure->reset) ? (value | mask) : (value & ~mask);
-  FHH_BCR = flexanet_BCR = value;
+	value = flexanet_BCR;
+	value = (state->flags & SS_RESET) ? (value | mask) : (value & ~mask);
+	FHH_BCR = flexanet_BCR = value;
 
-  local_irq_restore(flags);
+	local_irq_restore(flags);
 
-  return 0;
+	return 0;
 }
 
-static int flexanet_pcmcia_socket_init(int sock)
+static void flexanet_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 0) {
-    set_irq_type(IRQ_GPIO_CF1_CD, IRQT_BOTHEDGE);
-    set_irq_type(IRQ_GPIO_CF1_BVD1, IRQT_BOTHEDGE);
-  } else if (sock == 1) {
-    set_irq_type(IRQ_GPIO_CF2_CD, IRQT_BOTHEDGE);
-    set_irq_type(IRQ_GPIO_CF2_BVD1, IRQT_BOTHEDGE);
-  }
-
-  return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int flexanet_pcmcia_socket_suspend(int sock)
+static void flexanet_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 0) {
-    set_irq_type(IRQ_GPIO_CF1_CD, IRQT_NOEDGE);
-    set_irq_type(IRQ_GPIO_CF1_BVD1, IRQT_NOEDGE);
-  } else if (sock == 1) {
-    set_irq_type(IRQ_GPIO_CF2_CD, IRQT_NOEDGE);
-    set_irq_type(IRQ_GPIO_CF2_BVD1, IRQT_NOEDGE);
-  }
-
-  return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 /*
@@ -198,14 +148,13 @@
  *
  */
 static struct pcmcia_low_level flexanet_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .init			= flexanet_pcmcia_init,
-  .shutdown		= flexanet_pcmcia_shutdown,
-  .socket_state		= flexanet_pcmcia_socket_state,
-  .configure_socket	= flexanet_pcmcia_configure_socket,
-
-  .socket_init		= flexanet_pcmcia_socket_init,
-  .socket_suspend	= flexanet_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= flexanet_pcmcia_hw_init,
+	.hw_shutdown		= flexanet_pcmcia_hw_shutdown,
+	.socket_state		= flexanet_pcmcia_socket_state,
+	.configure_socket	= flexanet_pcmcia_configure_socket,
+	.socket_init		= flexanet_pcmcia_socket_init,
+	.socket_suspend		= flexanet_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_flexanet_init(struct device *dev)
@@ -213,13 +162,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_flexanet())
-		ret = sa1100_register_pcmcia(&flexanet_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &flexanet_pcmcia_ops, 0, 2);
 
 	return ret;
 }
-
-void __exit pcmcia_flexanet_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&flexanet_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_freebird.c b/drivers/pcmcia/sa1100_freebird.c
--- a/drivers/pcmcia/sa1100_freebird.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/sa1100_freebird.c	Mon Mar 31 13:41:06 2003
@@ -15,155 +15,113 @@
 #include <asm/irq.h>
 #include "sa1100_generic.h"
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_GPIO_FREEBIRD_CF_CD,  "CF_CD"   },
-	{ IRQ_GPIO_FREEBIRD_CF_BVD, "CF_BVD1" },
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_GPIO_FREEBIRD_CF_CD,  "CF_CD"   },
+	{ 0, IRQ_GPIO_FREEBIRD_CF_BVD, "CF_BVD1" },
 };
 
-static int freebird_pcmcia_init(struct pcmcia_init *init){
-  int i, res;
-
-  /* Enable Linkup CF card */
-  LINKUP_PRC = 0xc0;
-  mdelay(100);
-  LINKUP_PRC = 0xc1;
-  mdelay(100);
-  LINKUP_PRC = 0xd1;
-  mdelay(100);
-  LINKUP_PRC = 0xd1;
-  mdelay(100);
-  LINKUP_PRC = 0xc0;
-
-  /* Register interrupts */
-  for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-    res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt, SA_INTERRUPT,
-    		      irqs[i].str, NULL);
-    if (res)
-      goto irq_err;
-    set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-  }
-
-  init->socket_irq[0] = IRQ_GPIO_FREEBIRD_CF_IRQ;
-
-  /* There's only one slot, but it's "Slot 1": */
-  return 2;
-
-irq_err:
-  printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	 __FUNCTION__, irqs[i].irq, res);
+static int freebird_pcmcia_init(struct sa1100_pcmcia_socket *skt)
+{
+	/* Enable Linkup CF card */
+	LINKUP_PRC = 0xc0;
+	mdelay(100);
+	LINKUP_PRC = 0xc1;
+	mdelay(100);
+	LINKUP_PRC = 0xd1;
+	mdelay(100);
+	LINKUP_PRC = 0xd1;
+	mdelay(100);
+	LINKUP_PRC = 0xc0;
 
-  while (i--)
-    free_irq(irqs[i].irq, NULL);
+	skt->irq = IRQ_GPIO_FREEBIRD_CF_IRQ;
 
-  return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int freebird_pcmcia_shutdown(void)
+static void freebird_pcmcia_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  int i;
-
-  /* disable IRQs */
-  for (i = 0; i < ARRAY_SIZE(irqs); i++)
-    free_irq(irqs[i].irq, NULL);
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 
-  /* Disable CF card */
-  LINKUP_PRC = 0x40;  /* SSP=1   SOE=0 */
-  mdelay(100);
-
-  return 0;
+	/* Disable CF card */
+	LINKUP_PRC = 0x40;  /* SSP=1   SOE=0 */
+	mdelay(100);
 }
 
-static void freebird_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+freebird_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
-  unsigned long levels = LINKUP_PRS;
-//printk("LINKUP_PRS=%x\n",levels);
+	unsigned long levels = LINKUP_PRS;
+//	printk("LINKUP_PRS=%x\n",levels);
 
-  if (sock == 0) {
-    state->detect = ((levels & (LINKUP_CD1 | LINKUP_CD2))==0)?1:0;
-    state->ready  = (levels & LINKUP_RDY)?1:0;
-    state->bvd1   = (levels & LINKUP_BVD1)?1:0;
-    state->bvd2   = (levels & LINKUP_BVD2)?1:0;
-    state->wrprot = 0; /* Not available on Assabet. */
-    state->vs_3v  = 1;  /* Can only apply 3.3V on Assabet. */
-    state->vs_Xv  = 0;
-  }
+	state->detect = ((levels & (LINKUP_CD1 | LINKUP_CD2))==0)?1:0;
+	state->ready  = (levels & LINKUP_RDY)?1:0;
+	state->bvd1   = (levels & LINKUP_BVD1)?1:0;
+	state->bvd2   = (levels & LINKUP_BVD2)?1:0;
+	state->wrprot = 0; /* Not available on Assabet. */
+	state->vs_3v  = 1;  /* Can only apply 3.3V on Assabet. */
+	state->vs_Xv  = 0;
 }
 
-static int freebird_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+freebird_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				 socket_state_t *state)
 {
-  unsigned long value, flags;
-
-  if(sock>1) return -1;
-
-  if(sock==1) return 0;
-
-  local_irq_save(flags);
+	unsigned long value, flags;
 
-  value = 0xc0;   /* SSP=1  SOE=1  CFE=1 */
+	local_irq_save(flags);
 
-  switch(configure->vcc){
-  case 0:
+	value = 0xc0;   /* SSP=1  SOE=1  CFE=1 */
 
-    break;
+	switch (state->Vcc) {
+	case 0:
+		break;
 
-  case 50:
-    printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
-	   __FUNCTION__);
+	case 50:
+		printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
+			__FUNCTION__);
 
-  case 33:  /* Can only apply 3.3V to the CF slot. */
-    value |= LINKUP_S1;
-    break;
+	case 33:  /* Can only apply 3.3V to the CF slot. */
+		value |= LINKUP_S1;
+		break;
 
-  default:
-    printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-	   configure->vcc);
-    local_irq_restore(flags);
-    return -1;
-  }
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
+			__FUNCTION__, state->Vcc);
+		local_irq_restore(flags);
+		return -1;
+	}
 
-  if (configure->reset)
-  value = (configure->reset) ? (value | LINKUP_RESET) : (value & ~LINKUP_RESET);
+	if (state->flags & SS_RESET)
+		value |= LINKUP_RESET;
 
-  /* Silently ignore Vpp, output enable, speaker enable. */
+	/* Silently ignore Vpp, output enable, speaker enable. */
 
-  LINKUP_PRC = value;
-//printk("LINKUP_PRC=%x\n",value);
-  local_irq_restore(flags);
+	LINKUP_PRC = value;
+//	printk("LINKUP_PRC=%x\n",value);
+	local_irq_restore(flags);
 
-  return 0;
+	return 0;
 }
 
-static int freebird_pcmcia_socket_init(int sock)
+static void freebird_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 1) {
-    set_irq_type(IRQ_GPIO_FREEBIRD_CF_CD, IRQT_BOTHEDGE);
-    set_irq_type(IRQ_GPIO_FREEBIRD_CF_BVD, IRQT_BOTHEDGE);
-  }
-  return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int freebird_pcmcia_socket_suspend(int sock)
+static void freebird_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 1) {
-    set_irq_type(IRQ_GPIO_FREEBIRD_CF_CD, IRQT_NOEDGE);
-    set_irq_type(IRQ_GPIO_FREEBIRD_CF_BVD, IRQT_NOEDGE);
-  }
-  return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level freebird_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .init			= freebird_pcmcia_init,
-  .shutdown		= freebird_pcmcia_shutdown,
-  .socket_state		= freebird_pcmcia_socket_state,
-  .configure_socket	= freebird_pcmcia_configure_socket,
+	.owner			= THIS_MODULE,
+	.hw_init		= freebird_pcmcia_init,
+	.hw_shutdown		= freebird_pcmcia_shutdown,
+	.socket_state		= freebird_pcmcia_socket_state,
+	.configure_socket	= freebird_pcmcia_configure_socket,
 
-  .socket_init		= freebird_pcmcia_socket_init,
-  .socket_suspend	= freebird_pcmcia_socket_suspend,
+	.socket_init		= freebird_pcmcia_socket_init,
+	.socket_suspend		= freebird_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_freebird_init(struct device *dev)
@@ -171,12 +129,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_freebird())
-		ret = sa1100_register_pcmcia(&freebird_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &freebird_pcmcia_ops, 0, 1);
 
 	return ret;
-}
-
-void __exit pcmcia_freebird_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&freebird_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
--- a/drivers/pcmcia/sa1100_generic.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_generic.c	Mon Mar 31 13:41:07 2003
@@ -37,1172 +37,124 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/config.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/workqueue.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/notifier.h>
 #include <linux/proc_fs.h>
-#include <linux/version.h>
 
 #include <pcmcia/version.h>
 #include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
 #include <pcmcia/ss.h>
-#include <pcmcia/bus_ops.h>
-
-#include <asm/hardware.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/system.h>
-#include <asm/arch/assabet.h>
 
 #include "sa1100.h"
 
-#ifdef PCMCIA_DEBUG
-static int pc_debug;
-#endif
-
-/* This structure maintains housekeeping state for each socket, such
- * as the last known values of the card detect pins, or the Card Services
- * callback value associated with the socket:
- */
-static int sa1100_pcmcia_socket_count;
-static struct sa1100_pcmcia_socket sa1100_pcmcia_socket[SA1100_PCMCIA_MAX_SOCK];
-
-#define PCMCIA_SOCKET(x)	(sa1100_pcmcia_socket + (x))
-
-/* Returned by the low-level PCMCIA interface: */
-static struct pcmcia_low_level *pcmcia_low_level;
-
-static struct timer_list poll_timer;
-static struct work_struct sa1100_pcmcia_task;
-
-/*
- * sa1100_pcmcia_default_mecr_timing
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * Calculate MECR clock wait states for given CPU clock
- * speed and command wait state. This function can be over-
- * written by a board specific version.
- *
- * The default is to simply calculate the BS values as specified in
- * the INTEL SA1100 development manual
- * "Expansion Memory (PCMCIA) Configuration Register (MECR)"
- * that's section 10.2.5 in _my_ version of the manuial ;)
- */
-static unsigned int
-sa1100_pcmcia_default_mecr_timing(unsigned int sock, unsigned int cpu_speed,
-				  unsigned int cmd_time)
-{
-	return sa1100_pcmcia_mecr_bs(cmd_time, cpu_speed);
-}
-
-/* sa1100_pcmcia_set_mecr()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * set MECR value for socket <sock> based on this sockets
- * io, mem and attribute space access speed.
- * Call board specific BS value calculation to allow boards
- * to tweak the BS values.
- */
-static int
-sa1100_pcmcia_set_mecr(struct sa1100_pcmcia_socket *skt, unsigned int cpu_clock)
-{
-	u32 mecr;
-	unsigned long flags;
-	unsigned int bs;
-
-	local_irq_save(flags);
-
-	bs = skt->ops->socket_get_timing(skt->nr, cpu_clock, skt->speed_io);
-
-	mecr = MECR;
-	MECR_FAST_SET(mecr, skt->nr, 0);
-	MECR_BSIO_SET(mecr, skt->nr, bs );
-	MECR_BSA_SET(mecr, skt->nr, bs );
-	MECR_BSM_SET(mecr, skt->nr, bs );
-	MECR = mecr;
-
-	local_irq_restore(flags);
-
-	DEBUG(4, "%s(): sock %u FAST %X  BSM %X  BSA %X  BSIO %X\n",
-	      __FUNCTION__, skt->nr, MECR_FAST_GET(mecr, skt->nr),
-	      MECR_BSM_GET(mecr, skt->nr), MECR_BSA_GET(mecr, skt->nr),
-	      MECR_BSIO_GET(mecr, skt->nr));
-
-	return 0;
-}
-
-/*
- * sa1100_pcmcia_config_skt
- * ^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * Convert PCMCIA socket state to our socket configure structure.
- */
-static int
-sa1100_pcmcia_config_skt(struct sa1100_pcmcia_socket *skt, socket_state_t *state)
-{
-	struct pcmcia_configure conf;
-	int ret;
-
-	conf.vcc     = state->Vcc;
-	conf.vpp     = state->Vpp;
-	conf.output  = state->flags & SS_OUTPUT_ENA ? 1 : 0;
-	conf.speaker = state->flags & SS_SPKR_ENA ? 1 : 0;
-	conf.reset   = state->flags & SS_RESET ? 1 : 0;
-
-	ret = skt->ops->configure_socket(skt->nr, &conf);
-	if (ret == 0) {
-		/*
-		 * This really needs a better solution.  The IRQ
-		 * may or may not be claimed by the driver.
-		 */
-		if (skt->irq_state != 1 && state->io_irq) {
-			skt->irq_state = 1;
-			set_irq_type(skt->irq, IRQT_FALLING);
-		} else if (skt->irq_state == 1 && state->io_irq == 0) {
-		  	skt->irq_state = 0;
-			set_irq_type(skt->irq, IRQT_NOEDGE);
-		}
-
-		skt->cs_state = *state;
-	}
-
-	if (ret < 0)
-		printk(KERN_ERR "sa1100_pcmcia: unable to configure "
-		       "socket %d\n", skt->nr);
-
-	return ret;
-}
-
-/* sa1100_pcmcia_sock_init()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * (Re-)Initialise the socket, turning on status interrupts
- * and PCMCIA bus.  This must wait for power to stabilise
- * so that the card status signals report correctly.
- *
- * Returns: 0
- */
-static int sa1100_pcmcia_sock_init(unsigned int sock)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-
-  DEBUG(2, "%s(): initializing socket %u\n", __FUNCTION__, sock);
-
-  sa1100_pcmcia_config_skt(skt, &dead_socket);
-
-  return skt->ops->socket_init(skt->nr);
-}
-
-
-/*
- * sa1100_pcmcia_suspend()
- * ^^^^^^^^^^^^^^^^^^^^^^^
- *
- * Remove power on the socket, disable IRQs from the card.
- * Turn off status interrupts, and disable the PCMCIA bus.
- *
- * Returns: 0
- */
-static int sa1100_pcmcia_suspend(unsigned int sock)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  int ret;
-
-  DEBUG(2, "%s(): suspending socket %u\n", __FUNCTION__, skt->nr);
-
-  ret = sa1100_pcmcia_config_skt(skt, &dead_socket);
-
-  if (ret == 0)
-    ret = skt->ops->socket_suspend(skt->nr);
-
-  return ret;
-}
-
-
-/* sa1100_pcmcia_events()
- * ^^^^^^^^^^^^^^^^^^^^^^
- * Helper routine to generate a Card Services event mask based on
- * state information obtained from the kernel low-level PCMCIA layer
- * in a recent (and previous) sampling. Updates `prev_state'.
- *
- * Returns: an event mask for the given socket state.
- */
-static inline unsigned int
-sa1100_pcmcia_events(struct pcmcia_state *state,
-		     struct pcmcia_state *prev_state,
-		     unsigned int mask, unsigned int flags)
-{
-  unsigned int events = 0;
-
-  if (state->detect != prev_state->detect) {
-    DEBUG(3, "%s(): card detect value %u\n", __FUNCTION__, state->detect);
-
-    events |= SS_DETECT;
-  }
-
-  if (state->ready != prev_state->ready) {
-    DEBUG(3, "%s(): card ready value %u\n", __FUNCTION__, state->ready);
-
-    events |= flags & SS_IOCARD ? 0 : SS_READY;
-  }
-
-  if (state->bvd1 != prev_state->bvd1) {
-    DEBUG(3, "%s(): card BVD1 value %u\n", __FUNCTION__, state->bvd1);
-
-    events |= flags & SS_IOCARD ? SS_STSCHG : SS_BATDEAD;
-  }
-
-  if (state->bvd2 != prev_state->bvd2) {
-    DEBUG(3, "%s(): card BVD2 value %u\n", __FUNCTION__, state->bvd2);
-
-    events |= flags & SS_IOCARD ? 0 : SS_BATWARN;
-  }
-
-  *prev_state = *state;
-
-  events &= mask;
-
-  DEBUG(2, "events: %s%s%s%s%s%s\n",
-	events == 0         ? "<NONE>"   : "",
-	events & SS_DETECT  ? "DETECT "  : "",
-	events & SS_READY   ? "READY "   : "",
-	events & SS_BATDEAD ? "BATDEAD " : "",
-	events & SS_BATWARN ? "BATWARN " : "",
-	events & SS_STSCHG  ? "STSCHG "  : "");
-
-  return events;
-}  /* sa1100_pcmcia_events() */
-
-
-/* sa1100_pcmcia_task_handler()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Processes serviceable socket events using the "eventd" thread context.
- *
- * Event processing (specifically, the invocation of the Card Services event
- * callback) occurs in this thread rather than in the actual interrupt
- * handler due to the use of scheduling operations in the PCMCIA core.
- */
-static void sa1100_pcmcia_task_handler(void *data)
-{
-  struct pcmcia_state state;
-  unsigned int all_events;
-
-  DEBUG(4, "%s(): entering PCMCIA monitoring thread\n", __FUNCTION__);
-
-  do {
-    unsigned int events;
-    int i;
-
-    DEBUG(4, "%s(): interrogating low-level PCMCIA service\n", __FUNCTION__);
-
-    all_events = 0;
-
-    for (i = 0; i < sa1100_pcmcia_socket_count; i++) {
-      struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(i);
-
-      memset(&state, 0, sizeof(state));
-
-      skt->ops->socket_state(skt->nr, &state);
-
-      events = sa1100_pcmcia_events(&state, &skt->k_state,
-				    skt->cs_state.csc_mask,
-				    skt->cs_state.flags);
-
-      if (events && skt->handler != NULL)
-	skt->handler(skt->handler_info, events);
-      all_events |= events;
-    }
-  } while(all_events);
-}  /* sa1100_pcmcia_task_handler() */
-
-static DECLARE_WORK(sa1100_pcmcia_task, sa1100_pcmcia_task_handler, NULL);
-
-
-/* sa1100_pcmcia_poll_event()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Let's poll for events in addition to IRQs since IRQ only is unreliable...
- */
-static void sa1100_pcmcia_poll_event(unsigned long dummy)
-{
-  DEBUG(4, "%s(): polling for events\n", __FUNCTION__);
-  init_timer(&poll_timer);
-  poll_timer.function = sa1100_pcmcia_poll_event;
-  poll_timer.expires = jiffies + SA1100_PCMCIA_POLL_PERIOD;
-  add_timer(&poll_timer);
-  schedule_work(&sa1100_pcmcia_task);
-}
-
-
-/* sa1100_pcmcia_interrupt()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^
- * Service routine for socket driver interrupts (requested by the
- * low-level PCMCIA init() operation via sa1100_pcmcia_thread()).
- * The actual interrupt-servicing work is performed by
- * sa1100_pcmcia_thread(), largely because the Card Services event-
- * handling code performs scheduling operations which cannot be
- * executed from within an interrupt context.
- */
-void sa1100_pcmcia_interrupt(int irq, void *dev, struct pt_regs *regs)
-{
-  DEBUG(3, "%s(): servicing IRQ %d\n", __FUNCTION__, irq);
-  schedule_work(&sa1100_pcmcia_task);
-}
-
-
-/* sa1100_pcmcia_register_callback()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the register_callback() operation for the in-kernel
- * PCMCIA service (formerly SS_RegisterCallback in Card Services). If 
- * the function pointer `handler' is not NULL, remember the callback 
- * location in the state for `sock', and increment the usage counter 
- * for the driver module. (The callback is invoked from the interrupt
- * service routine, sa1100_pcmcia_interrupt(), to notify Card Services
- * of interesting events.) Otherwise, clear the callback pointer in the
- * socket state and decrement the module usage count.
- *
- * Returns: 0
- */
-static int
-sa1100_pcmcia_register_callback(unsigned int sock,
-				void (*handler)(void *, unsigned int),
-				void *info)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-
-  if (handler && !try_module_get(skt->ops->owner))
-  	return -ENODEV;
-  if (handler == NULL) {
-    skt->handler = NULL;
-  } else {
-    skt->handler_info = info;
-    skt->handler = handler;
-  }
-  if (!handler)
-  	module_put(skt->ops->owner);
-
-  return 0;
-}
-
-
-/* sa1100_pcmcia_inquire_socket()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the inquire_socket() operation for the in-kernel PCMCIA
- * service (formerly SS_InquireSocket in Card Services). Of note is
- * the setting of the SS_CAP_PAGE_REGS bit in the `features' field of
- * `cap' to "trick" Card Services into tolerating large "I/O memory" 
- * addresses. Also set is SS_CAP_STATIC_MAP, which disables the memory
- * resource database check. (Mapped memory is set up within the socket
- * driver itself.)
- *
- * In conjunction with the STATIC_MAP capability is a new field,
- * `io_offset', recommended by David Hinds. Rather than go through
- * the SetIOMap interface (which is not quite suited for communicating
- * window locations up from the socket driver), we just pass up
- * an offset which is applied to client-requested base I/O addresses
- * in alloc_io_space().
- *
- * SS_CAP_PAGE_REGS: used by setup_cis_mem() in cistpl.c to set the
- *   force_low argument to validate_mem() in rsrc_mgr.c -- since in
- *   general, the mapped * addresses of the PCMCIA memory regions
- *   will not be within 0xffff, setting force_low would be
- *   undesirable.
- *
- * SS_CAP_STATIC_MAP: don't bother with the (user-configured) memory
- *   resource database; we instead pass up physical address ranges
- *   and allow other parts of Card Services to deal with remapping.
- *
- * SS_CAP_PCCARD: we can deal with 16-bit PCMCIA & CF cards, but
- *   not 32-bit CardBus devices.
- *
- * Return value is irrelevant; the pcmcia subsystem ignores it.
- */
-static int
-sa1100_pcmcia_inquire_socket(unsigned int sock, socket_cap_t *cap)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  int ret = -1;
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  if (sock < sa1100_pcmcia_socket_count) {
-    cap->features  = SS_CAP_PAGE_REGS | SS_CAP_STATIC_MAP | SS_CAP_PCCARD;
-    cap->irq_mask  = 0;
-    cap->map_size  = PAGE_SIZE;
-    cap->pci_irq   = skt->irq;
-    cap->io_offset = (unsigned long)skt->virt_io;
-
-    ret = 0;
-  }
-
-  return ret;
-}
-
-
-/* sa1100_pcmcia_get_status()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the get_status() operation for the in-kernel PCMCIA
- * service (formerly SS_GetStatus in Card Services). Essentially just
- * fills in bits in `status' according to internal driver state or
- * the value of the voltage detect chipselect register.
- *
- * As a debugging note, during card startup, the PCMCIA core issues
- * three set_socket() commands in a row the first with RESET deasserted,
- * the second with RESET asserted, and the last with RESET deasserted
- * again. Following the third set_socket(), a get_status() command will
- * be issued. The kernel is looking for the SS_READY flag (see
- * setup_socket(), reset_socket(), and unreset_socket() in cs.c).
- *
- * Returns: 0
- */
-static int
-sa1100_pcmcia_get_status(unsigned int sock, unsigned int *status)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  struct pcmcia_state state;
-  unsigned int stat;
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  memset(&state, 0, sizeof(state));
-
-  skt->ops->socket_state(skt->nr, &state);
-  skt->k_state = state;
-
-  stat = state.detect ? SS_DETECT : 0;
-  stat |= state.ready ? SS_READY  : 0;
-  stat |= state.vs_3v ? SS_3VCARD : 0;
-  stat |= state.vs_Xv ? SS_XVCARD : 0;
-
-  /* The power status of individual sockets is not available
-   * explicitly from the hardware, so we just remember the state
-   * and regurgitate it upon request:
-   */
-  stat |= skt->cs_state.Vcc ? SS_POWERON : 0;
-
-  if (skt->cs_state.flags & SS_IOCARD)
-    stat |= state.bvd1 ? SS_STSCHG : 0;
-  else {
-    if (state.bvd1 == 0)
-      stat |= SS_BATDEAD;
-    else if (state.bvd2 == 0)
-      stat |= SS_BATWARN;
-  }
-
-  DEBUG(3, "\tstatus: %s%s%s%s%s%s%s%s\n",
-	stat & SS_DETECT  ? "DETECT "  : "",
-	stat & SS_READY   ? "READY "   : "", 
-	stat & SS_BATDEAD ? "BATDEAD " : "",
-	stat & SS_BATWARN ? "BATWARN " : "",
-	stat & SS_POWERON ? "POWERON " : "",
-	stat & SS_STSCHG  ? "STSCHG "  : "",
-	stat & SS_3VCARD  ? "3VCARD "  : "",
-	stat & SS_XVCARD  ? "XVCARD "  : "");
-
-  *status = stat;
-
-  return 0;
-}  /* sa1100_pcmcia_get_status() */
-
-
-/* sa1100_pcmcia_get_socket()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the get_socket() operation for the in-kernel PCMCIA
- * service (formerly SS_GetSocket in Card Services). Not a very 
- * exciting routine.
- *
- * Returns: 0
- */
-static int
-sa1100_pcmcia_get_socket(unsigned int sock, socket_state_t *state)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  *state = skt->cs_state;
-
-  return 0;
-}
-
-/* sa1100_pcmcia_set_socket()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the set_socket() operation for the in-kernel PCMCIA
- * service (formerly SS_SetSocket in Card Services). We more or
- * less punt all of this work and let the kernel handle the details
- * of power configuration, reset, &c. We also record the value of
- * `state' in order to regurgitate it to the PCMCIA core later.
- *
- * Returns: 0
- */
-static int
-sa1100_pcmcia_set_socket(unsigned int sock, socket_state_t *state)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  DEBUG(3, "\tmask:  %s%s%s%s%s%s\n\tflags: %s%s%s%s%s%s\n",
-	(state->csc_mask==0)?"<NONE>":"",
-	(state->csc_mask&SS_DETECT)?"DETECT ":"",
-	(state->csc_mask&SS_READY)?"READY ":"",
-	(state->csc_mask&SS_BATDEAD)?"BATDEAD ":"",
-	(state->csc_mask&SS_BATWARN)?"BATWARN ":"",
-	(state->csc_mask&SS_STSCHG)?"STSCHG ":"",
-	(state->flags==0)?"<NONE>":"",
-	(state->flags&SS_PWR_AUTO)?"PWR_AUTO ":"",
-	(state->flags&SS_IOCARD)?"IOCARD ":"",
-	(state->flags&SS_RESET)?"RESET ":"",
-	(state->flags&SS_SPKR_ENA)?"SPKR_ENA ":"",
-	(state->flags&SS_OUTPUT_ENA)?"OUTPUT_ENA ":"");
-  DEBUG(3, "\tVcc %d  Vpp %d  irq %d\n",
-	state->Vcc, state->Vpp, state->io_irq);
-
-  return sa1100_pcmcia_config_skt(skt, state);
-}  /* sa1100_pcmcia_set_socket() */
-
-
-/* sa1100_pcmcia_get_io_map()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the get_io_map() operation for the in-kernel PCMCIA
- * service (formerly SS_GetIOMap in Card Services). Just returns an
- * I/O map descriptor which was assigned earlier by a set_io_map().
- *
- * Returns: 0 on success, -1 if the map index was out of range
- */
-static int
-sa1100_pcmcia_get_io_map(unsigned int sock, struct pccard_io_map *map)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  int ret = -1;
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, sock);
-
-  if (map->map < MAX_IO_WIN) {
-    *map = skt->io_map[map->map];
-    ret = 0;
-  }
-
-  return ret;
-}
-
-
-/* sa1100_pcmcia_set_io_map()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the set_io_map() operation for the in-kernel PCMCIA
- * service (formerly SS_SetIOMap in Card Services). We configure
- * the map speed as requested, but override the address ranges
- * supplied by Card Services.
- *
- * Returns: 0 on success, -1 on error
- */
-static int
-sa1100_pcmcia_set_io_map(unsigned int sock, struct pccard_io_map *map)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  DEBUG(3, "\tmap %u  speed %u\n\tstart 0x%08x  stop 0x%08x\n",
-	map->map, map->speed, map->start, map->stop);
-  DEBUG(3, "\tflags: %s%s%s%s%s%s%s%s\n",
-	(map->flags==0)?"<NONE>":"",
-	(map->flags&MAP_ACTIVE)?"ACTIVE ":"",
-	(map->flags&MAP_16BIT)?"16BIT ":"",
-	(map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
-	(map->flags&MAP_0WS)?"0WS ":"",
-	(map->flags&MAP_WRPROT)?"WRPROT ":"",
-	(map->flags&MAP_USE_WAIT)?"USE_WAIT ":"",
-	(map->flags&MAP_PREFETCH)?"PREFETCH ":"");
-
-  if (map->map >= MAX_IO_WIN) {
-    printk(KERN_ERR "%s(): map (%d) out of range\n", __FUNCTION__,
-	   map->map);
-    return -1;
-  }
-
-  if (map->flags & MAP_ACTIVE) {
-    if ( map->speed == 0)
-       map->speed = SA1100_PCMCIA_IO_ACCESS;
-
-	sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
-  }
-
-  if (map->stop == 1)
-    map->stop = PAGE_SIZE-1;
-
-  map->stop -= map->start;
-  map->stop += (unsigned long)skt->virt_io;
-  map->start = (unsigned long)skt->virt_io;
-
-  skt->io_map[map->map] = *map;
-
-  return 0;
-}  /* sa1100_pcmcia_set_io_map() */
-
-
-/* sa1100_pcmcia_get_mem_map()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the get_mem_map() operation for the in-kernel PCMCIA
- * service (formerly SS_GetMemMap in Card Services). Just returns a
- *  memory map descriptor which was assigned earlier by a
- *  set_mem_map() request.
- *
- * Returns: 0 on success, -1 if the map index was out of range
- */
-static int
-sa1100_pcmcia_get_mem_map(unsigned int sock, struct pccard_mem_map *map)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  int ret = -1;
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, sock);
-
-  if (map->map < MAX_WIN) {
-    *map = skt->pc_mem_map[map->map];
-    ret = 0;
-  }
-
-  return ret;
-}
-
-
-/* sa1100_pcmcia_set_mem_map()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the set_mem_map() operation for the in-kernel PCMCIA
- * service (formerly SS_SetMemMap in Card Services). We configure
- * the map speed as requested, but override the address ranges
- * supplied by Card Services.
- *
- * Returns: 0 on success, -1 on error
- */
-static int
-sa1100_pcmcia_set_mem_map(unsigned int sock, struct pccard_mem_map *map)
-{
-  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-  unsigned long start;
-
-  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
-
-  DEBUG(3, "\tmap %u speed %u sys_start %08lx sys_stop %08lx card_start %08x\n",
-	map->map, map->speed, map->sys_start, map->sys_stop, map->card_start);
-  DEBUG(3, "\tflags: %s%s%s%s%s%s%s%s\n",
-	(map->flags==0)?"<NONE>":"",
-	(map->flags&MAP_ACTIVE)?"ACTIVE ":"",
-	(map->flags&MAP_16BIT)?"16BIT ":"",
-	(map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
-	(map->flags&MAP_0WS)?"0WS ":"",
-	(map->flags&MAP_WRPROT)?"WRPROT ":"",
-	(map->flags&MAP_ATTRIB)?"ATTRIB ":"",
-	(map->flags&MAP_USE_WAIT)?"USE_WAIT ":"");
-
-  if (map->map >= MAX_WIN) {
-    printk(KERN_ERR "%s(): map (%d) out of range\n", __FUNCTION__,
-	   map->map);
-    return -1;
-  }
-
-  if (map->flags & MAP_ACTIVE) {
-	  /*
-	   * When clients issue RequestMap, the access speed is not always
-	   * properly configured.  Choose some sensible defaults.
-	   */
-	  if (map->speed == 0) {
-		  if (skt->cs_state.Vcc == 33)
-			  map->speed = SA1100_PCMCIA_3V_MEM_ACCESS;
-		  else
-			  map->speed = SA1100_PCMCIA_5V_MEM_ACCESS;
-	  }
-
-	  sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
-
-  }
-
-  if (map->sys_stop == 0)
-    map->sys_stop = PAGE_SIZE-1;
-
-  start = (map->flags & MAP_ATTRIB) ? skt->phys_attr : skt->phys_mem;
-  map->sys_stop -= map->sys_start;
-  map->sys_stop += start + map->card_start;
-  map->sys_start = start + map->card_start;
-
-  skt->pc_mem_map[map->map] = *map;
-
-  return 0;
-}  /* sa1100_pcmcia_set_mem_map() */
-
-
-#if defined(CONFIG_PROC_FS)
-
-/* sa1100_pcmcia_proc_status()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the /proc/bus/pccard/??/status file.
- *
- * Returns: the number of characters added to the buffer
- */
-static int
-sa1100_pcmcia_proc_status(char *buf, char **start, off_t pos,
-			  int count, int *eof, void *data)
-{
-  struct sa1100_pcmcia_socket *skt = data;
-  unsigned int clock = cpufreq_get(0);
-  unsigned long mecr = MECR;
-  char *p = buf;
-
-  p+=sprintf(p, "k_state  : %s%s%s%s%s%s%s\n", 
-	     skt->k_state.detect ? "detect " : "",
-	     skt->k_state.ready  ? "ready "  : "",
-	     skt->k_state.bvd1   ? "bvd1 "   : "",
-	     skt->k_state.bvd2   ? "bvd2 "   : "",
-	     skt->k_state.wrprot ? "wrprot " : "",
-	     skt->k_state.vs_3v  ? "vs_3v "  : "",
-	     skt->k_state.vs_Xv  ? "vs_Xv "  : "");
-
-  p+=sprintf(p, "status   : %s%s%s%s%s%s%s%s%s\n",
-	     skt->k_state.detect ? "SS_DETECT " : "",
-	     skt->k_state.ready  ? "SS_READY " : "",
-	     skt->cs_state.Vcc   ? "SS_POWERON " : "",
-	     skt->cs_state.flags & SS_IOCARD ? "SS_IOCARD " : "",
-	     (skt->cs_state.flags & SS_IOCARD &&
-	      skt->k_state.bvd1) ? "SS_STSCHG " : "",
-	     ((skt->cs_state.flags & SS_IOCARD)==0 &&
-	      (skt->k_state.bvd1==0)) ? "SS_BATDEAD " : "",
-	     ((skt->cs_state.flags & SS_IOCARD)==0 &&
-	      (skt->k_state.bvd2==0)) ? "SS_BATWARN " : "",
-	     skt->k_state.vs_3v  ? "SS_3VCARD " : "",
-	     skt->k_state.vs_Xv  ? "SS_XVCARD " : "");
-
-  p+=sprintf(p, "mask     : %s%s%s%s%s\n",
-	     skt->cs_state.csc_mask & SS_DETECT  ? "SS_DETECT "  : "",
-	     skt->cs_state.csc_mask & SS_READY   ? "SS_READY "   : "",
-	     skt->cs_state.csc_mask & SS_BATDEAD ? "SS_BATDEAD " : "",
-	     skt->cs_state.csc_mask & SS_BATWARN ? "SS_BATWARN " : "",
-	     skt->cs_state.csc_mask & SS_STSCHG  ? "SS_STSCHG "  : "");
-
-  p+=sprintf(p, "cs_flags : %s%s%s%s%s\n",
-	     skt->cs_state.flags & SS_PWR_AUTO   ? "SS_PWR_AUTO "   : "",
-	     skt->cs_state.flags & SS_IOCARD     ? "SS_IOCARD "     : "",
-	     skt->cs_state.flags & SS_RESET      ? "SS_RESET "      : "",
-	     skt->cs_state.flags & SS_SPKR_ENA   ? "SS_SPKR_ENA "   : "",
-	     skt->cs_state.flags & SS_OUTPUT_ENA ? "SS_OUTPUT_ENA " : "");
-
-  p+=sprintf(p, "Vcc      : %d\n", skt->cs_state.Vcc);
-  p+=sprintf(p, "Vpp      : %d\n", skt->cs_state.Vpp);
-  p+=sprintf(p, "IRQ      : %d\n", skt->cs_state.io_irq);
-
-  p+=sprintf(p, "I/O      : %u (%u)\n", skt->speed_io,
-	     sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr)));
-
-  p+=sprintf(p, "attribute: %u (%u)\n", skt->speed_attr,
-	     sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr)));
-
-  p+=sprintf(p, "common   : %u (%u)\n", skt->speed_mem,
-	     sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr)));
-
-  return p-buf;
-}
-
-/* sa1100_pcmcia_proc_setup()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- * Implements the proc_setup() operation for the in-kernel PCMCIA
- * service (formerly SS_ProcSetup in Card Services).
- *
- * Returns: 0 on success, -1 on error
- */
-static void
-sa1100_pcmcia_proc_setup(unsigned int sock, struct proc_dir_entry *base)
-{
-  struct proc_dir_entry *entry;
-
-  DEBUG(4, "%s() for sock %u\n", __FUNCTION__, sock);
-
-  if ((entry = create_proc_entry("status", 0, base)) == NULL){
-    printk(KERN_ERR "unable to install \"status\" procfs entry\n");
-    return;
-  }
-
-  entry->read_proc = sa1100_pcmcia_proc_status;
-  entry->data = PCMCIA_SOCKET(sock);
-}
-
-#endif  /* defined(CONFIG_PROC_FS) */
-
-static struct pccard_operations sa1100_pcmcia_operations = {
-  .owner		= THIS_MODULE,
-  .init			= sa1100_pcmcia_sock_init,
-  .suspend		= sa1100_pcmcia_suspend,
-  .register_callback	= sa1100_pcmcia_register_callback,
-  .inquire_socket	= sa1100_pcmcia_inquire_socket,
-  .get_status		= sa1100_pcmcia_get_status,
-  .get_socket		= sa1100_pcmcia_get_socket,
-  .set_socket		= sa1100_pcmcia_set_socket,
-  .get_io_map		= sa1100_pcmcia_get_io_map,
-  .set_io_map		= sa1100_pcmcia_set_io_map,
-  .get_mem_map		= sa1100_pcmcia_get_mem_map,
-  .set_mem_map		= sa1100_pcmcia_set_mem_map,
-#ifdef CONFIG_PROC_FS
-  .proc_setup		= sa1100_pcmcia_proc_setup
-#endif
-};
-
-#ifdef CONFIG_CPU_FREQ
-
-/* sa1100_pcmcia_update_mecr()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
- * When sa1100_pcmcia_notifier() decides that a MECR adjustment (due
- * to a core clock frequency change) is needed, this routine establishes
- * new BS_xx values consistent with the clock speed `clock'.
- */
-static void sa1100_pcmcia_update_mecr(unsigned int clock)
-{
-	unsigned int sock;
-
-	for (sock = 0; sock < SA1100_PCMCIA_MAX_SOCK; ++sock) {
-		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
-		sa1100_pcmcia_set_mecr(skt, clock);
-	}
-}
-
-/* sa1100_pcmcia_notifier()
- * ^^^^^^^^^^^^^^^^^^^^^^^^
- * When changing the processor core clock frequency, it is necessary
- * to adjust the MECR timings accordingly. We've recorded the timings
- * requested by Card Services, so this is just a matter of finding
- * out what our current speed is, and then recomputing the new MECR
- * values.
- *
- * Returns: 0 on success, -1 on error
- */
-static int
-sa1100_pcmcia_notifier(struct notifier_block *nb, unsigned long val,
-		       void *data)
-{
-	struct cpufreq_freqs *freqs = data;
-
-	switch (val) {
-	case CPUFREQ_PRECHANGE:
-		if (freqs->new > freqs->old) {
-			DEBUG(2, "%s(): new frequency %u.%uMHz > %u.%uMHz, "
-				"pre-updating\n", __FUNCTION__,
-			    freqs->new / 1000, (freqs->new / 100) % 10,
-			    freqs->old / 1000, (freqs->old / 100) % 10);
-			sa1100_pcmcia_update_mecr(freqs->new);
-		}
-		break;
-
-	case CPUFREQ_POSTCHANGE:
-		if (freqs->new < freqs->old) {
-			DEBUG(2, "%s(): new frequency %u.%uMHz < %u.%uMHz, "
-				"post-updating\n", __FUNCTION__,
-			    freqs->new / 1000, (freqs->new / 100) % 10,
-			    freqs->old / 1000, (freqs->old / 100) % 10);
-			sa1100_pcmcia_update_mecr(freqs->new);
-		}
-		break;
-	}
-
-	return 0;
-}
-
-static struct notifier_block sa1100_pcmcia_notifier_block = {
-	.notifier_call	= sa1100_pcmcia_notifier
-};
-#endif
-
-/* sa1100_register_pcmcia()
- * ^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * Register an SA1100 PCMCIA low level driver with the SA1100 core.
- */
-int sa1100_register_pcmcia(struct pcmcia_low_level *ops, struct device *dev)
-{
-	struct pcmcia_init pcmcia_init;
-	struct pcmcia_socket_class_data *cls;
-	unsigned int i, cpu_clock;
-	int ret;
-
-	/*
-	 * Refuse to replace an existing driver.
-	 */
-	if (pcmcia_low_level)
-		return -EBUSY;
-
-	pcmcia_low_level = ops;
-
-	/*
-	 * set default MECR calculation if the board specific
-	 * code did not specify one...
-	 */
-	if (!ops->socket_get_timing)
-		ops->socket_get_timing = sa1100_pcmcia_default_mecr_timing;
-
-	pcmcia_init.socket_irq[0] = NO_IRQ;
-	pcmcia_init.socket_irq[1] = NO_IRQ;
-	ret = ops->init(&pcmcia_init);
-	if (ret < 0) {
-		printk(KERN_ERR "Unable to initialize kernel PCMCIA service (%d).\n", ret);
-		goto out;
-	}
-
-	sa1100_pcmcia_socket_count = ret;
-
-	cpu_clock = cpufreq_get(0);
-
-	for (i = 0; i < sa1100_pcmcia_socket_count; i++) {
-		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(i);
-		memset(skt, 0, sizeof(*skt));
-	}
-
-	/*
-	 * We initialize the MECR to default values here, because we are
-	 * not guaranteed to see a SetIOMap operation at runtime.
-	 */
-	for (i = 0; i < sa1100_pcmcia_socket_count; i++) {
-		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(i);
-
-		skt->res.start	= _PCMCIA(i);
-		skt->res.end	= _PCMCIA(i) + PCMCIASp - 1;
-		skt->res.name	= "PCMCIA";
-		skt->res.flags	= IORESOURCE_MEM;
-
-		ret = request_resource(&iomem_resource, &skt->res);
-		if (ret)
-			goto out_err;
-
-		skt->nr		= i;
-		skt->ops	= ops;
-		skt->irq	= pcmcia_init.socket_irq[i];
-		skt->irq_state	= 0;
-		skt->speed_io   = SA1100_PCMCIA_IO_ACCESS;
-		skt->speed_attr = SA1100_PCMCIA_5V_MEM_ACCESS;
-		skt->speed_mem  = SA1100_PCMCIA_5V_MEM_ACCESS;
-		skt->phys_attr  = _PCMCIAAttr(i);
-		skt->phys_mem   = _PCMCIAMem(i);
-		skt->virt_io    = ioremap(_PCMCIAIO(i), 0x10000);
-
-		if (skt->virt_io == NULL) {
-			ret = -ENOMEM;
-			goto out_err;
-		}
-
-		ops->socket_state(skt->nr, &skt->k_state);
-		sa1100_pcmcia_set_mecr(skt, cpu_clock);
-	}
-
-	cls = kmalloc(sizeof(struct pcmcia_socket_class_data), GFP_KERNEL);
-	if (!cls) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
-
-	memset(cls, 0, sizeof(struct pcmcia_socket_class_data));
-
-	cls->ops	= &sa1100_pcmcia_operations;
-	cls->nsock	= sa1100_pcmcia_socket_count;
-	dev->class_data = cls;
-
-	/*
-	 * Start the event poll timer.  It will reschedule by itself afterwards.
-	 */
-	sa1100_pcmcia_poll_event(0);
-	return 0;
-
- out_err:
-	for (i = 0; i < sa1100_pcmcia_socket_count; i++) {
-		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(i);
-		iounmap(skt->virt_io);
-		skt->virt_io = NULL;
-		if (skt->res.start)
-			release_resource(&skt->res);
-	}
-
-	ops->shutdown();
-
- out:
-	pcmcia_low_level = NULL;
-	return ret;
-}
-EXPORT_SYMBOL(sa1100_register_pcmcia);
-
-/* sa1100_unregister_pcmcia()
- * ^^^^^^^^^^^^^^^^^^^^^^^^^^
- *
- * Unregister a previously registered pcmcia driver
- */
-void sa1100_unregister_pcmcia(struct pcmcia_low_level *ops, struct device *dev)
-{
-	int i;
-
-	if (!ops)
-		return;
-
-	if (ops != pcmcia_low_level) {
-		printk(KERN_DEBUG "PCMCIA: Trying to unregister wrong "
-			"low-level driver (%p != %p)", ops,
-			pcmcia_low_level);
-		return;
-	}
-
-	del_timer_sync(&poll_timer);
-
-	for (i = 0; i < sa1100_pcmcia_socket_count; i++) {
-		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(i);
-
-		iounmap(skt->virt_io);
-		skt->virt_io = NULL;
-		
-		release_resource(&skt->res);
-	}
-
-	ops->shutdown();
-
-	flush_scheduled_work();
-
-	kfree(dev->class_data);
-	dev->class_data = NULL;
-
-	pcmcia_low_level = NULL;
-}
-EXPORT_SYMBOL(sa1100_unregister_pcmcia);
-
-static struct device_driver sa1100_pcmcia_driver = {
-	.name		= "sa11x0-pcmcia",
-	.bus		= &platform_bus_type,
-	.devclass	= &pcmcia_socket_class,
-};
-
-static struct platform_device sa1100_pcmcia_device = {
-	.name		= "sa11x0-pcmcia",
-	.id		= 0,
-	.dev		= {
-		.name	= "Intel Corporation SA11x0 [PCMCIA]",
-	},
-};
-
-struct ll_fns {
-	int (*init)(struct device *dev);
-	void (*exit)(struct device *dev);
-};
-
-static struct ll_fns sa1100_ll_fns[] = {
+static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = {
 #ifdef CONFIG_SA1100_ASSABET
-	{ .init = pcmcia_assabet_init,	.exit = pcmcia_assabet_exit,	},
+	pcmcia_assabet_init,
 #endif
 #ifdef CONFIG_SA1100_CERF
-	{ .init = pcmcia_cerf_init,	.exit = pcmcia_cerf_exit,	},
+	pcmcia_cerf_init,
 #endif
 #ifdef CONFIG_SA1100_FLEXANET
-	{ .init = pcmcia_flexanet_init,	.exit = pcmcia_flexanet_exit,	},
+	pcmcia_flexanet_init,
 #endif
 #ifdef CONFIG_SA1100_FREEBIRD
-	{ .init = pcmcia_freebird_init,	.exit = pcmcia_freebird_exit,	},
+	pcmcia_freebird_init,
 #endif
 #ifdef CONFIG_SA1100_GRAPHICSCLIENT
-	{ .init = pcmcia_gcplus_init,	.exit = pcmcia_gcplus_exit,	},
+	pcmcia_gcplus_init,
 #endif
 #ifdef CONFIG_SA1100_H3600
-	{ .init = pcmcia_h3600_init,	.exit = pcmcia_h3600_exit,	},
+	pcmcia_h3600_init,
 #endif
 #ifdef CONFIG_SA1100_PANGOLIN
-	{ .init = pcmcia_pangolin_init,	.exit = pcmcia_pangolin_exit,	},
+	pcmcia_pangolin_init,
 #endif
 #ifdef CONFIG_SA1100_SHANNON
-	{ .init = pcmcia_shannon_init,	.exit = pcmcia_shannon_exit,	},
+	pcmcia_shannon_init,
 #endif
 #ifdef CONFIG_SA1100_SIMPAD
-	{ .init = pcmcia_simpad_init,	.exit = pcmcia_simpad_exit,	},
+	pcmcia_simpad_init,
 #endif
 #ifdef CONFIG_SA1100_STORK
-	{ .init = pcmcia_stork_init,	.exit = pcmcia_stork_exit,	},
+	pcmcia_stork_init,
 #endif
 #ifdef CONFIG_SA1100_TRIZEPS
-	{ .init = pcmcia_trizeps_init,	.exit = pcmcia_trizeps_exit,	},
+	pcmcia_trizeps_init,
 #endif
 #ifdef CONFIG_SA1100_YOPY
-	{ .init = pcmcia_yopy_init,	.exit = pcmcia_yopy_exit,	},
+	pcmcia_yopy_init,
 #endif
 };
 
-/* sa1100_pcmcia_init()
- * ^^^^^^^^^^^^^^^^^^^^
- *
- * This routine performs a basic sanity check to ensure that this
- * kernel has been built with the appropriate board-specific low-level
- * PCMCIA support, performs low-level PCMCIA initialization, registers
- * this socket driver with Card Services, and then spawns the daemon
- * thread which is the real workhorse of the socket driver.
- *
- * Returns: 0 on success, -1 on error
- */
-static int __init sa1100_pcmcia_init(void)
+static int sa11x0_drv_pcmcia_probe(struct device *dev)
 {
-	servinfo_t info;
-	int ret, i;
-
-	printk(KERN_INFO "SA11x0 PCMCIA (CS release %s)\n", CS_RELEASE);
-
-	CardServices(GetCardServicesInfo, &info);
-	if (info.Revision != CS_RELEASE_CODE) {
-		printk(KERN_ERR "Card Services release codes do not match\n");
-		return -EINVAL;
-	}
-
-#ifdef CONFIG_CPU_FREQ
-	ret = cpufreq_register_notifier(&sa1100_pcmcia_notifier_block,
-					CPUFREQ_TRANSITION_NOTIFIER);
-	if (ret < 0) {
-		printk(KERN_ERR "Unable to register CPU frequency change "
-			"notifier (%d)\n", ret);
-		driver_unregister(&sa1100_pcmcia_driver);
-		return ret;
-	}
-#endif
-
-	driver_register(&sa1100_pcmcia_driver);
+	int i, ret = -ENODEV;
 
 	/*
 	 * Initialise any "on-board" PCMCIA sockets.
 	 */
-	for (i = 0; i < ARRAY_SIZE(sa1100_ll_fns); i++) {
-		ret = sa1100_ll_fns[i].init(&sa1100_pcmcia_device.dev);
+	for (i = 0; i < ARRAY_SIZE(sa11x0_pcmcia_hw_init); i++) {
+		ret = sa11x0_pcmcia_hw_init[i](dev);
 		if (ret == 0)
 			break;
 	}
 
-	if (ret == 0)
-		platform_device_register(&sa1100_pcmcia_device);
-
-	/*
-	 * Don't fail if we don't find any on-board sockets.
-	 */
-	return 0;
+	return ret;
 }
 
-/* sa1100_pcmcia_exit()
+static struct device_driver sa11x0_pcmcia_driver = {
+	.probe		= sa11x0_drv_pcmcia_probe,
+	.remove		= sa11xx_drv_pcmcia_remove,
+	.name		= "sa11x0-pcmcia",
+	.bus		= &platform_bus_type,
+	.devclass	= &pcmcia_socket_class,
+	.suspend 	= pcmcia_socket_dev_suspend,
+	.resume 	= pcmcia_socket_dev_resume,
+};
+
+static struct platform_device sa11x0_pcmcia_device = {
+	.name		= "sa11x0-pcmcia",
+	.id		= 0,
+	.dev		= {
+		.name	= "Intel Corporation SA11x0 [PCMCIA]",
+	},
+};
+
+/* sa11x0_pcmcia_init()
  * ^^^^^^^^^^^^^^^^^^^^
- * Invokes the low-level kernel service to free IRQs associated with this
- * socket controller and reset GPIO edge detection.
+ *
+ * Registers the SA11x0 PCMCIA driver, then its platform device; the
+ * driver is unregistered again if device registration fails.
+ *
+ * Returns: 0 on success, -ve error code on failure
  */
-static void __exit sa1100_pcmcia_exit(void)
+static int __init sa11x0_pcmcia_init(void)
 {
-	platform_device_unregister(&sa1100_pcmcia_device);
+	int ret;
 
+	ret = driver_register(&sa11x0_pcmcia_driver);
+	if (ret == 0) {
+		ret = platform_device_register(&sa11x0_pcmcia_device);
+		if (ret)
+			driver_unregister(&sa11x0_pcmcia_driver);
+	}
 
-#ifdef CONFIG_CPU_FREQ
-	cpufreq_unregister_notifier(&sa1100_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
+	return ret;
+}
 
-	driver_unregister(&sa1100_pcmcia_driver);
+/* sa11x0_pcmcia_exit()
+ * ^^^^^^^^^^^^^^^^^^^^
+ * Unregisters the SA11x0 PCMCIA platform device and then the driver,
+ * reversing the registration order used at init.
+ */
+static void __exit sa11x0_pcmcia_exit(void)
+{
+	platform_device_unregister(&sa11x0_pcmcia_device);
+	driver_unregister(&sa11x0_pcmcia_driver);
 }
 
 MODULE_AUTHOR("John Dorsey <john+@cs.cmu.edu>");
-MODULE_DESCRIPTION("Linux PCMCIA Card Services: SA-1100 Socket Controller");
+MODULE_DESCRIPTION("Linux PCMCIA Card Services: SA-11x0 Socket Controller");
 MODULE_LICENSE("Dual MPL/GPL");
 
-module_init(sa1100_pcmcia_init);
-module_exit(sa1100_pcmcia_exit);
+module_init(sa11x0_pcmcia_init);
+module_exit(sa11x0_pcmcia_exit);
diff -Nru a/drivers/pcmcia/sa1100_generic.h b/drivers/pcmcia/sa1100_generic.h
--- a/drivers/pcmcia/sa1100_generic.h	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_generic.h	Mon Mar 31 13:41:07 2003
@@ -1,74 +1,23 @@
-/*
- * linux/include/asm/arch/pcmcia.h
- *
- * Copyright (C) 2000 John G Dorsey <john+@cs.cmu.edu>
- *
- * This file contains definitions for the low-level SA-1100 kernel PCMCIA
- * interface. Please see linux/Documentation/arm/SA1100/PCMCIA for details.
- */
-#ifndef _ASM_ARCH_PCMCIA
-#define _ASM_ARCH_PCMCIA
+#include "sa11xx_core.h"
 
-/* Ideally, we'd support up to MAX_SOCK sockets, but the SA-1100 only
- * has support for two. This shows up in lots of hardwired ways, such
- * as the fact that MECR only has enough bits to configure two sockets.
- * Since it's so entrenched in the hardware, limiting the software
- * in this way doesn't seem too terrible.
+/*
+ * Declarations for all machine-specific init functions.
  */
-#define SA1100_PCMCIA_MAX_SOCK   (2)
-
-struct pcmcia_init {
-	int	socket_irq[SA1100_PCMCIA_MAX_SOCK];
-};
-
-struct pcmcia_state {
-  unsigned detect: 1,
-            ready: 1,
-             bvd1: 1,
-             bvd2: 1,
-           wrprot: 1,
-            vs_3v: 1,
-            vs_Xv: 1;
-};
-
-struct pcmcia_configure {
-  unsigned  vcc: 8,
-            vpp: 8,
-         output: 1,
-        speaker: 1,
-          reset: 1,
-            irq: 1;
-};
-
-struct pcmcia_low_level {
-  struct module *owner;
-
-  int (*init)(struct pcmcia_init *);
-  int (*shutdown)(void);
-  void (*socket_state)(int sock, struct pcmcia_state *);
-  int (*configure_socket)(int sock, const struct pcmcia_configure *);
-
-  /*
-   * Enable card status IRQs on (re-)initialisation.  This can
-   * be called at initialisation, power management event, or
-   * pcmcia event.
-   */
-  int (*socket_init)(int sock);
-
-  /*
-   * Disable card status IRQs and PCMCIA bus on suspend.
-   */
-  int (*socket_suspend)(int sock);
-
-  /*
-   * Calculate MECR timing clock wait states
-   */
-  unsigned int (*socket_get_timing)(unsigned int sock,
-		unsigned int cpu_speed, unsigned int cmd_time);
-};
-
-extern int sa1100_register_pcmcia(struct pcmcia_low_level *, struct device *);
-extern void sa1100_unregister_pcmcia(struct pcmcia_low_level *, struct device *);
-extern void sa1100_pcmcia_interrupt(int, void *, struct pt_regs *);
-
-#endif
+extern int pcmcia_adsbitsy_init(struct device *);
+extern int pcmcia_assabet_init(struct device *);
+extern int pcmcia_badge4_init(struct device *);
+extern int pcmcia_cerf_init(struct device *);
+extern int pcmcia_flexanet_init(struct device *);
+extern int pcmcia_freebird_init(struct device *);
+extern int pcmcia_gcplus_init(struct device *);
+extern int pcmcia_graphicsmaster_init(struct device *);
+extern int pcmcia_h3600_init(struct device *);
+extern int pcmcia_pangolin_init(struct device *);
+extern int pcmcia_pfs168_init(struct device *);
+extern int pcmcia_shannon_init(struct device *);
+extern int pcmcia_simpad_init(struct device *);
+extern int pcmcia_stork_init(struct device *);
+extern int pcmcia_system3_init(struct device *);
+extern int pcmcia_trizeps_init(struct device *);
+extern int pcmcia_xp860_init(struct device *);
+extern int pcmcia_yopy_init(struct device *);
diff -Nru a/drivers/pcmcia/sa1100_graphicsclient.c b/drivers/pcmcia/sa1100_graphicsclient.c
--- a/drivers/pcmcia/sa1100_graphicsclient.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/sa1100_graphicsclient.c	Mon Mar 31 13:41:06 2003
@@ -34,126 +34,113 @@
 static volatile unsigned long *PCMCIA_Power = 
 		((volatile unsigned long *) ADS_p2v(_ADS_CS_PR));
 
-static int gcplus_pcmcia_init(struct pcmcia_init *init)
-{
-  int irq, res;
-
-  // Reset PCMCIA
-  // Reset Timing for CPLD(U2) version 8001E or later
-  *PCMCIA_Power &= ~ ADS_CS_PR_A_RESET;
-  udelay(12);			// 12 uSec
+static struct pcmcia_irqs irqs[] = {
+	{ 0, S0_CD_IRQ, "PCMCIA 0 CD" },
+};
 
-  *PCMCIA_Power |= ADS_CS_PR_A_RESET;
-  mdelay(30);			// 30 mSec
+static int gcplus_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
+{
+	// Reset PCMCIA
+	// Reset Timing for CPLD(U2) version 8001E or later
+	*PCMCIA_Power &= ~ ADS_CS_PR_A_RESET;
+	udelay(12);			// 12 uSec
 
-  // Turn off 5V
-  *PCMCIA_Power &= ~0x03;
+	*PCMCIA_Power |= ADS_CS_PR_A_RESET;
+	mdelay(30);			// 30 mSec
 
-  /* Register interrupts */
-  irq = S0_CD_IRQ;
-  res = request_irq(irq, sa1100_pcmcia_interrupt, SA_INTERRUPT, "PCMCIA 0 CD", NULL);
-  if (res < 0) {
-    printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	   __FUNCTION__, irq, res);
-    return res;
-  }
+	// Turn off 5V
+	*PCMCIA_Power &= ~0x03;
 
-  init->socket_irq[0] = S0_STS_IRQ;
+	skt->irq = S0_STS_IRQ;
 
-  return 1;			// 1 PCMCIA Slot
+	/* Register interrupts */
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int gcplus_pcmcia_shutdown(void)
+static void gcplus_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  /* disable IRQs */
-  free_irq( S0_CD_IRQ, NULL);
+	/* disable IRQs */
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
   
-  /* Shutdown PCMCIA power */
-  mdelay(2);						// 2msec
-  *PCMCIA_Power &= ~0x03;
-
-  return 0;
+	/* Shutdown PCMCIA power */
+	mdelay(2);			// 2msec
+	*PCMCIA_Power &= ~0x03;
 }
 
-static void gcplus_pcmcia_socket_state(int sock, struct pcmcia_state *state_array)
+static void
+gcplus_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
-  unsigned long levels = *PCMCIA_Status;
+	unsigned long levels = *PCMCIA_Status;
 
-  if (sock == 0) {
-    state->detect=(levels & ADS_CS_ST_A_CD)?1:0;
-    state->ready=(levels & ADS_CS_ST_A_READY)?1:0;
-    state->bvd1= 0;
-    state->bvd2= 0;
-    state->wrprot=0;
-    state->vs_3v=0;
-    state->vs_Xv=0;
-  }
+	state->detect=(levels & ADS_CS_ST_A_CD)?1:0;
+	state->ready=(levels & ADS_CS_ST_A_READY)?1:0;
+	state->bvd1= 0;
+	state->bvd2= 0;
+	state->wrprot=0;
+	state->vs_3v=0;
+	state->vs_Xv=0;
 }
 
-static int gcplus_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+gcplus_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			       const socket_state_t *state)
 {
-  unsigned long flags;
-
-  if(sock>1) return -1;
+	unsigned long flags;
 
-  local_irq_save(flags);
+	local_irq_save(flags);
 
-  switch (configure->vcc) {
-  case 0:
-	  *PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
-    break;
+	switch (state->Vcc) {
+	case 0:
+		*PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
+		break;
 
-  case 50:
-	  *PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
-	  *PCMCIA_Power |= ADS_CS_PR_A_5V_POWER;
-	break;
+	case 50:
+		*PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
+		*PCMCIA_Power |= ADS_CS_PR_A_5V_POWER;
+		break;
 
-  case 33:
-	  *PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
-	  *PCMCIA_Power |= ADS_CS_PR_A_3V_POWER;
-    break;
+	case 33:
+		*PCMCIA_Power &= ~(ADS_CS_PR_A_3V_POWER | ADS_CS_PR_A_5V_POWER);
+		*PCMCIA_Power |= ADS_CS_PR_A_3V_POWER;
+		break;
 
-  default:
-    printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-	   configure->vcc);
-    local_irq_restore(flags);
-    return -1;
-  }
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
+			__FUNCTION__, state->Vcc);
+		local_irq_restore(flags);
+		return -1;
+	}
 
-  /* Silently ignore Vpp, output enable, speaker enable. */
+	/* Silently ignore Vpp, output enable, speaker enable. */
 
-  // Reset PCMCIA
-  *PCMCIA_Power &= ~ ADS_CS_PR_A_RESET;
-  udelay(12);
+	// Reset PCMCIA
+	*PCMCIA_Power &= ~ ADS_CS_PR_A_RESET;
+	udelay(12);
 
-  *PCMCIA_Power |= ADS_CS_PR_A_RESET;
-  mdelay(30);
+	*PCMCIA_Power |= ADS_CS_PR_A_RESET;
+	mdelay(30);
 
-  local_irq_restore(flags);
+	local_irq_restore(flags);
 
-  return 0;
+	return 0;
 }
 
-static int gcplus_pcmcia_socket_init(int sock)
+static void gcplus_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  return 0;
 }
 
-static int gcplus_pcmcia_socket_suspend(int sock)
+static void gcplus_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  return 0;
 }
 
 static struct pcmcia_low_level gcplus_pcmcia_ops = { 
-  .owner		= THIS_MODULE,
-  .init			= gcplus_pcmcia_init,
-  .shutdown		= gcplus_pcmcia_shutdown,
-  .socket_state		= gcplus_pcmcia_socket_state,
-  .configure_socket	= gcplus_pcmcia_configure_socket,
-
-  .socket_init		= gcplus_pcmcia_socket_init,
-  .socket_suspend	= gcplus_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= gcplus_pcmcia_hw_init,
+	.hw_shutdown		= gcplus_pcmcia_hw_shutdown,
+	.socket_state		= gcplus_pcmcia_socket_state,
+	.configure_socket	= gcplus_pcmcia_configure_socket,
+	.socket_init		= gcplus_pcmcia_socket_init,
+	.socket_suspend		= gcplus_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_gcplus_init(struct device *dev)
@@ -161,13 +148,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_gcplus())
-		ret = sa1100_register_pcmcia(&gcplus_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &gcplus_pcmcia_ops, 0, 1);
 
 	return ret;
 }
-
-void __exit pcmcia_gcplus_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&gcplus_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_graphicsmaster.c b/drivers/pcmcia/sa1100_graphicsmaster.c
--- a/drivers/pcmcia/sa1100_graphicsmaster.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_graphicsmaster.c	Mon Mar 31 13:41:08 2003
@@ -17,10 +17,9 @@
 #include <asm/hardware.h>
 #include <asm/mach-types.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
-static int graphicsmaster_pcmcia_init(struct pcmcia_init *init)
+static int graphicsmaster_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
   int return_val=0;
 
@@ -33,65 +32,67 @@
   /* why? */
   MECR = 0x09430943;
 
-  return sa1111_pcmcia_init(init);
+  return sa1111_pcmcia_hwinit(skt);
 }
 
 static int
-graphicsmaster_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+graphicsmaster_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				       const socket_state_t *state)
 {
-  unsigned int pa_dwr_mask, pa_dwr_set;
-  int ret;
+	unsigned int pa_dwr_mask, pa_dwr_set;
+	int ret;
 
-  switch (sock) {
-  case 0:
-    pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
-
-    switch (conf->vcc) {
-    default:
-    case 0:	pa_dwr_set = GPIO_GPIO0 | GPIO_GPIO1;	break;
-    case 33:	pa_dwr_set = GPIO_GPIO1;		break;
-    case 50:	pa_dwr_set = GPIO_GPIO0;		break;
-    }
-    break;
-
-  case 1:
-    pa_dwr_mask = GPIO_GPIO2 | GPIO_GPIO3;
-
-    switch (conf->vcc) {
-    default:
-    case 0:	pa_dwr_set = GPIO_GPIO2 | GPIO_GPIO3;	break;
-    case 33:	pa_dwr_set = GPIO_GPIO3;		break;
-    case 50:	pa_dwr_set = GPIO_GPIO2;		break;
-    }
-  }
-
-  if (conf->vpp != conf->vcc && conf->vpp != 0) {
-    printk(KERN_ERR "%s(): CF slot cannot support Vpp %u\n", __FUNCTION__,
-	   conf->vpp);
-    return -1;
-  }
-
-  ret = sa1111_pcmcia_configure_socket(sock, conf);
-  if (ret == 0) {
-    unsigned long flags;
-
-    local_irq_save(flags);
-    PA_DWR = (PA_DWR & ~pa_dwr_mask) | pa_dwr_set;
-    local_irq_restore(flags);
-  }
+	switch (skt->nr) {
+	case 0:
+		pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
+
+		switch (state->Vcc) {
+		default:
+		case 0:  pa_dwr_set = GPIO_GPIO0 | GPIO_GPIO1;	break;
+		case 33: pa_dwr_set = GPIO_GPIO1;		break;
+		case 50: pa_dwr_set = GPIO_GPIO0;		break;
+		}
+		break;
+
+	case 1:
+		pa_dwr_mask = GPIO_GPIO2 | GPIO_GPIO3;
+
+		switch (state->Vcc) {
+		default:
+		case 0:  pa_dwr_set = GPIO_GPIO2 | GPIO_GPIO3;	break;
+		case 33: pa_dwr_set = GPIO_GPIO3;		break;
+		case 50: pa_dwr_set = GPIO_GPIO2;		break;
+		}
+		break;
+	}
+
+	if (state->Vpp != state->Vcc && state->Vpp != 0) {
+		printk(KERN_ERR "%s(): CF slot cannot support Vpp %u\n",
+			__FUNCTION__, state->Vpp);
+		return -1;
+	}
+
+	ret = sa1111_pcmcia_configure_socket(skt, state);
+	if (ret == 0) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		PA_DWR = (PA_DWR & ~pa_dwr_mask) | pa_dwr_set;
+		local_irq_restore(flags);
+	}
 
-  return ret;
+	return ret;
 }
 
 static struct pcmcia_low_level graphicsmaster_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .init			= graphicsmaster_pcmcia_init,
-  .shutdown		= sa1111_pcmcia_shutdown,
-  .socket_state		= sa1111_pcmcia_socket_state,
-  .configure_socket	= graphicsmaster_pcmcia_configure_socket,
+	.owner			= THIS_MODULE,
+	.hw_init		= graphicsmaster_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= graphicsmaster_pcmcia_configure_socket,
 
-  .socket_init		= sa1111_pcmcia_socket_init,
-  .socket_suspend	= sa1111_pcmcia_socket_suspend,
+	.socket_init		= sa1111_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_graphicsmaster_init(struct device *dev)
@@ -99,13 +100,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_graphicsmaster())
-		ret = sa1100_register_pcmcia(&graphicsmaster_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &graphicsmaster_pcmcia_ops, 0, 2);
 
 	return ret;
 }
-
-void __exit pcmcia_graphicsmaster_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&graphicsmaster_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c
--- a/drivers/pcmcia/sa1100_h3600.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_h3600.c	Mon Mar 31 13:41:08 2003
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 #include <linux/init.h>
 
 #include <asm/hardware.h>
@@ -17,65 +18,36 @@
 
 #include "sa1100_generic.h"
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_GPIO_H3600_PCMCIA_CD0, "PCMCIA CD0" },
-	{ IRQ_GPIO_H3600_PCMCIA_CD1, "PCMCIA CD1" }
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_GPIO_H3600_PCMCIA_CD0, "PCMCIA CD0" },
+	{ 1, IRQ_GPIO_H3600_PCMCIA_CD1, "PCMCIA CD1" }
 };
 
-static int h3600_pcmcia_init(struct pcmcia_init *init)
+static int h3600_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i, res;
+	skt->irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1
+			   : IRQ_GPIO_H3600_PCMCIA_IRQ0;
 
-	/*
-	 * Register interrupts
-	 */
-	for (i = res = 0; i < ARRAY_SIZE(irqs); i++) {
-		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (res)
-			break;
-	}
-
-	if (res) {
-		printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-		       __FUNCTION__, irqs[i].irq, res);
 
-		while (i--)
-			free_irq(irqs[i].irq, NULL);
-	}
-
-	init->socket_irq[0] = IRQ_GPIO_H3600_PCMCIA_IRQ0;
-	init->socket_irq[1] = IRQ_GPIO_H3600_PCMCIA_IRQ1;
-
-	return res ? res : 2;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int h3600_pcmcia_shutdown(void)
+static void h3600_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	/*
-	 * disable IRQs
-	 */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		free_irq(irqs[i].irq, NULL);
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
   
 	/* Disable CF bus: */
 	clr_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON);
 	clr_h3600_egpio(IPAQ_EGPIO_OPT_ON);
 	set_h3600_egpio(IPAQ_EGPIO_OPT_RESET);
-
-	return 0;
 }
 
-static void h3600_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+h3600_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
 	unsigned long levels = GPLR;
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
 		state->detect = levels & GPIO_H3600_PCMCIA_CD0 ? 0 : 1;
 		state->ready = levels & GPIO_H3600_PCMCIA_IRQ0 ? 1 : 0;
@@ -99,18 +71,15 @@
 }
 
 static int
-h3600_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+h3600_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
-	if (sock > 1)
-		return -1;
-
-	if (conf->vcc != 0 && conf->vcc != 33 && conf->vcc != 50) {
+	if (state->Vcc != 0 && state->Vcc != 33 && state->Vcc != 50) {
 		printk(KERN_ERR "h3600_pcmcia: unrecognized Vcc %u.%uV\n",
-		       conf->vcc / 10, conf->vcc % 10);
+		       state->Vcc / 10, state->Vcc % 10);
 		return -1;
 	}
 
-	if (conf->reset)
+	if (state->flags & SS_RESET)
 		set_h3600_egpio(IPAQ_EGPIO_CARD_RESET);
 	else
 		clr_h3600_egpio(IPAQ_EGPIO_CARD_RESET);
@@ -120,7 +89,7 @@
 	return 0;
 }
 
-static int h3600_pcmcia_socket_init(int sock)
+static void h3600_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
 	/* Enable CF bus: */
 	set_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON);
@@ -130,28 +99,12 @@
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	schedule_timeout(10*HZ / 1000);
 
-	switch (sock) {
-	case 0:
-		set_irq_type(IRQ_GPIO_H3600_PCMCIA_CD0, IRQT_BOTHEDGE);
-		break;
-	case 1:
-		set_irq_type(IRQ_GPIO_H3600_PCMCIA_CD1, IRQT_BOTHEDGE);
-		break;
-	}
-
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int h3600_pcmcia_socket_suspend(int sock)
+static void h3600_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	switch (sock) {
-	case 0:
-		set_irq_type(IRQ_GPIO_H3600_PCMCIA_CD0, IRQT_NOEDGE);
-		break;
-	case 1:
-		set_irq_type(IRQ_GPIO_H3600_PCMCIA_CD1, IRQT_NOEDGE);
-		break;
-	}
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 
 	/*
 	 * FIXME:  This doesn't fit well.  We don't have the mechanism in
@@ -159,20 +112,18 @@
 	 * on one bus.  We rely on the cs.c behaviour shutting down
 	 * socket 0 then socket 1.
 	 */
-	if (sock == 1) {
+	if (skt->nr == 1) {
 		clr_h3600_egpio(IPAQ_EGPIO_OPT_ON);
 		clr_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON);
 		/* hmm, does this suck power? */
 		set_h3600_egpio(IPAQ_EGPIO_OPT_RESET);
 	}
-
-	return 0;
 }
 
 struct pcmcia_low_level h3600_pcmcia_ops = { 
 	.owner			= THIS_MODULE,
-	.init			= h3600_pcmcia_init,
-	.shutdown		= h3600_pcmcia_shutdown,
+	.hw_init		= h3600_pcmcia_hw_init,
+	.hw_shutdown		= h3600_pcmcia_hw_shutdown,
 	.socket_state		= h3600_pcmcia_socket_state,
 	.configure_socket	= h3600_pcmcia_configure_socket,
 
@@ -185,12 +136,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_h3600())
-		ret = sa1100_register_pcmcia(&h3600_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &h3600_pcmcia_ops, 0, 2);
 
 	return ret;
-}
-
-void __exit pcmcia_h3600_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&h3600_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c
--- a/drivers/pcmcia/sa1100_jornada720.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_jornada720.c	Mon Mar 31 13:41:07 2003
@@ -15,7 +15,6 @@
 #include <asm/hardware/sa1111.h>
 #include <asm/mach-types.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
 #define SOCKET0_POWER   GPIO_GPIO0
@@ -24,7 +23,7 @@
 #warning *** Does SOCKET1_3V actually do anything?
 #define SOCKET1_3V	GPIO_GPIO3
 
-static int jornada720_pcmcia_init(struct pcmcia_init *init)
+static int jornada720_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
   /*
    * What is all this crap for?
@@ -46,23 +45,23 @@
   PC_SDR = 0;
   PC_SSR = 0;
 
-  return sa1111_pcmcia_init(init);
+  return sa1111_pcmcia_hw_init(skt);
 }
 
 static int
-jornada720_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+jornada720_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
   unsigned int pa_dwr_mask, pa_dwr_set;
   int ret;
 
 printk("%s(): config socket %d vcc %d vpp %d\n", __FUNCTION__,
-	       sock, conf->vcc, conf->vpp);
+	skt->nr, state->Vcc, state->Vpp);
 
-  switch (sock) {
+  switch (skt->nr) {
   case 0:
     pa_dwr_mask = SOCKET0_POWER | SOCKET0_3V;
 
-    switch (conf->vcc) {
+    switch (state->Vcc) {
     default:
     case 0:	pa_dwr_set = 0;					break;
     case 33:	pa_dwr_set = SOCKET0_POWER | SOCKET0_3V;	break;
@@ -73,7 +72,7 @@
   case 1:
     pa_dwr_mask = SOCKET1_POWER;
 
-    switch (conf->vcc) {
+    switch (state->Vcc) {
     default:
     case 0:	pa_dwr_set = 0;					break;
     case 33:	pa_dwr_set = SOCKET1_POWER;			break;
@@ -85,13 +84,13 @@
     return -1;
   }
 
-  if (conf->vpp != conf->vcc && conf->vpp != 0) {
+  if (state->Vpp != state->Vcc && state->Vpp != 0) {
     printk(KERN_ERR "%s(): slot cannot support VPP %u\n",
-	   __FUNCTION__, conf->vpp);
+	   __FUNCTION__, state->Vpp);
     return -1;
   }
 
-  ret = sa1111_pcmcia_configure_socket(sock, conf);
+  ret = sa1111_pcmcia_configure_socket(skt, state);
   if (ret == 0) {
     unsigned long flags;
 
@@ -105,8 +104,8 @@
 
 static struct pcmcia_low_level jornada720_pcmcia_ops = {
   .owner		= THIS_MODULE,
-  .init			= jornada720_pcmcia_init,
-  .shutdown		= sa1111_pcmcia_shutdown,
+  .hw_init		= jornada720_pcmcia_hw_init,
+  .hw_shutdown		= sa1111_pcmcia_hw_shutdown,
   .socket_state		= sa1111_pcmcia_socket_state,
   .configure_socket	= jornada720_pcmcia_configure_socket,
 
@@ -119,12 +118,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_jornada720())
-		ret = sa1100_register_pcmcia(&jornada720_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &jornada720_pcmcia_ops, 0, 2);
 
 	return ret;
-}
-
-void __devexit pcmcia_jornada720_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&jornada720_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_neponset.c b/drivers/pcmcia/sa1100_neponset.c
--- a/drivers/pcmcia/sa1100_neponset.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_neponset.c	Mon Mar 31 13:41:08 2003
@@ -15,7 +15,6 @@
 #include <asm/arch/neponset.h>
 #include <asm/hardware/sa1111.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
 /*
@@ -42,52 +41,27 @@
  * the corresponding truth table.
  */
 
-static int neponset_pcmcia_init(struct pcmcia_init *init)
-{
-	NCR_0 &= ~(NCR_A0VPP | NCR_A1VPP);
-
-	/*
-	 * Set GPIO_A<3:0> to be outputs for the MAX1600,
-	 * and switch to standby mode.
-	 */
-	PA_DDR = 0;
-	PA_SDR = 0;
-	PA_DWR = 0;
-	PA_SSR = 0;
-
-	return sa1111_pcmcia_init(init);
-}
-
 static int
-neponset_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+neponset_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
-	unsigned int ncr_mask, pa_dwr_mask;
-	unsigned int ncr_set, pa_dwr_set;
+	unsigned int ncr_mask, ncr_set, pa_dwr_mask, pa_dwr_set;
 	int ret;
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
 		pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
 		ncr_mask = NCR_A0VPP | NCR_A1VPP;
 
-		switch (conf->vcc) {
-		default:
-		case 0:		pa_dwr_set = 0;			break;
-		case 33:	pa_dwr_set = GPIO_GPIO1;	break;
-		case 50:	pa_dwr_set = GPIO_GPIO0;	break;
-		}
-
-		switch (conf->vpp) {
-		case 0:		ncr_set = 0;			break;
-		case 120:	ncr_set = NCR_A1VPP;		break;
-		default:
-			if (conf->vpp == conf->vcc)
-				ncr_set = NCR_A0VPP;
-			else {
-				printk(KERN_ERR "%s(): unrecognized VPP %u\n",
-				       __FUNCTION__, conf->vpp);
-				return -1;
-			}
+		if (state->Vpp == 0)
+			ncr_set = 0;
+		else if (state->Vpp == 120)
+			ncr_set = NCR_A1VPP;
+		else if (state->Vpp == state->Vcc)
+			ncr_set = NCR_A0VPP;
+		else {
+			printk(KERN_ERR "%s(): unrecognized VPP %u\n",
+			       __FUNCTION__, state->Vpp);
+			return -1;
 		}
 		break;
 
@@ -96,16 +70,9 @@
 		ncr_mask = 0;
 		ncr_set = 0;
 
-		switch (conf->vcc) {
-		default:
-		case 0:		pa_dwr_set = 0;			break;
-		case 33:	pa_dwr_set = GPIO_GPIO2;	break;
-		case 50:	pa_dwr_set = GPIO_GPIO3;	break;
-		}
-
-		if (conf->vpp != conf->vcc && conf->vpp != 0) {
+		if (state->Vpp != state->Vcc && state->Vpp != 0) {
 			printk(KERN_ERR "%s(): CF slot cannot support VPP %u\n",
-			       __FUNCTION__, conf->vpp);
+			       __FUNCTION__, state->Vpp);
 			return -1;
 		}
 		break;
@@ -114,41 +81,64 @@
 		return -1;
 	}
 
-	ret = sa1111_pcmcia_configure_socket(sock, conf);
+	/*
+	 * pa_dwr_set is the mask for selecting Vcc on both sockets.
+	 * pa_dwr_mask selects which bits (and therefore socket) we change.
+	 */
+	switch (state->Vcc) {
+	default:
+	case 0:  pa_dwr_set = 0;			break;
+	case 33: pa_dwr_set = GPIO_GPIO1|GPIO_GPIO2;	break;
+	case 50: pa_dwr_set = GPIO_GPIO0|GPIO_GPIO3;	break;
+	}
+
+	ret = sa1111_pcmcia_configure_socket(skt, state);
 	if (ret == 0) {
 		unsigned long flags;
 
 		local_irq_save(flags);
 		NCR_0 = (NCR_0 & ~ncr_mask) | ncr_set;
-		PA_DWR = (PA_DWR & ~pa_dwr_mask) | pa_dwr_set;
+
+		PA_DWR = (PA_DWR & ~pa_dwr_mask) | (pa_dwr_set & pa_dwr_mask);
 		local_irq_restore(flags);
 	}
 
-	return 0;
+	return ret;	/* report sa1111_pcmcia_configure_socket() failure to the caller */
 }
 
-static struct pcmcia_low_level neponset_pcmcia_ops = {
-	.owner			= THIS_MODULE,
-	.init			= neponset_pcmcia_init,
-	.shutdown		= sa1111_pcmcia_shutdown,
-	.socket_state		= sa1111_pcmcia_socket_state,
-	.configure_socket	= neponset_pcmcia_configure_socket,
+static void neponset_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
+{
+	if (skt->nr == 0)
+		NCR_0 &= ~(NCR_A0VPP | NCR_A1VPP);
 
-	.socket_init		= sa1111_pcmcia_socket_init,
-	.socket_suspend		= sa1111_pcmcia_socket_suspend,
+	sa1111_pcmcia_socket_init(skt);
+}
+
+static struct pcmcia_low_level neponset_pcmcia_ops = {
+	.owner			= THIS_MODULE,
+	.hw_init		= sa1111_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= neponset_pcmcia_configure_socket,
+	.socket_init		= neponset_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_neponset_init(struct device *dev)
 {
 	int ret = -ENODEV;
 
-	if (machine_is_assabet())
-		ret = sa1100_register_pcmcia(&neponset_pcmcia_ops, dev);
+	if (machine_is_assabet()) {
+		/*
+		 * Set GPIO_A<3:0> to be outputs for the MAX1600,
+		 * and switch to standby mode.
+		 */
+		PA_DDR = 0;
+		PA_DWR = 0;
+		PA_SDR = 0;
+		PA_SSR = 0;
+		ret = sa11xx_drv_pcmcia_probe(dev, &neponset_pcmcia_ops, 0, 2);
+	}
 
 	return ret;
-}
-
-void __devexit pcmcia_neponset_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&neponset_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_pangolin.c b/drivers/pcmcia/sa1100_pangolin.c
--- a/drivers/pcmcia/sa1100_pangolin.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/sa1100_pangolin.c	Mon Mar 31 13:41:06 2003
@@ -22,134 +22,118 @@
 #define PANGOLIN_SOCK	0
 #endif
 
-static int pangolin_pcmcia_init(struct pcmcia_init *init){
-  int res;
+static struct pcmcia_irqs irqs[] = {
+	{ PANGOLIN_SOCK, IRQ_PCMCIA_CD, "PCMCIA CD" },
+};
+
+static int pangolin_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
+{
+	int res;
 
 #ifndef CONFIG_SA1100_PANGOLIN_PCMCIA_IDE
-  /* Enable PCMCIA bus: */
-  GPCR = GPIO_PCMCIA_BUS_ON;
+	/* Enable PCMCIA bus: */
+	GPCR = GPIO_PCMCIA_BUS_ON;
 #endif
 
-  init->socket_irq[PANGOLIN_SOCK] = IRQ_PCMCIA_IRQ;
-
-  /* Set transition detect */
-  set_irq_type(IRQ_PCMCIA_CD, IRQT_NOEDGE);
-  set_irq_type(IRQ_PCMCIA_IRQ, IRQT_FALLING);
+	skt->irq = IRQ_PCMCIA_IRQ;
 
-  /* Register interrupts */
-  res = request_irq(IRQ_PCMCIA_CD, sa1100_pcmcia_interrupt, SA_INTERRUPT,
-		    "PCMCIA_CD", NULL);
-  if (res >= 0)
-    /* There's only one slot, but it's "Slot 1": */
-    return 2;
-
-irq_err:
-  printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	 __FUNCTION__, IRQ_PCMCIA_CD, res);
-
-  return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int pangolin_pcmcia_shutdown(void)
+static void pangolin_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  /* disable IRQs */
-  free_irq(IRQ_PCMCIA_CD, NULL);
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
+
 #ifndef CONFIG_SA1100_PANGOLIN_PCMCIA_IDE
-    /* Disable PCMCIA bus: */
-    GPSR = GPIO_PCMCIA_BUS_ON;
+	/* Disable PCMCIA bus: */
+	GPSR = GPIO_PCMCIA_BUS_ON;
 #endif
-  return 0;
 }
 
-static void pangolin_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+pangolin_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			     struct pcmcia_state *state)
 {
-  unsigned long levels = GPLR;;
+	unsigned long levels = GPLR;	/* single read of GPLR, decoded below */
 
-  if (sock == PANGOLIN_SOCK) {
-    state->detect=((levels & GPIO_PCMCIA_CD)==0)?1:0;
-    state->ready=(levels & GPIO_PCMCIA_IRQ)?1:0;
-    state->bvd1=1; /* Not available on Pangolin. */
-    state->bvd2=1; /* Not available on Pangolin. */
-    state->wrprot=0; /* Not available on Pangolin. */
-    state->vs_3v=1;  /* Can only apply 3.3V on Pangolin. */
-    state->vs_Xv=0;
-  }
+	state->detect = (levels & GPIO_PCMCIA_CD) ? 0 : 1; /* detected when CD reads 0 */
+	state->ready = (levels & GPIO_PCMCIA_IRQ) ? 1 : 0;
+	state->bvd1 = 1;	/* Not available on Pangolin. */
+	state->bvd2 = 1;	/* Not available on Pangolin. */
+	state->wrprot = 0;	/* Not available on Pangolin. */
+	state->vs_3v = 1;	/* Can only apply 3.3V on Pangolin. */
+	state->vs_Xv = 0;
 }
 
-static int pangolin_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+pangolin_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				 const socket_state_t *state)
 {
-  unsigned long value, flags;
+	unsigned long flags;	/* saved IRQ state for local_irq_save/restore */
 
-  if(sock>1) return -1;
-#ifndef CONFIG_SA1100_PANGOLIN_PCMCIA_IDE
-  if(sock==0) return 0;
-#endif
-  local_irq_save(flags);
+	local_irq_save(flags);
 
-  /* Murphy: BUS_ON different from POWER ? */
+	/* Murphy: BUS_ON different from POWER ? */
 
-  switch(configure->vcc){
-  case 0:
-    break;
+	switch (state->Vcc) {
+	case 0:
+		break;
 #ifndef CONFIG_SA1100_PANGOLIN_PCMCIA_IDE
-  case 50:
-    printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
-	   __FUNCTION__);
-  case 33:  /* Can only apply 3.3V to the CF slot. */
-    break;
+	case 50:
+		printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
+			__FUNCTION__);
+	case 33:  /* 5V request falls through: can only apply 3.3V to the CF slot. */
+		break;
 #else
-  case 50:
-    printk(KERN_WARNING "%s(): CS asked for 5V, determinded by jumper setting...\n", __FUNCTION__);
-    break;
-  case 33:
-    printk(KERN_WARNING "%s(): CS asked for 3.3V, determined by jumper setting...\n", __FUNCTION__);
-    break;
+	case 50:
+		printk(KERN_WARNING "%s(): CS asked for 5V, determined by "
+			"jumper setting...\n", __FUNCTION__);
+		break;
+	case 33:
+		printk(KERN_WARNING "%s(): CS asked for 3.3V, determined by "
+			"jumper setting...\n", __FUNCTION__);
+		break;
 #endif
-  default:
-    printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-	   configure->vcc);
-    local_irq_restore(flags);
-    return -1;
-  }
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
+			__FUNCTION__, state->Vcc);
+		local_irq_restore(flags);
+		return -1;
+	}
 #ifdef CONFIG_SA1100_PANGOLIN_PCMCIA_IDE
-  /* reset & unreset request */
-  if(sock==0) {
-	if(configure->reset) {
-		GPSR |= GPIO_PCMCIA_RESET;
-	} else {
-		GPCR |= GPIO_PCMCIA_RESET;
+	/* reset & unreset request */
+	if (skt->nr == 0) {
+		if (state->flags & SS_RESET) {
+			GPSR = GPIO_PCMCIA_RESET;
+		} else {
+			GPCR = GPIO_PCMCIA_RESET;
+		}
 	}
-  }
 #endif
-  /* Silently ignore Vpp, output enable, speaker enable. */
-  local_irq_restore(flags);
-  return 0;
+	/* Silently ignore Vpp, output enable, speaker enable. */
+	local_irq_restore(flags);
+	return 0;
 }
 
-static int pangolin_pcmcia_socket_init(int sock)
+static void pangolin_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 1)
-    set_irq_type(IRQ_PCMCIA_CD, IRQT_BOTHEDGE);
-  return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int pangolin_pcmcia_socket_suspend(int sock)
+static void pangolin_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  if (sock == 1)
-    set_irq_type(IRQ_PCMCIA_CD, IRQT_NOEDGE);
-  return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level pangolin_pcmcia_ops = { 
-  .owner		= THIS_MODULE,
-  .init			= pangolin_pcmcia_init,
-  .shutdown		= pangolin_pcmcia_shutdown,
-  .socket_state		= pangolin_pcmcia_socket_state,
-  .configure_socket	= pangolin_pcmcia_configure_socket,
+	.owner			= THIS_MODULE,
+	.hw_init		= pangolin_pcmcia_hw_init,
+	.hw_shutdown		= pangolin_pcmcia_hw_shutdown,
+	.socket_state		= pangolin_pcmcia_socket_state,
+	.configure_socket	= pangolin_pcmcia_configure_socket,
 
-  .socket_init		= pangolin_pcmcia_socket_init,
-  .socket_suspend	= pangolin_pcmcia_socket_suspend,
+	.socket_init		= pangolin_pcmcia_socket_init,
+	.socket_suspend		= pangolin_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_pangolin_init(struct device *dev)
@@ -157,13 +141,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_pangolin())
-		ret = sa1100_register_pcmcia(&pangolin_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &pangolin_pcmcia_ops, PANGOLIN_SOCK, 1);
 
 	return ret;
 }
-
-void __exit pcmcia_pangolin_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&pangolin_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_pfs168.c b/drivers/pcmcia/sa1100_pfs168.c
--- a/drivers/pcmcia/sa1100_pfs168.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_pfs168.c	Mon Mar 31 13:41:07 2003
@@ -16,10 +16,9 @@
 #include <asm/mach-types.h>
 #include <asm/irq.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
-static int pfs168_pcmcia_init(struct pcmcia_init *init)
+static int pfs168_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
   /* TPS2211 to standby mode: */
   PA_DWR &= ~(GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3);
@@ -27,11 +26,12 @@
   /* Set GPIO_A<3:0> to be outputs for PCMCIA (socket 0) power controller: */
   PA_DDR &= ~(GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3);
 
-  return sa1111_pcmcia_init(init);
+  return sa1111_pcmcia_hw_init(skt);
 }
 
 static int
-pfs168_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+pfs168_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			       const socket_state_t *state)
 {
   unsigned int pa_dwr_mask = 0, pa_dwr_set = 0;
   int ret;
@@ -48,33 +48,33 @@
    *
    */
 
-  switch (sock) {
+  switch (skt->nr) {
   case 0:
     pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3;
 
-    switch (conf->vcc) {
+    switch (state->Vcc) {
     default:
     case 0:	pa_dwr_set = 0;			break;
     case 33:	pa_dwr_set = GPIO_GPIO0;	break;
     case 50:	pa_dwr_set = GPIO_GPIO1;	break;
     }
 
-    switch (conf->vpp) {
+    switch (state->Vpp) {
     case 0:
       break;
 
     case 120:
       printk(KERN_ERR "%s(): PFS-168 does not support VPP %uV\n",
-	     __FUNCTION__, conf->vpp / 10);
+	     __FUNCTION__, state->Vpp / 10);
       return -1;
       break;
 
     default:
-      if (conf->vpp == conf->vcc)
+      if (state->Vpp == state->Vcc)
         pa_dwr_set |= GPIO_GPIO3;
       else {
 	printk(KERN_ERR "%s(): unrecognized VPP %u\n", __FUNCTION__,
-	       conf->vpp);
+	       state->Vpp);
 	return -1;
       }
     }
@@ -91,24 +91,24 @@
 
     case 50:
       printk(KERN_ERR "%s(): PFS-168 CompactFlash socket does not support VCC %uV\n",
-	     __FUNCTION__, conf->vcc / 10);
+	     __FUNCTION__, state->Vcc / 10);
       return -1;
 
     default:
       printk(KERN_ERR "%s(): unrecognized VCC %u\n", __FUNCTION__,
-	     conf->vcc);
+	     state->Vcc);
       return -1;
     }
 
-    if (conf->vpp != conf->vcc && conf->vpp != 0) {
+    if (state->Vpp != state->Vcc && state->Vpp != 0) {
       printk(KERN_ERR "%s(): CompactFlash socket does not support VPP %uV\n"
-	     __FUNCTION__, conf->vpp / 10);
+	     __FUNCTION__, state->Vpp / 10);
       return -1;
     }
     break;
   }
 
-  ret = sa1111_pcmcia_configure_socket(sock, conf);
+  ret = sa1111_pcmcia_configure_socket(skt, state);
   if (ret == 0) {
     unsigned long flags;
 
@@ -121,14 +121,13 @@
 }
 
 static struct pcmcia_low_level pfs168_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .init			= pfs168_pcmcia_init,
-  .shutdown		= sa1111_pcmcia_shutdown,
-  .socket_state		= sa1111_pcmcia_socket_state,
-  .configure_socket	= pfs168_pcmcia_configure_socket,
-
-  .socket_init		= sa1111_pcmcia_socket_init,
-  .socket_suspend	= sa1111_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= pfs168_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= pfs168_pcmcia_configure_socket,
+	.socket_init		= sa1111_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_pfs168_init(struct device *dev)
@@ -136,12 +135,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_pfs168())
-		ret = sa1100_register_pcmcia(&pfs168_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &pfs168_pcmcia_ops, 0, 2);
 
 	return ret;
-}
-
-void __exit pcmcia_pfs168_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&pfs168_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c
--- a/drivers/pcmcia/sa1100_shannon.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_shannon.c	Mon Mar 31 13:41:08 2003
@@ -16,64 +16,36 @@
 #include <asm/irq.h>
 #include "sa1100_generic.h"
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ SHANNON_IRQ_GPIO_EJECT_0, "PCMCIA_CD_0" },
-	{ SHANNON_IRQ_GPIO_EJECT_1, "PCMCIA_CD_1" },
+static struct pcmcia_irqs irqs[] = {
+	{ 0, SHANNON_IRQ_GPIO_EJECT_0, "PCMCIA_CD_0" },
+	{ 1, SHANNON_IRQ_GPIO_EJECT_1, "PCMCIA_CD_1" },
 };
 
-static int shannon_pcmcia_init(struct pcmcia_init *init)
+static int shannon_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i, res;
-
 	/* All those are inputs */
 	GPDR &= ~(SHANNON_GPIO_EJECT_0 | SHANNON_GPIO_EJECT_1 | 
 		  SHANNON_GPIO_RDY_0 | SHANNON_GPIO_RDY_1);
 	GAFR &= ~(SHANNON_GPIO_EJECT_0 | SHANNON_GPIO_EJECT_1 | 
 		  SHANNON_GPIO_RDY_0 | SHANNON_GPIO_RDY_1);
 
-	init->socket_irq[0] = SHANNON_IRQ_GPIO_RDY_0;
-	init->socket_irq[1] = SHANNON_IRQ_GPIO_RDY_1;
-
-	/* Register interrupts */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (res)
-			goto irq_err;
-		set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-	}
-
-	return 2;
+	skt->irq = skt->nr ? SHANNON_IRQ_GPIO_RDY_1 : SHANNON_IRQ_GPIO_RDY_0;
 
- irq_err:
-	printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-		__FUNCTION__, irqs[i].irq, res);
-
-	while (i--)
-		free_irq(irqs[i].irq, NULL);
-
-	return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int shannon_pcmcia_shutdown(void)
+static void shannon_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	/* disable IRQs */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		free_irq(irqs[i].irq, NULL);
-
-	return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static void shannon_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+shannon_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			    struct pcmcia_state *state)
 {
 	unsigned long levels = GPLR;
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
 		state->detect = (levels & SHANNON_GPIO_EJECT_0) ? 0 : 1;
 		state->ready  = (levels & SHANNON_GPIO_RDY_0) ? 1 : 0;
@@ -96,9 +68,11 @@
 	}
 }
 
-static int shannon_pcmcia_configure_socket(int sock, const struct pcmcia_configure *configure)
+static int
+shannon_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				const socket_state_t *state)
 {
-	switch (configure->vcc) {
+	switch (state->Vcc) {
 	case 0:	/* power off */
 		printk(KERN_WARNING __FUNCTION__"(): CS asked for 0V, still applying 3.3V..\n");
 		break;
@@ -108,7 +82,7 @@
 		break;
 	default:
 		printk(KERN_ERR __FUNCTION__"(): unrecognized Vcc %u\n",
-		       configure->vcc);
+		       state->Vcc);
 		return -1;
 	}
 
@@ -119,30 +93,20 @@
 	return 0;
 }
 
-static int shannon_pcmcia_socket_init(int sock)
+static void shannon_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-	if (sock == 0)
-		set_irq_type(SHANNON_IRQ_GPIO_EJECT_0, IRQT_BOTHEDGE);
-	else if (sock == 1)
-		set_irq_Type(SHANNON_IRQ_GPIO_EJECT_1, IRQT_BOTHEDGE);
-
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int shannon_pcmcia_socket_suspend(int sock)
+static void shannon_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	if (sock == 0)
-		set_irq_type(SHANNON_IRQ_GPIO_EJECT_0, IRQT_NOEDGE);
-	else if (sock == 1)
-		set_irq_type(SHANNON_IRQ_GPIO_EJECT_1, IRQT_NOEDGE);
-
-	return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level shannon_pcmcia_ops = {
 	.owner			= THIS_MODULE,
-	.init			= shannon_pcmcia_init,
-	.shutdown		= shannon_pcmcia_shutdown,
+	.hw_init		= shannon_pcmcia_hw_init,
+	.hw_shutdown		= shannon_pcmcia_hw_shutdown,
 	.socket_state		= shannon_pcmcia_socket_state,
 	.configure_socket	= shannon_pcmcia_configure_socket,
 
@@ -155,12 +119,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_shannon())
-		ret = sa1100_register_pcmcia(&shannon_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &shannon_pcmcia_ops, 0, 2);
 
 	return ret;
-}
-
-void __exit pcmcia_shannon_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&shannon_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_simpad.c b/drivers/pcmcia/sa1100_simpad.c
--- a/drivers/pcmcia/sa1100_simpad.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_simpad.c	Mon Mar 31 13:41:08 2003
@@ -19,134 +19,110 @@
 extern void set_cs3_bit(int value); 
 extern void clear_cs3_bit(int value);
 
+static struct pcmcia_irqs irqs[] = {
+	{ 1, IRQ_GPIO_CF_CD, "CF_CD" },
+};
 
-static int simpad_pcmcia_init(struct pcmcia_init *init){
-  int irq, res;
-
-  set_cs3_bit(PCMCIA_RESET);
-  clear_cs3_bit(PCMCIA_BUFF_DIS);
-  clear_cs3_bit(PCMCIA_RESET);
-
-  clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
-
-  init->socket_irq[1] = IRQ_GPIO_CF_IRQ;
-
-  /* Register interrupts */
-  irq = IRQ_GPIO_CF_CD;
-  res = request_irq(irq, sa1100_pcmcia_interrupt, SA_INTERRUPT,
-		    "CF_CD", NULL );
-  if( res < 0 ) goto irq_err;
+static int simpad_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
+{
+	set_cs3_bit(PCMCIA_RESET);
+	clear_cs3_bit(PCMCIA_BUFF_DIS);
+	clear_cs3_bit(PCMCIA_RESET);
 
-  set_irq_type( IRQ_GPIO_CF_CD, IRQT_NOEDGE );
+	clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
 
-  /* There's only one slot, but it's "Slot 1": */
-  return 2;
+	skt->irq = IRQ_GPIO_CF_IRQ;
 
-irq_err:
-  printk( KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	 __FUNCTION__, irq, res);
-  return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int simpad_pcmcia_shutdown(void)
+static void simpad_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-  /* disable IRQs */
-  free_irq( IRQ_GPIO_CF_CD, NULL );
-  
-  /* Disable CF bus: */
-  
-  //set_cs3_bit(PCMCIA_BUFF_DIS);
-  clear_cs3_bit(PCMCIA_RESET);       
-  
-  return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
+
+	/* Disable CF bus: */
+	//set_cs3_bit(PCMCIA_BUFF_DIS);
+	clear_cs3_bit(PCMCIA_RESET);       
 }
 
-static void simpad_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+simpad_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			   struct pcmcia_state *state)
 {
-  if (sock == 1) {
-    unsigned long levels = GPLR;
-    unsigned long *cs3reg = CS3_BASE;
+	unsigned long levels = GPLR;
+	unsigned long *cs3reg = CS3_BASE;
 
-    state->detect=((levels & GPIO_CF_CD)==0)?1:0;
-    state->ready=(levels & GPIO_CF_IRQ)?1:0;
-    state->bvd1=1; /* Not available on Simpad. */
-    state->bvd2=1; /* Not available on Simpad. */
-    state->wrprot=0; /* Not available on Simpad. */
+	state->detect=((levels & GPIO_CF_CD)==0)?1:0;
+	state->ready=(levels & GPIO_CF_IRQ)?1:0;
+	state->bvd1=1; /* Not available on Simpad. */
+	state->bvd2=1; /* Not available on Simpad. */
+	state->wrprot=0; /* Not available on Simpad. */
   
-    if((*cs3reg & 0x0c) == 0x0c) {
-      state->vs_3v=0;
-      state->vs_Xv=0;
-    } else {
-      state->vs_3v=1;
-      state->vs_Xv=0;
-    }
-  }
+	if((*cs3reg & 0x0c) == 0x0c) {
+		state->vs_3v=0;
+		state->vs_Xv=0;
+	} else {
+		state->vs_3v=1;
+		state->vs_Xv=0;
+	}
 }
 
-static int simpad_pcmcia_configure_socket(int sock, const struct pcmcia_configure
-					   *configure)
+static int
+simpad_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			       const socket_state_t *state)
 {
-  unsigned long value, flags;
-
-  if(sock>1) return -1;
-
-  if(sock==0) return 0;
-
-  local_irq_save(flags);
-
-  /* Murphy: see table of MIC2562a-1 */
+	unsigned long flags;	/* saved IRQ state for local_irq_save/restore */
-  switch(configure->vcc){
-  case 0:
-    clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
-    break;
+	local_irq_save(flags);
 
-  case 33:  
-    clear_cs3_bit(VCC_3V_EN|EN0);
-    set_cs3_bit(VCC_5V_EN|EN1);
-    break;
+	/* Murphy: see table of MIC2562a-1 */
+	switch (state->Vcc) {
+	case 0:
+		clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
+		break;
 
-  case 50:
-    clear_cs3_bit(VCC_5V_EN|EN1);
-    set_cs3_bit(VCC_3V_EN|EN0);
-    break;
+	case 33:  
+		clear_cs3_bit(VCC_3V_EN|EN0);
+		set_cs3_bit(VCC_5V_EN|EN1);
+		break;
 
-  default:
-    printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-	   configure->vcc);
-    clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
-    local_irq_restore(flags);
-    return -1;
-  }
+	case 50:
+		clear_cs3_bit(VCC_5V_EN|EN1);
+		set_cs3_bit(VCC_3V_EN|EN0);
+		break;
 
-  /* Silently ignore Vpp, output enable, speaker enable. */
+	default:
+		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
+			__FUNCTION__, state->Vcc);
+		clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
+		local_irq_restore(flags);
+		return -1;
+	}
 
-  local_irq_restore(flags);
+	/* Silently ignore Vpp, output enable, speaker enable. */
+	local_irq_restore(flags);
 
-  return 0;
+	return 0;
 }
 
-static int simpad_pcmcia_socket_init(int sock)
+static void simpad_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-  set_irq_type(IRQ_GPIO_CF_CD, IRQT_BOTHEDGE);
-  return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int simpad_pcmcia_socket_suspend(int sock)
+static void simpad_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-  set_irq_type(IRQ_GPIO_CF_CD, IRQT_NOEDGE);
-  return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level simpad_pcmcia_ops = { 
-  .owner		= THIS_MODULE,
-  .init			= simpad_pcmcia_init,
-  .shutdown		= simpad_pcmcia_shutdown,
-  .socket_state		= simpad_pcmcia_socket_state,
-  .configure_socket	= simpad_pcmcia_configure_socket,
-
-  .socket_init		= simpad_pcmcia_socket_init,
-  .socket_suspend	= simpad_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= simpad_pcmcia_hw_init,
+	.hw_shutdown		= simpad_pcmcia_hw_shutdown,
+	.socket_state		= simpad_pcmcia_socket_state,
+	.configure_socket	= simpad_pcmcia_configure_socket,
+	.socket_init		= simpad_pcmcia_socket_init,
+	.socket_suspend		= simpad_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_simpad_init(struct device *dev)
@@ -154,12 +130,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_simpad())
-		ret = sa1100_register_pcmcia(&simpad_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &simpad_pcmcia_ops, 1, 1);
 
 	return ret;
-}
-
-void __exit pcmcia_simpad_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&simpad_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_stork.c b/drivers/pcmcia/sa1100_stork.c
--- a/drivers/pcmcia/sa1100_stork.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_stork.c	Mon Mar 31 13:41:07 2003
@@ -32,62 +32,39 @@
 
 static int debug = 0;
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_GPIO_STORK_PCMCIA_A_CARD_DETECT, "PCMCIA_CD0" },
-	{ IRQ_GPIO_STORK_PCMCIA_B_CARD_DETECT, "PCMCIA_CD1" },
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_GPIO_STORK_PCMCIA_A_CARD_DETECT, "PCMCIA_CD0" },
+	{ 1, IRQ_GPIO_STORK_PCMCIA_B_CARD_DETECT, "PCMCIA_CD1" },
 };
 
-static int stork_pcmcia_init(struct pcmcia_init *init)
+static int stork_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int irq, res;
-
 	printk("in stork_pcmcia_init\n");
 
-	init->socket_irq[0] = IRQ_GPIO_STORK_PCMCIA_A_RDY;
-	init->socket_irq[1] = IRQ_GPIO_STORK_PCMCIA_B_RDY;
-
-	/* Register interrupts */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (res)
-			goto irq_err;
-		set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-	}
-
-        return 2;
-
- irq_err:
-        printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	       __FUNCTION__, irq, res);
+	skt->irq = skt->nr ? IRQ_GPIO_STORK_PCMCIA_B_RDY
+			   : IRQ_GPIO_STORK_PCMCIA_A_RDY;
 
-	while (i--)
-		free_irq(irqs[i].irq, NULL);
-
-        return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int stork_pcmcia_shutdown(void)
+static void stork_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
 	int i;
 
         printk(__FUNCTION__ "\n");
 
         /* disable IRQs */
-        for (i = 0; i < ARRAY_SIZE(irqs); i++)
-        	free_irq(irqs[i].irq, NULL);
+        sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
   
         /* Disable CF bus: */
         storkClearLatchA(STORK_PCMCIA_PULL_UPS_POWER_ON);
 	storkClearLatchA(STORK_PCMCIA_A_POWER_ON);
 	storkClearLatchA(STORK_PCMCIA_B_POWER_ON);
-        return 0;
 }
 
-static void stork_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+stork_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			  struct pcmcia_state *state)
 {
         unsigned long levels = GPLR;
 
@@ -95,7 +72,7 @@
 		printk(__FUNCTION__ " GPLR=%x IRQ[1:0]=%x\n", levels,
 			(levels & (GPIO_STORK_PCMCIA_A_RDY|GPIO_STORK_PCMCIA_B_RDY)));
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
 		state->detect=((levels & GPIO_STORK_PCMCIA_A_CARD_DETECT)==0)?1:0;
 		state->ready=(levels & GPIO_STORK_PCMCIA_A_RDY)?1:0;
@@ -118,20 +95,19 @@
 	}
 }
 
-static int stork_pcmcia_configure_socket(int sock, const struct pcmcia_configure *configure)
+static int
+stork_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			      const socket_state_t *state)
 {
 	unsigned long flags;
-
         int DETECT, RDY, POWER, RESET;
 
-        if (sock > 1) return -1;
-
-	printk(__FUNCTION__ ": socket=%d vcc=%d vpp=%d reset=%d\n", 
-                       sock, configure->vcc, configure->vpp, configure->reset);
+	printk("%s: socket=%d vcc=%d vpp=%d reset=%d\n", __FUNCTION__,
+		skt->nr, state->Vcc, state->Vpp, state->flags & SS_RESET ? 1 : 0);
 
 	local_irq_save(flags);
 
-        if (sock == 0) {
+        if (skt->nr == 0) {
     	    DETECT = GPIO_STORK_PCMCIA_A_CARD_DETECT;
     	    RDY = GPIO_STORK_PCMCIA_A_RDY;
     	    POWER = STORK_PCMCIA_A_POWER_ON;
@@ -148,7 +124,7 @@
            printk("no card detected - but resetting anyway\r\n");
         }
 */
-	switch (configure->vcc) {
+	switch (state->Vcc) {
 	case 0:
 /*		storkClearLatchA(STORK_PCMCIA_PULL_UPS_POWER_ON); */
                 storkClearLatchA(POWER);
@@ -162,12 +138,12 @@
 
 	default:
 		printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-		       configure->vcc);
+		       state->Vcc);
 		local_irq_restore(flags);
 		return -1;
 	}
 
-	if (configure->reset)
+	if (state->flags & SS_RESET)
                 storkSetLatchB(RESET);
 	else
                 storkClearLatchB(RESET);
@@ -176,43 +152,33 @@
 
         /* silently ignore vpp and speaker enables. */
 
-        printk(__FUNCTION__ ": finished\n");
+        printk("%s: finished\n", __FUNCTION__);
 
         return 0;
 }
 
-static int stork_pcmcia_socket_init(int sock)
+static void stork_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
         storkSetLatchA(STORK_PCMCIA_PULL_UPS_POWER_ON);
 
-        if (sock == 0)
-		set_irq_type(IRQ_GPIO_STORK_PCMCIA_A_CARD_DETECT, IRQT_BOTHEDGE);
-        else if (sock == 1)
-		set_irq_type(IRQ_GPIO_STORK_PCMCIA_B_CARD_DETECT, IRQT_BOTHEDGE);
-
-	return 0;
+        sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int stork_pcmcia_socket_suspend(int sock)
+static void stork_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-        if (sock == 0)
-		set_irq_type(IRQ_GPIO_STORK_PCMCIA_A_CARD_DETECT, IRQT_NOEDGE);
-        else if (sock == 1) {
-		set_irq_type(IRQ_GPIO_STORK_PCMCIA_B_CARD_DETECT, IRQT_NOEDGE);
-
-		/*
-		 * Hack!
-		 */
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
+
+	/*
+	 * Hack!
+	 */
+	if (skt->nr == 1)
 	        storkClearLatchA(STORK_PCMCIA_PULL_UPS_POWER_ON);
-	}
-
-	return 0;
 }
 
 static struct pcmcia_low_level stork_pcmcia_ops = { 
 	.owner			= THIS_MODULE,
-	.init			= stork_pcmcia_init,
-	.shutdown		= stork_pcmcia_shutdown,
+	.hw_init		= stork_pcmcia_hw_init,
+	.hw_shutdown		= stork_pcmcia_hw_shutdown,
 	.socket_state		= stork_pcmcia_socket_state,
 	.configure_socket	= stork_pcmcia_configure_socket,
 
@@ -225,13 +191,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_stork())
-		ret = sa1100_register_pcmcia(&stork_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &stork_pcmcia_ops, 0, 2);
 
 	return ret;
 }
-
-void __exit pcmcia_stork_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&stork_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_system3.c b/drivers/pcmcia/sa1100_system3.c
--- a/drivers/pcmcia/sa1100_system3.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_system3.c	Mon Mar 31 13:41:08 2003
@@ -37,7 +37,6 @@
 #include <asm/irq.h>
 #include <asm/hardware/sa1111.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
 #define DEBUG 0
@@ -48,34 +47,24 @@
 #	define DPRINTK( x, args... )	/* nix */
 #endif
 
-int system3_pcmcia_init(struct pcmcia_init *init)
+static int system3_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	init->socket_irq[0] = IRQ_S0_READY_NINT;
-	init->socket_irq[1] = IRQ_S1_READY_NINT;
+	skt->irq = skt->nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT;
 
 	/* Don't need no CD and BVD* interrupts */
-	return 2;
-}
-
-int system3_pcmcia_shutdown(void)
-{
 	return 0;
 }
 
-int system3_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+void system3_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	/* only CF ATM */
-	if (sock == 0)
-		return -1;
-
-	return sa1111_pcmcia_configure_socket(sock, conf);
 }
 
-static void system3_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+static void
+system3_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
 	unsigned long status = PCSR;
 
-	switch (sock) {
+	switch (skt->nr) {
 #if 0 /* PCMCIA socket not yet connected */
 	case 0:
 		state->detect = status & PCSR_S0_DETECT ? 0 : 1;
@@ -100,15 +89,15 @@
 	}
 
 	DPRINTK("Sock %d PCSR=0x%08lx, Sx_RDY_nIREQ=%d\n",
-		sock, status, state->ready);
+		skt->nr, status, state->ready);
 }
 
 struct pcmcia_low_level system3_pcmcia_ops = {
 	.owner			= THIS_MODULE,
-	.init			= system3_pcmcia_init,
-	.shutdown		= system3_pcmcia_shutdown,
+	.hw_init		= system3_pcmcia_hw_init,
+	.hw_shutdown		= system3_pcmcia_hw_shutdown,
 	.socket_state		= system3_pcmcia_socket_state,
-	.configure_socket	= system3_pcmcia_configure_socket,
+	.configure_socket	= sa1111_pcmcia_configure_socket,
 
 	.socket_init		= sa1111_pcmcia_socket_init,
 	.socket_suspend		= sa1111_pcmcia_socket_suspend,
@@ -119,12 +108,8 @@
 	int ret = -ENODEV;
 
 	if (machine_is_pt_system3())
-		ret = sa1100_register_pcmcia(&system3_pcmcia_ops, dev);
+		/* only CF ATM */
+		ret = sa11xx_drv_pcmcia_probe(dev, &system3_pcmcia_ops, 1, 1);
 
 	return ret;
-}
-
-void __exit pcmcia_system3_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&system3_pcmcia_ops, dev);
 }
diff -Nru a/drivers/pcmcia/sa1100_trizeps.c b/drivers/pcmcia/sa1100_trizeps.c
--- a/drivers/pcmcia/sa1100_trizeps.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_trizeps.c	Mon Mar 31 13:41:07 2003
@@ -23,15 +23,18 @@
 #include "sa1100_generic.h"
 
 #define NUMBER_OF_TRIZEPS_PCMCIA_SLOTS 1
+
+static struct pcmcia_irqs irqs[] = {
+	{ 0, TRIZEPS_IRQ_PCMCIA_CD0, "PCMCIA_CD0" },
+};
+
 /**
  *
  *
  ******************************************************/
-static int trizeps_pcmcia_init(struct pcmcia_init *init)
+static int trizeps_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int res;
-
-	init->socket_irq[0] = TRIZEPS_IRQ_PCMCIA_IRQ0;
+	skt->irq = TRIZEPS_IRQ_PCMCIA_IRQ0;
 
 	/* Enable CF bus: */
 	TRIZEPS_BCR_clear(TRIZEPS_BCR1, TRIZEPS_nPCM_ENA_REG);
@@ -40,74 +43,54 @@
 	GPDR &= ~((GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_CD0))
 		    | (GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_IRQ0)));
 
-	/* Register SOCKET interrupts */
-	/* WHY? */
-	res = request_irq(TRIZEPS_IRQ_PCMCIA_CD0, sa1100_pcmcia_interrupt,
-			  SA_INTERRUPT, "PCMCIA_CD0", NULL );
-	if( res < 0 ) goto irq_err;
-	set_irq_type(TRIZEPS_IRQ_PCMCIA_CD0, IRQT_NOEDGE);
-
-	//MECR = 0x00060006; // Initialised on trizeps init
-
-	// return=sa1100_pcmcia_socket_count (sa1100_generic.c)
-	//        -> number of PCMCIA Slots
-	// Slot 0 -> Trizeps PCMCIA
-	// Slot 1 -> Trizeps ISA-Bus
-	return NUMBER_OF_TRIZEPS_PCMCIA_SLOTS;
-
- irq_err:
-	printk( KERN_ERR "%s(): PCMCIA Request for IRQ %u failed\n", __FUNCTION__, TRIZEPS_IRQ_PCMCIA_CD0 );
-	return -1;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 /**
  *
  *
  ******************************************************/
-static int trizeps_pcmcia_shutdown(void)
+static void trizeps_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
 	printk(">>>>>PCMCIA TRIZEPS shutdown\n");
-	/* disable IRQs */
-	free_irq(TRIZEPS_IRQ_PCMCIA_CD0, NULL );
+
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 
 	/* Disable CF bus: */
 	TRIZEPS_BCR_set(TRIZEPS_BCR1, TRIZEPS_nPCM_ENA_REG);
-
-	return 0;
 }
 
 /**
  *
-
  ******************************************************/
-static void trizeps_pcmcia_socket_state(int sock, struct pcmcia_state *state_array)
+static void
+trizeps_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			    struct pcmcia_state *state)
 {
 	unsigned long levels = GPLR;
 
-	if (sock == 0) {
-		state->detect = ((levels & GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_CD0)) == 0) ? 1 : 0;
-		state->ready  = ((levels & GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_IRQ0)) != 0) ? 1 : 0;
-		state->bvd1   = ((TRIZEPS_BCR1 & TRIZEPS_PCM_BVD1) !=0 ) ? 1 : 0;
-		state->bvd2   = ((TRIZEPS_BCR1 & TRIZEPS_PCM_BVD2) != 0) ? 1 : 0;
-		state->wrprot = 0; // not write protected
-		state->vs_3v  = ((TRIZEPS_BCR1 & TRIZEPS_nPCM_VS1) == 0) ? 1 : 0; //VS1=0 -> vs_3v=1
-		state->vs_Xv  = ((TRIZEPS_BCR1 & TRIZEPS_nPCM_VS2) == 0) ? 1 : 0; //VS2=0 -> vs_Xv=1
-	}
+	state->detect = ((levels & GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_CD0)) == 0) ? 1 : 0;
+	state->ready  = ((levels & GPIO_GPIO(TRIZEPS_GPIO_PCMCIA_IRQ0)) != 0) ? 1 : 0;
+	state->bvd1   = ((TRIZEPS_BCR1 & TRIZEPS_PCM_BVD1) !=0 ) ? 1 : 0;
+	state->bvd2   = ((TRIZEPS_BCR1 & TRIZEPS_PCM_BVD2) != 0) ? 1 : 0;
+	state->wrprot = 0; // not write protected
+	state->vs_3v  = ((TRIZEPS_BCR1 & TRIZEPS_nPCM_VS1) == 0) ? 1 : 0; //VS1=0 -> vs_3v=1
+	state->vs_Xv  = ((TRIZEPS_BCR1 & TRIZEPS_nPCM_VS2) == 0) ? 1 : 0; //VS2=0 -> vs_Xv=1
 }
 
 /**
  *
  *
  ******************************************************/
-static int trizeps_pcmcia_configure_socket(int sock, const struct pcmcia_configure *configure)
+static int
+trizeps_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+				const socket_state_t *state)
 {
 	unsigned long flags;
 
-	if(sock>1) return -1;
-
 	local_irq_save(flags);
 
-	switch (configure->vcc) {
+	switch (state->Vcc) {
 	case 0:
 		printk(">>> PCMCIA Power off\n");
 		TRIZEPS_BCR_clear(TRIZEPS_BCR1, TRIZEPS_PCM_V3_EN_REG);
@@ -126,19 +109,19 @@
 		break;
 	default:
 		printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __FUNCTION__,
-		       configure->vcc);
+		       state->Vcc);
 		local_irq_restore(flags);
 		return -1;
 	}
 
-	if (configure->reset)
+	if (state->flags & SS_RESET)
 		TRIZEPS_BCR_set(TRIZEPS_BCR1, TRIZEPS_nPCM_RESET_DISABLE);   // Reset
 	else
 		TRIZEPS_BCR_clear(TRIZEPS_BCR1, TRIZEPS_nPCM_RESET_DISABLE); // no Reset
 	/*
 	  printk(" vcc=%u vpp=%u -->reset=%i\n",
-	  configure->vcc,
-	  configure->vpp,
+	  state->Vcc,
+	  state->Vpp,
 	  ((BCR_read(1) & nPCM_RESET_DISABLE)? 1:0));
 	*/
 	local_irq_restore(flags);
@@ -146,16 +129,14 @@
 	return 0;
 }
 
-static int trizeps_pcmcia_socket_init(int sock)
+static void trizeps_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-	set_irq_type(TRIZEPS_IRQ_PCMCIA_CD0, IRQT_BOTHEDGE);
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int trizeps_pcmcia_socket_suspend(int sock)
+static void trizeps_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	set_irq_type(TRIZEPS_IRQ_PCMCIA_CD0, IRQT_NOEDGE);
-	return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 /**
@@ -164,8 +145,8 @@
  ******************************************************/
 struct pcmcia_low_level trizeps_pcmcia_ops = {
 	.owner			= THIS_MODULE,
-	.init			= trizeps_pcmcia_init,
-	.shutdown		= trizeps_pcmcia_shutdown,
+	.hw_init		= trizeps_pcmcia_hw_init,
+	.hw_shutdown		= trizeps_pcmcia_hw_shutdown,
 	.socket_state		= trizeps_pcmcia_socket_state,
 	.configure_socket	= trizeps_pcmcia_configure_socket,
 	.socket_init		= trizeps_pcmcia_socket_init,
@@ -174,13 +155,11 @@
 
 int __init pcmcia_trizeps_init(struct device *dev)
 {
-	if (machine_is_trizeps()) {
-		return sa1100_register_pcmcia(&trizeps_pcmcia_ops, dev);
-	}
-	return -ENODEV;
-}
+	int ret = -ENODEV;
 
-void __exit pcmcia_trizeps_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&trizeps_pcmcia_ops, dev);
+	if (machine_is_trizeps())
+		ret = sa11xx_drv_pcmcia_probe(dev, &trizeps_pcmcia_ops, 0,
+					      NUMBER_OF_TRIZEPS_PCMCIA_SLOTS);
+
+	return ret;
 }
diff -Nru a/drivers/pcmcia/sa1100_xp860.c b/drivers/pcmcia/sa1100_xp860.c
--- a/drivers/pcmcia/sa1100_xp860.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/sa1100_xp860.c	Mon Mar 31 13:41:07 2003
@@ -19,7 +19,7 @@
 #define NCR_A0VPP	(1<<16)
 #define NCR_A1VPP	(1<<17)
 
-static int xp860_pcmcia_init(struct pcmcia_init *init)
+static int xp860_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
   /* Set GPIO_A<3:0> to be outputs for PCMCIA/CF power controller: */
   PA_DDR &= ~(GPIO_GPIO0 | GPIO_GPIO1 | GPIO_GPIO2 | GPIO_GPIO3);
@@ -38,11 +38,11 @@
   GPDR |= (NCR_A0VPP | NCR_A1VPP);
   GPCR &= ~(NCR_A0VPP | NCR_A1VPP);
 
-  return sa1111_pcmcia_init(init);
+  return sa1111_pcmcia_hw_init(skt);
 }
 
 static int
-xp860_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+xp860_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
   unsigned int gpio_mask, pa_dwr_mask;
   unsigned int gpio_set, pa_dwr_set;
@@ -72,28 +72,28 @@
    * the corresponding truth table.
    */
 
-  switch (sock) {
+  switch (skt->nr) {
   case 0:
     pa_dwr_mask = GPIO_GPIO0 | GPIO_GPIO1;
     gpio_mask = NCR_A0VPP | NCR_A1VPP;
 
-    switch (conf->vcc) {
+    switch (state->Vcc) {
     default:
     case 0:	pa_dwr_set = 0;			break;
     case 33:	pa_dwr_set = GPIO_GPIO1;	break;
     case 50:	pa_dwr_set = GPIO_GPIO0;	break;
     }
 
-    switch (conf->vpp) {
+    switch (state->Vpp) {
     case 0:	gpio_set = 0;			break;
     case 120:	gpio_set = NCR_A1VPP;		break;
 
     default:
-      if (conf->vpp == conf->vcc)
+      if (state->Vpp == state->Vcc)
 	gpio_set = NCR_A0VPP;
       else {
 	printk(KERN_ERR "%s(): unrecognized Vpp %u\n",
-	       __FUNCTION__, conf->vpp);
+	       __FUNCTION__, state->Vpp);
 	return -1;
       }
     }
@@ -104,22 +104,22 @@
     gpio_mask = 0;
     gpio_set = 0;
 
-    switch (conf->vcc) {
+    switch (state->Vcc) {
     default:
     case 0:	pa_dwr_set = 0;			break;
     case 33:	pa_dwr_set = GPIO_GPIO2;	break;
     case 50:	pa_dwr_set = GPIO_GPIO3;	break;
     }
 
-    if (conf->vpp != conf->vcc && conf->vpp != 0) {
+    if (state->Vpp != state->Vcc && state->Vpp != 0) {
       printk(KERN_ERR "%s(): CF slot cannot support Vpp %u\n",
-	     __FUNCTION__, conf->vpp);
+	     __FUNCTION__, state->Vpp);
       return -1;
     }
     break;
   }
 
-  ret = sa1111_pcmcia_configure_socket(sock, conf);
+  ret = sa1111_pcmcia_configure_socket(skt, state);
   if (ret == 0) {
     unsigned long flags;
 
@@ -134,14 +134,13 @@
 }
 
 static struct pcmcia_low_level xp860_pcmcia_ops = { 
-  .owner		= THIS_MODULE,
-  .init			= xp860_pcmcia_init,
-  .shutdown		= sa1111_pcmcia_shutdown,
-  .socket_state		= sa1111_pcmcia_socket_state,
-  .configure_socket	= xp860_pcmcia_configure_socket,
-
-  .socket_init		= sa1111_pcmcia_socket_init,
-  .socket_suspend	= sa1111_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= xp860_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= xp860_pcmcia_configure_socket,
+	.socket_init		= sa1111_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __init pcmcia_xp860_init(struct device *dev)
@@ -149,13 +148,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_xp860())
-		ret = sa1100_register_pcmcia(&xp860_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &xp860_pcmcia_ops, 0, 2);
 
 	return ret;
 }
-
-void __exit pcmcia_xp860_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&xp860_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1100_yopy.c b/drivers/pcmcia/sa1100_yopy.c
--- a/drivers/pcmcia/sa1100_yopy.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1100_yopy.c	Mon Mar 31 13:41:08 2003
@@ -27,81 +27,51 @@
 	yopy_gpio_set(GPIO_CF_RESET, reset);
 }
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_CF_CD,   "CF_CD"   },
-	{ IRQ_CF_BVD2, "CF_BVD2" },
-	{ IRQ_CF_BVD1, "CF_BVD1" },
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_CF_CD,   "CF_CD"   },
+	{ 0, IRQ_CF_BVD2, "CF_BVD2" },
+	{ 0, IRQ_CF_BVD1, "CF_BVD1" },
 };
 
-static int yopy_pcmcia_init(struct pcmcia_init *init)
+static int yopy_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i, res;
-
-	init->socket_irq[0] = IRQ_CF_IREQ;
+	skt->irq = IRQ_CF_IREQ;
 
 	pcmcia_power(0);
 	pcmcia_reset(1);
 
-	/* Register interrupts */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
-		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (res)
-			goto irq_err;
-		set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-	}
-
-	return 1;
-
- irq_err:
-	printk(KERN_ERR "%s: request for IRQ%d failed (%d)\n",
-	       __FUNCTION__, irqs[i].irq, res);
-
-	while (i--)
-		free_irq(irqs[i].irq, NULL);
-
-	return res;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int yopy_pcmcia_shutdown(void)
+static void yopy_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	/* disable IRQs */
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		free_irq(irqs[i].irq, NULL);
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 
 	/* Disable CF */
 	pcmcia_reset(1);
 	pcmcia_power(0);
-
-	return 0;
 }
 
-static void yopy_pcmcia_socket_state(int sock, struct pcmcia_state_array *state)
+static void
+yopy_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt,
+			 struct pcmcia_state *state)
 {
 	unsigned long levels = GPLR;
 
-	if (sock == 0) {
-		state->detect = (levels & GPIO_CF_CD)    ? 0 : 1;
-		state->ready  = (levels & GPIO_CF_READY) ? 1 : 0;
-		state->bvd1   = (levels & GPIO_CF_BVD1)  ? 1 : 0;
-		state->bvd2   = (levels & GPIO_CF_BVD2)  ? 1 : 0;
-		state->wrprot = 0; /* Not available on Yopy. */
-		state->vs_3v  = 0; /* FIXME Can only apply 3.3V on Yopy. */
-		state->vs_Xv  = 0;
-	}
+	state->detect = (levels & GPIO_CF_CD)    ? 0 : 1;
+	state->ready  = (levels & GPIO_CF_READY) ? 1 : 0;
+	state->bvd1   = (levels & GPIO_CF_BVD1)  ? 1 : 0;
+	state->bvd2   = (levels & GPIO_CF_BVD2)  ? 1 : 0;
+	state->wrprot = 0; /* Not available on Yopy. */
+	state->vs_3v  = 0; /* FIXME Can only apply 3.3V on Yopy. */
+	state->vs_Xv  = 0;
 }
 
-static int yopy_pcmcia_configure_socket(int sock, const struct pcmcia_configure *configure)
+static int
+yopy_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt,
+			     const socket_state_t *state)
 {
-	if (sock != 0)
-		return -1;
-
-	switch (configure->vcc) {
+	switch (state->Vcc) {
 	case 0:	/* power off */
 		pcmcia_power(0);
 		break;
@@ -112,35 +82,25 @@
 		break;
 	default:
 		printk(KERN_ERR __FUNCTION__"(): unrecognized Vcc %u\n",
-		       configure->vcc);
+		       state->Vcc);
 		return -1;
 	}
 
-	pcmcia_reset(configure->reset);
+	pcmcia_reset(state->flags & SS_RESET ? 1 : 0);
 
 	/* Silently ignore Vpp, output enable, speaker enable. */
 
 	return 0;
 }
 
-static int yopy_pcmcia_socket_init(int sock)
+static void yopy_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		set_irq_type(irqs[i].irq, IRQT_BOTHEDGE);
-
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-static int yopy_pcmcia_socket_suspend(int sock)
+static void yopy_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		set_irq_type(irqs[i].irq, IRQT_NOEDGE);
-
-	return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static struct pcmcia_low_level yopy_pcmcia_ops = {
@@ -159,13 +119,7 @@
 	int ret = -ENODEV;
 
 	if (machine_is_yopy())
-		ret = sa1100_register_pcmcia(&yopy_pcmcia_ops, dev);
+		ret = sa11xx_drv_pcmcia_probe(dev, &yopy_pcmcia_ops, 0, 1);
 
 	return ret;
 }
-
-void __exit pcmcia_yopy_exit(struct device *dev)
-{
-	sa1100_unregister_pcmcia(&yopy_pcmcia_ops, dev);
-}
-
diff -Nru a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c
--- a/drivers/pcmcia/sa1111_generic.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/sa1111_generic.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/pcmcia/sa1100_sa1111.c
+ * linux/drivers/pcmcia/sa1111_generic.c
  *
  * We implement the generic parts of a SA1111 PCMCIA driver.  This
  * basically means we handle everything except controlling the
@@ -19,63 +19,34 @@
 #include <asm/hardware/sa1111.h>
 #include <asm/irq.h>
 
-#include "sa1100_generic.h"
 #include "sa1111_generic.h"
 
-static struct irqs {
-	int irq;
-	const char *str;
-} irqs[] = {
-	{ IRQ_S0_CD_VALID,    "SA1111 PCMCIA card detect" },
-	{ IRQ_S0_BVD1_STSCHG, "SA1111 PCMCIA BVD1"        },
-	{ IRQ_S1_CD_VALID,    "SA1111 CF card detect"     },
-	{ IRQ_S1_BVD1_STSCHG, "SA1111 CF BVD1"            },
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_S0_CD_VALID,    "SA1111 PCMCIA card detect" },
+	{ 0, IRQ_S0_BVD1_STSCHG, "SA1111 PCMCIA BVD1"        },
+	{ 1, IRQ_S1_CD_VALID,    "SA1111 CF card detect"     },
+	{ 1, IRQ_S1_BVD1_STSCHG, "SA1111 CF BVD1"            },
 };
 
-static struct sa1111_dev *pcmcia;
-
-int sa1111_pcmcia_init(struct pcmcia_init *init)
+int sa1111_pcmcia_hw_init(struct sa1100_pcmcia_socket *skt)
 {
-	int i, ret;
-
-	if (init->socket_irq[0] == NO_IRQ)
-		init->socket_irq[0] = IRQ_S0_READY_NINT;
-	if (init->socket_irq[1] == NO_IRQ)
-		init->socket_irq[1] = IRQ_S1_READY_NINT;
-
-	for (i = ret = 0; i < ARRAY_SIZE(irqs); i++) {
-		ret = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
-				  SA_INTERRUPT, irqs[i].str, NULL);
-		if (ret)
-			break;
-		set_irq_type(irqs[i].irq, IRQT_FALLING);
-	}
+	if (skt->irq == NO_IRQ)
+		skt->irq = skt->nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT;
 
-	if (i < ARRAY_SIZE(irqs)) {
-		printk(KERN_ERR "sa1111_pcmcia: unable to grab IRQ%d (%d)\n",
-			irqs[i].irq, ret);
-		while (i--)
-			free_irq(irqs[i].irq, NULL);
-	}
-
-	return ret ? -1 : 2;
+	return sa11xx_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-int sa1111_pcmcia_shutdown(void)
+void sa1111_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *skt)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		free_irq(irqs[i].irq, NULL);
-
-	return 0;
+	sa11xx_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-void sa1111_pcmcia_socket_state(int sock, struct pcmcia_state *state)
+void sa1111_pcmcia_socket_state(struct sa1100_pcmcia_socket *skt, struct pcmcia_state *state)
 {
-	unsigned long status = sa1111_readl(pcmcia->mapbase + SA1111_PCSR);
+	struct sa1111_dev *sadev = SA1111_DEV(skt->dev);
+	unsigned long status = sa1111_readl(sadev->mapbase + SA1111_PCSR);
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
 		state->detect = status & PCSR_S0_DETECT ? 0 : 1;
 		state->ready  = status & PCSR_S0_READY  ? 1 : 0;
@@ -98,91 +69,61 @@
 	}
 }
 
-int sa1111_pcmcia_configure_socket(int sock, const struct pcmcia_configure *conf)
+int sa1111_pcmcia_configure_socket(struct sa1100_pcmcia_socket *skt, const socket_state_t *state)
 {
-	unsigned int rst, flt, wait, pse, irq, pccr_mask, val;
+	struct sa1111_dev *sadev = SA1111_DEV(skt->dev);
+	unsigned int pccr_skt_mask, pccr_set_mask, val;
 	unsigned long flags;
 
-	switch (sock) {
+	switch (skt->nr) {
 	case 0:
-		rst = PCCR_S0_RST;
-		flt = PCCR_S0_FLT;
-		wait = PCCR_S0_PWAITEN;
-		pse = PCCR_S0_PSE;
-		irq = IRQ_S0_READY_NINT;
+		pccr_skt_mask = PCCR_S0_RST|PCCR_S0_FLT|PCCR_S0_PWAITEN|PCCR_S0_PSE;
 		break;
 
 	case 1:
-		rst = PCCR_S1_RST;
-		flt = PCCR_S1_FLT;
-		wait = PCCR_S1_PWAITEN;
-		pse = PCCR_S1_PSE;
-		irq = IRQ_S1_READY_NINT;
+		pccr_skt_mask = PCCR_S1_RST|PCCR_S1_FLT|PCCR_S1_PWAITEN|PCCR_S1_PSE;
 		break;
 
 	default:
 		return -1;
 	}
 
-	switch (conf->vcc) {
-	case 0:
-		pccr_mask = 0;
-		break;
+	pccr_set_mask = 0;
 
-	case 33:
-		pccr_mask = wait;
-		break;
-
-	case 50:
-		pccr_mask = pse | wait;
-		break;
-
-	default:
-		printk(KERN_ERR "sa1111_pcmcia: unrecognised VCC %u\n",
-			conf->vcc);
-		return -1;
-	}
-
-	if (conf->reset)
-		pccr_mask |= rst;
-
-	if (conf->output)
-		pccr_mask |= flt;
+	if (state->Vcc != 0)
+		pccr_set_mask |= PCCR_S0_PWAITEN|PCCR_S1_PWAITEN;
+	if (state->Vcc == 50)
+		pccr_set_mask |= PCCR_S0_PSE|PCCR_S1_PSE;
+	if (state->flags & SS_RESET)
+		pccr_set_mask |= PCCR_S0_RST|PCCR_S1_RST;
+	if (state->flags & SS_OUTPUT_ENA)
+		pccr_set_mask |= PCCR_S0_FLT|PCCR_S1_FLT;
 
 	local_irq_save(flags);
-	val = sa1111_readl(pcmcia->mapbase + SA1111_PCCR);
-	val = (val & ~(pse | flt | wait | rst)) | pccr_mask;
-	sa1111_writel(val, pcmcia->mapbase + SA1111_PCCR);
+	val = sa1111_readl(sadev->mapbase + SA1111_PCCR);
+	val &= ~pccr_skt_mask;
+	val |= pccr_set_mask & pccr_skt_mask;
+	sa1111_writel(val, sadev->mapbase + SA1111_PCCR);
 	local_irq_restore(flags);
 
 	return 0;
 }
 
-int sa1111_pcmcia_socket_init(int sock)
+void sa1111_pcmcia_socket_init(struct sa1100_pcmcia_socket *skt)
 {
-	return 0;
+	sa11xx_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
-int sa1111_pcmcia_socket_suspend(int sock)
+void sa1111_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *skt)
 {
-	return 0;
+	sa11xx_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
 }
 
 static int pcmcia_probe(struct device *dev)
 {
 	struct sa1111_dev *sadev = SA1111_DEV(dev);
-	unsigned long flags;
 	char *base;
 
-	local_irq_save(flags);
-	if (pcmcia) {
-		local_irq_restore(flags);
-		return -EBUSY;
-	}
-
-	pcmcia = sadev;
-	local_irq_restore(flags);
-
 	if (!request_mem_region(sadev->res.start, 512,
 				SA1111_DRIVER_NAME(sadev)))
 		return -EBUSY;
@@ -226,44 +167,8 @@
 {
 	struct sa1111_dev *sadev = SA1111_DEV(dev);
 
-#ifdef CONFIG_SA1100_ADSBITSY
-	pcmcia_adsbitsy_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_BADGE4
-	pcmcia_badge4_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_GRAPHICSMASTER
-	pcmcia_graphicsmaster_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_JORNADA720
-	pcmcia_jornada720_exit(dev);
-#endif
-#ifdef CONFIG_ASSABET_NEPONSET
-	pcmcia_neponset_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_PFS168
-	pcmcia_pfs_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_PT_SYSTEM3
-	pcmcia_system3_exit(dev);
-#endif
-#ifdef CONFIG_SA1100_XP860
-	pcmcia_xp860_exit(dev);
-#endif
-
+	sa11xx_drv_pcmcia_remove(dev);
 	release_mem_region(sadev->res.start, 512);
-	pcmcia = NULL;
-
-	return 0;
-}
-
-static int pcmcia_suspend(struct device *dev, u32 state, u32 level)
-{
-	return 0;
-}
-
-static int pcmcia_resume(struct device *dev, u32 level)
-{
 	return 0;
 }
 
@@ -274,8 +179,8 @@
 		.devclass	= &pcmcia_socket_class,
 		.probe		= pcmcia_probe,
 		.remove		= __devexit_p(pcmcia_remove),
-		.suspend	= pcmcia_suspend,
-		.resume		= pcmcia_resume,
+		.suspend 	= pcmcia_socket_dev_suspend,
+		.resume 	= pcmcia_socket_dev_resume,
 	},
 	.devid			= SA1111_DEVID_PCMCIA,
 };
diff -Nru a/drivers/pcmcia/sa1111_generic.h b/drivers/pcmcia/sa1111_generic.h
--- a/drivers/pcmcia/sa1111_generic.h	Mon Mar 31 13:41:08 2003
+++ b/drivers/pcmcia/sa1111_generic.h	Mon Mar 31 13:41:08 2003
@@ -1,14 +1,12 @@
-extern int sa1111_pcmcia_init(struct pcmcia_init *);
-extern int sa1111_pcmcia_shutdown(void);
-extern void sa1111_pcmcia_socket_state(int sock, struct pcmcia_state *);
-extern int sa1111_pcmcia_configure_socket(int sock, const struct pcmcia_configure *);
-extern int sa1111_pcmcia_socket_init(int);
-extern int sa1111_pcmcia_socket_suspend(int);
+#include "sa11xx_core.h"
 
+extern int sa1111_pcmcia_hw_init(struct sa1100_pcmcia_socket *);
+extern void sa1111_pcmcia_hw_shutdown(struct sa1100_pcmcia_socket *);
+extern void sa1111_pcmcia_socket_state(struct sa1100_pcmcia_socket *, struct pcmcia_state *);
+extern int sa1111_pcmcia_configure_socket(struct sa1100_pcmcia_socket *, const socket_state_t *);
+extern void sa1111_pcmcia_socket_init(struct sa1100_pcmcia_socket *);
+extern void sa1111_pcmcia_socket_suspend(struct sa1100_pcmcia_socket *);
 
 extern int pcmcia_jornada720_init(struct device *);
-extern void pcmcia_jornada720_exit(struct device *);
-
 extern int pcmcia_neponset_init(struct device *);
-extern void pcmcia_neponset_exit(struct device *);
 
diff -Nru a/drivers/pcmcia/sa11xx_core.c b/drivers/pcmcia/sa11xx_core.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/pcmcia/sa11xx_core.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,1054 @@
+/*======================================================================
+
+    Device driver for the PCMCIA control functionality of StrongARM
+    SA-1100 microprocessors.
+
+    The contents of this file are subject to the Mozilla Public
+    License Version 1.1 (the "License"); you may not use this file
+    except in compliance with the License. You may obtain a copy of
+    the License at http://www.mozilla.org/MPL/
+
+    Software distributed under the License is distributed on an "AS
+    IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+    implied. See the License for the specific language governing
+    rights and limitations under the License.
+
+    The initial developer of the original code is John G. Dorsey
+    <john+@cs.cmu.edu>.  Portions created by John G. Dorsey are
+    Copyright (C) 1999 John G. Dorsey.  All Rights Reserved.
+
+    Alternatively, the contents of this file may be used under the
+    terms of the GNU Public License version 2 (the "GPL"), in which
+    case the provisions of the GPL are applicable instead of the
+    above.  If you wish to allow the use of your version of this file
+    only under the terms of the GPL and not to allow others to use
+    your version of this file under the MPL, indicate your decision
+    by deleting the provisions above and replace them with the notice
+    and other provisions required by the GPL.  If you do not delete
+    the provisions above, a recipient may use your version of this
+    file under either the MPL or the GPL.
+    
+======================================================================*/
+/*
+ * Please see linux/Documentation/arm/SA1100/PCMCIA for more information
+ * on the low-level kernel interface.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/interrupt.h>
+
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+
+#include "sa11xx_core.h"
+#include "sa1100.h"
+
+#ifdef PCMCIA_DEBUG
+static int pc_debug;
+#endif
+
+/* This structure maintains housekeeping state for each socket, such
+ * as the last known values of the card detect pins, or the Card Services
+ * callback value associated with the socket:
+ */
+static struct sa1100_pcmcia_socket sa1100_pcmcia_socket[SA1100_PCMCIA_MAX_SOCK];
+
+#define PCMCIA_SOCKET(x)	(sa1100_pcmcia_socket + (x))
+
+/*
+ * sa1100_pcmcia_default_mecr_timing
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Calculate the MECR BS (wait state) value for the given CPU clock
+ * speed and command time.  sa11xx_drv_pcmcia_probe() installs this
+ * as socket_get_timing when the board code did not supply its own.
+ *
+ * The default is to simply calculate the BS values as specified in
+ * the INTEL SA1100 development manual
+ * "Expansion Memory (PCMCIA) Configuration Register (MECR)"
+ * that's section 10.2.5 in _my_ version of the manual ;)
+ */
+static unsigned int
+sa1100_pcmcia_default_mecr_timing(struct sa1100_pcmcia_socket *skt,
+				  unsigned int cpu_speed,
+				  unsigned int cmd_time)
+{
+	return sa1100_pcmcia_mecr_bs(cmd_time, cpu_speed);	/* skt is not consulted by the default */
+}
+
+static unsigned short	/* largest of spds[0..num-1], or dflt when that is 0 */
+calc_speed(unsigned short *spds, int num, unsigned short dflt)
+{
+	unsigned short speed = 0;	/* largest entry seen so far */
+	int i;
+
+	for (i = 0; i < num; i++)
+		if (speed < spds[i])
+			speed = spds[i];
+	if (speed == 0)	/* no non-zero entry recorded: fall back to the default */
+		speed = dflt;
+
+	return speed;
+}
+
+/* sa1100_pcmcia_set_mecr()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Set the MECR fields for socket <skt> from the largest speed recorded
+ * for its io, mem and attribute windows (defaults apply when none is
+ * recorded), converted to BS values for CPU clock <cpu_clock> by the
+ * board's socket_get_timing hook.  MECR is read-modify-written with
+ * interrupts disabled and only stored if it changed.  Returns: 0
+ */
+static int
+sa1100_pcmcia_set_mecr(struct sa1100_pcmcia_socket *skt, unsigned int cpu_clock)
+{
+	u32 mecr, old_mecr;
+	unsigned long flags;
+	unsigned short speed;
+	unsigned int bs_io, bs_mem, bs_attr;
+
+	speed = calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS);
+	bs_io = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+	speed = calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS);
+	bs_mem = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+	speed = calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS);
+	bs_attr = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+	local_irq_save(flags);	/* keep the MECR read-modify-write atomic on this CPU */
+
+	old_mecr = mecr = MECR;
+	MECR_FAST_SET(mecr, skt->nr, 0);
+	MECR_BSIO_SET(mecr, skt->nr, bs_io);
+	MECR_BSA_SET(mecr, skt->nr, bs_attr);
+	MECR_BSM_SET(mecr, skt->nr, bs_mem);
+	if (old_mecr != mecr)	/* skip the register write when nothing changed */
+		MECR = mecr;
+
+	local_irq_restore(flags);
+
+	DEBUG(4, "%s(): sock %u FAST %X  BSM %X  BSA %X  BSIO %X\n",
+	      __FUNCTION__, skt->nr, MECR_FAST_GET(mecr, skt->nr),
+	      MECR_BSM_GET(mecr, skt->nr), MECR_BSA_GET(mecr, skt->nr),
+	      MECR_BSIO_GET(mecr, skt->nr));
+
+	return 0;
+}
+
+static unsigned int sa1100_pcmcia_skt_state(struct sa1100_pcmcia_socket *skt)	/* current SS_* status bits */
+{
+	struct pcmcia_state state;
+	unsigned int stat;
+
+	memset(&state, 0, sizeof(struct pcmcia_state));
+
+	skt->ops->socket_state(skt, &state);	/* low-level driver fills in the raw signals */
+
+	stat = state.detect  ? SS_DETECT : 0;
+	stat |= state.ready  ? SS_READY  : 0;
+	stat |= state.wrprot ? SS_WRPROT : 0;
+	stat |= state.vs_3v  ? SS_3VCARD : 0;
+	stat |= state.vs_Xv  ? SS_XVCARD : 0;
+
+	/* The power status of individual sockets is not available
+	 * explicitly from the hardware, so we just remember the state
+	 * and regurgitate it upon request:
+	 */
+	stat |= skt->cs_state.Vcc ? SS_POWERON : 0;
+
+	if (skt->cs_state.flags & SS_IOCARD)	/* I/O card: bvd1 is reported as STSCHG */
+		stat |= state.bvd1 ? SS_STSCHG : 0;
+	else {	/* otherwise bvd1/bvd2 are reported as battery status */
+		if (state.bvd1 == 0)
+			stat |= SS_BATDEAD;
+		else if (state.bvd2 == 0)
+			stat |= SS_BATWARN;
+	}
+	return stat;
+}
+
+/*
+ * sa1100_pcmcia_config_skt
+ * ^^^^^^^^^^^^^^^^^^^^^^^^
+ * Apply `state' via the low-level configure_socket(); on success also
+ * switch the card IRQ trigger type and cache `state' in skt->cs_state.
+ */
+static int
+sa1100_pcmcia_config_skt(struct sa1100_pcmcia_socket *skt, socket_state_t *state)
+{
+	int ret;
+
+	ret = skt->ops->configure_socket(skt, state);
+	if (ret == 0) {
+		/*
+		 * This really needs a better solution.  The IRQ
+		 * may or may not be claimed by the driver.
+		 */
+		if (skt->irq_state != 1 && state->io_irq) {	/* IRQ newly requested */
+			skt->irq_state = 1;
+			set_irq_type(skt->irq, IRQT_FALLING);
+		} else if (skt->irq_state == 1 && state->io_irq == 0) {	/* IRQ no longer wanted */
+			skt->irq_state = 0;
+			set_irq_type(skt->irq, IRQT_NOEDGE);
+		}
+
+		skt->cs_state = *state;	/* remembered for get_socket() and status reporting */
+	}
+
+	if (ret < 0)
+		printk(KERN_ERR "sa1100_pcmcia: unable to configure "
+		       "socket %d\n", skt->nr);
+
+	return ret;	/* whatever configure_socket() returned */
+}
+
+/* sa1100_pcmcia_sock_init()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * (Re-)Initialise the socket, turning on status interrupts
+ * and PCMCIA bus.  This must wait for power to stabilise
+ * so that the card status signals report correctly.
+ *
+ * Returns: 0 (a configuration failure is logged but not reported)
+ */
+static int sa1100_pcmcia_sock_init(unsigned int sock)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+
+	DEBUG(2, "%s(): initializing socket %u\n", __FUNCTION__, skt->nr);
+
+	skt->ops->socket_init(skt);
+	sa1100_pcmcia_config_skt(skt, &dead_socket);	/* return value deliberately ignored */
+
+	return 0;
+}
+
+
+/*
+ * sa1100_pcmcia_suspend()
+ * ^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Remove power on the socket, disable IRQs from the card.
+ * Turn off status interrupts, and disable the PCMCIA bus.
+ *
+ * Returns: the result of applying dead_socket (0 on success)
+ */
+static int sa1100_pcmcia_suspend(unsigned int sock)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+	int ret;
+
+	DEBUG(2, "%s(): suspending socket %u\n", __FUNCTION__, skt->nr);
+
+	ret = sa1100_pcmcia_config_skt(skt, &dead_socket);
+	if (ret == 0)	/* only call the low-level suspend hook if that succeeded */
+		skt->ops->socket_suspend(skt);
+
+	return ret;
+}
+
+
+/* sa1100_pcmcia_task_handler()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Processes serviceable socket events from the socket's work item.
+ *
+ * Event processing (specifically, the invocation of the Card Services event
+ * callback) occurs here rather than in the actual interrupt handler or
+ * poll timer due to the use of scheduling operations in the PCMCIA core.
+ */
+static void sa1100_pcmcia_task_handler(void *data)
+{
+	struct sa1100_pcmcia_socket *skt = data;	/* set up by INIT_WORK() in the probe routine */
+	unsigned int events;
+
+	DEBUG(4, "%s(): entering PCMCIA monitoring thread\n", __FUNCTION__);
+
+	do {
+		unsigned int status;
+
+		status = sa1100_pcmcia_skt_state(skt);
+
+		events = (status ^ skt->status) & skt->cs_state.csc_mask;	/* changed bits the core asked for */
+		skt->status = status;
+
+		DEBUG(2, "events: %s%s%s%s%s%s\n",
+			events == 0         ? "<NONE>"   : "",
+			events & SS_DETECT  ? "DETECT "  : "",
+			events & SS_READY   ? "READY "   : "",
+			events & SS_BATDEAD ? "BATDEAD " : "",
+			events & SS_BATWARN ? "BATWARN " : "",
+			events & SS_STSCHG  ? "STSCHG "  : "");
+
+		if (events && skt->handler != NULL)
+			skt->handler(skt->handler_info, events);
+	} while (events);	/* re-sample until the status stops changing */
+}
+
+/* sa1100_pcmcia_poll_event()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Let's poll for events in addition to IRQs since IRQ only is unreliable...
+ */
+static void sa1100_pcmcia_poll_event(unsigned long dummy)	/* despite the name, `dummy' carries the socket pointer */
+{
+	struct sa1100_pcmcia_socket *skt = (struct sa1100_pcmcia_socket *)dummy;
+	DEBUG(4, "%s(): polling for events\n", __FUNCTION__);
+
+	mod_timer(&skt->poll_timer, jiffies + SA1100_PCMCIA_POLL_PERIOD);	/* re-arm for the next poll */
+
+	schedule_work(&skt->work);	/* status is sampled by sa1100_pcmcia_task_handler() */
+}
+
+
+/* sa1100_pcmcia_interrupt()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Service routine for socket driver interrupts (requested via
+ * sa11xx_request_irqs(), with the socket as the dev cookie).
+ * The actual interrupt-servicing work is performed by
+ * sa1100_pcmcia_task_handler(), largely because the Card Services
+ * event-handling code performs scheduling operations which cannot
+ * be executed from within an interrupt context.
+ */
+static void sa1100_pcmcia_interrupt(int irq, void *dev, struct pt_regs *regs)
+{
+	struct sa1100_pcmcia_socket *skt = dev;
+
+	DEBUG(3, "%s(): servicing IRQ %d\n", __FUNCTION__, irq);
+
+	schedule_work(&skt->work);	/* defer everything to the work item */
+}
+
+/* sa1100_pcmcia_register_callback()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the register_callback() operation for the in-kernel
+ * PCMCIA service (formerly SS_RegisterCallback in Card Services). If
+ * the function pointer `handler' is not NULL, remember the callback
+ * location in the state for `sock', and take a reference on the
+ * low-level driver module (skt->ops->owner). (The callback is invoked
+ * from sa1100_pcmcia_task_handler(), which the interrupt handler and
+ * the poll timer schedule.) Otherwise, clear the callback pointer in
+ * the socket state and drop the module reference.
+ *
+ * Returns: 0, or -ENODEV if the module reference cannot be taken
+ */
+static int
+sa1100_pcmcia_register_callback(unsigned int sock,
+				void (*handler)(void *, unsigned int),
+				void *info)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+
+	if (handler) {
+		if (!try_module_get(skt->ops->owner))
+			return -ENODEV;
+		skt->handler_info = info;	/* set the cookie before publishing the handler */
+		skt->handler = handler;
+	} else {
+		skt->handler = NULL;
+		module_put(skt->ops->owner);	/* NOTE(review): unconditional; assumes a handler was registered before */
+	}
+
+	return 0;
+}
+
+
+/* sa1100_pcmcia_inquire_socket()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the inquire_socket() operation for the in-kernel PCMCIA
+ * service (formerly SS_InquireSocket in Card Services).  We set
+ * SS_CAP_STATIC_MAP, which disables the memory resource database
+ * check. (Mapped memory is set up within the socket driver itself.)
+ *
+ * In conjunction with the STATIC_MAP capability is a new field,
+ * `io_offset', recommended by David Hinds. Rather than go through
+ * the SetIOMap interface (which is not quite suited for communicating
+ * window locations up from the socket driver), we just pass up
+ * an offset which is applied to client-requested base I/O addresses
+ * in alloc_io_space().
+ *
+ * SS_CAP_STATIC_MAP: don't bother with the (user-configured) memory
+ *   resource database; we instead pass up physical address ranges
+ *   and allow other parts of Card Services to deal with remapping.
+ *
+ * SS_CAP_PCCARD: we can deal with 16-bit PCMCIA & CF cards, but
+ *   not 32-bit CardBus devices.
+ *
+ * Return value is irrelevant; the pcmcia subsystem ignores it.
+ */
+static int
+sa1100_pcmcia_inquire_socket(unsigned int sock, socket_cap_t *cap)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+	int ret = -1;
+
+	if (skt) {	/* PCMCIA_SOCKET() is array arithmetic, so this is always true */
+		DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
+
+		cap->features  = SS_CAP_STATIC_MAP|SS_CAP_PCCARD;
+		cap->irq_mask  = 0;
+		cap->map_size  = PAGE_SIZE;
+		cap->pci_irq   = skt->irq;
+		cap->io_offset = (unsigned long)skt->virt_io;	/* ioremap()ed base of the socket's I/O window */
+
+		ret = 0;
+	}
+
+	return ret;
+}
+
+
+/* sa1100_pcmcia_get_status()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the get_status() operation for the in-kernel PCMCIA
+ * service (formerly SS_GetStatus in Card Services). Samples the socket
+ * via sa1100_pcmcia_skt_state(), caches the result in skt->status and
+ * hands it back through `status'.
+ *
+ * As a debugging note, during card startup, the PCMCIA core issues
+ * three set_socket() commands in a row the first with RESET deasserted,
+ * the second with RESET asserted, and the last with RESET deasserted
+ * again. Following the third set_socket(), a get_status() command will
+ * be issued. The kernel is looking for the SS_READY flag (see
+ * setup_socket(), reset_socket(), and unreset_socket() in cs.c).
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_get_status(unsigned int sock, unsigned int *status)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+
+	skt->status = sa1100_pcmcia_skt_state(skt);	/* also becomes the baseline for event detection */
+	*status = skt->status;
+
+	return 0;
+}
+
+
+/* sa1100_pcmcia_get_socket()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the get_socket() operation for the in-kernel PCMCIA
+ * service (formerly SS_GetSocket in Card Services). Simply copies
+ * out the state last stored by sa1100_pcmcia_config_skt().
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_get_socket(unsigned int sock, socket_state_t *state)
+{
+  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+
+  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
+
+  *state = skt->cs_state;	/* cached copy; the hardware is not queried */
+
+  return 0;
+}
+
+/* sa1100_pcmcia_set_socket()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_socket() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetSocket in Card Services). We more or
+ * less punt all of this work and let the kernel handle the details
+ * of power configuration, reset, &c. We also record the value of
+ * `state' in order to regurgitate it to the PCMCIA core later.
+ *
+ * Returns: the result of sa1100_pcmcia_config_skt() (0 on success)
+ */
+static int
+sa1100_pcmcia_set_socket(unsigned int sock, socket_state_t *state)
+{
+  struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+
+  DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
+
+  DEBUG(3, "\tmask:  %s%s%s%s%s%s\n\tflags: %s%s%s%s%s%s\n",
+	(state->csc_mask==0)?"<NONE>":"",
+	(state->csc_mask&SS_DETECT)?"DETECT ":"",
+	(state->csc_mask&SS_READY)?"READY ":"",
+	(state->csc_mask&SS_BATDEAD)?"BATDEAD ":"",
+	(state->csc_mask&SS_BATWARN)?"BATWARN ":"",
+	(state->csc_mask&SS_STSCHG)?"STSCHG ":"",
+	(state->flags==0)?"<NONE>":"",
+	(state->flags&SS_PWR_AUTO)?"PWR_AUTO ":"",
+	(state->flags&SS_IOCARD)?"IOCARD ":"",
+	(state->flags&SS_RESET)?"RESET ":"",
+	(state->flags&SS_SPKR_ENA)?"SPKR_ENA ":"",
+	(state->flags&SS_OUTPUT_ENA)?"OUTPUT_ENA ":"");
+  DEBUG(3, "\tVcc %d  Vpp %d  irq %d\n",
+	state->Vcc, state->Vpp, state->io_irq);
+
+  return sa1100_pcmcia_config_skt(skt, state);	/* does the real work and caches `state' */
+}  /* sa1100_pcmcia_set_socket() */
+
+
+/* sa1100_pcmcia_set_io_map()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_io_map() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetIOMap in Card Services). We configure
+ * the map speed as requested, but override the address ranges
+ * supplied by Card Services.
+ *
+ * Returns: 0 on success, -1 if map->map is out of range
+ */
+static int
+sa1100_pcmcia_set_io_map(unsigned int sock, struct pccard_io_map *map)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+	unsigned short speed = map->speed;
+
+	DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
+
+	DEBUG(3, "\tmap %u  speed %u\n\tstart 0x%08x  stop 0x%08x\n",
+		map->map, map->speed, map->start, map->stop);
+	DEBUG(3, "\tflags: %s%s%s%s%s%s%s%s\n",
+		(map->flags==0)?"<NONE>":"",
+		(map->flags&MAP_ACTIVE)?"ACTIVE ":"",
+		(map->flags&MAP_16BIT)?"16BIT ":"",
+		(map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
+		(map->flags&MAP_0WS)?"0WS ":"",
+		(map->flags&MAP_WRPROT)?"WRPROT ":"",
+		(map->flags&MAP_USE_WAIT)?"USE_WAIT ":"",
+		(map->flags&MAP_PREFETCH)?"PREFETCH ":"");
+
+	if (map->map >= MAX_IO_WIN) {
+		printk(KERN_ERR "%s(): map (%d) out of range\n", __FUNCTION__,
+		       map->map);
+		return -1;
+	}
+
+	if (map->flags & MAP_ACTIVE) {
+		if (speed == 0)	/* active window without a speed: use the default */
+			speed = SA1100_PCMCIA_IO_ACCESS;
+	} else {
+		speed = 0;	/* inactive windows do not contribute to the MECR timing */
+	}
+
+	skt->spd_io[map->map] = speed;
+	sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));	/* reprogram timings for the new speed set */
+
+	if (map->stop == 1)
+		map->stop = PAGE_SIZE-1;
+
+	map->stop -= map->start;	/* keep the window length ... */
+	map->stop += (unsigned long)skt->virt_io;	/* ... but rebase it onto our mapped I/O window */
+	map->start = (unsigned long)skt->virt_io;
+
+	return 0;
+}  /* sa1100_pcmcia_set_io_map() */
+
+
+/* sa1100_pcmcia_set_mem_map()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_mem_map() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetMemMap in Card Services). We configure
+ * the map speed as requested, but override the address ranges
+ * supplied by Card Services.
+ *
+ * Returns: 0 on success, -EINVAL if map->map is out of range
+ */
+static int
+sa1100_pcmcia_set_mem_map(unsigned int sock, struct pccard_mem_map *map)
+{
+	struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+	struct resource *res;
+	unsigned short speed = map->speed;
+
+	DEBUG(2, "%s() for sock %u\n", __FUNCTION__, skt->nr);
+
+	DEBUG(3, "\tmap %u speed %u card_start %08x\n",
+		map->map, map->speed, map->card_start);
+	DEBUG(3, "\tflags: %s%s%s%s%s%s%s%s\n",
+		(map->flags==0)?"<NONE>":"",
+		(map->flags&MAP_ACTIVE)?"ACTIVE ":"",
+		(map->flags&MAP_16BIT)?"16BIT ":"",
+		(map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
+		(map->flags&MAP_0WS)?"0WS ":"",
+		(map->flags&MAP_WRPROT)?"WRPROT ":"",
+		(map->flags&MAP_ATTRIB)?"ATTRIB ":"",
+		(map->flags&MAP_USE_WAIT)?"USE_WAIT ":"");
+
+	if (map->map >= MAX_WIN)
+		return -EINVAL;
+
+	if (map->flags & MAP_ACTIVE) {
+		if (speed == 0)
+			speed = 300;	/* NOTE(review): hard-coded default speed; unit not shown here - confirm */
+	} else {
+		speed = 0;	/* inactive windows do not contribute to the MECR timing */
+	}
+
+	if (map->flags & MAP_ATTRIB) {	/* a window is either attribute or common memory, never both */
+		res = &skt->res_attr;
+		skt->spd_attr[map->map] = speed;
+		skt->spd_mem[map->map] = 0;
+	} else {
+		res = &skt->res_mem;
+		skt->spd_attr[map->map] = 0;
+		skt->spd_mem[map->map] = speed;
+	}
+
+	sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
+
+	map->sys_stop -= map->sys_start;	/* keep the window length ... */
+	map->sys_stop += res->start + map->card_start;	/* ... rebased into the chosen resource */
+	map->sys_start = res->start + map->card_start;
+
+	return 0;
+}
+
+
+#if defined(CONFIG_PROC_FS)
+
+struct bittbl {	/* one flag bit and its printable name, for dump_bits() */
+	unsigned int mask;
+	const char *name;
+};
+
+static struct bittbl status_bits[] = {	/* names for the status and csc_mask lines of the proc file */
+	{ SS_WRPROT,		"SS_WRPROT"	},
+	{ SS_BATDEAD,		"SS_BATDEAD"	},
+	{ SS_BATWARN,		"SS_BATWARN"	},
+	{ SS_READY,		"SS_READY"	},
+	{ SS_DETECT,		"SS_DETECT"	},
+	{ SS_POWERON,		"SS_POWERON"	},
+	{ SS_STSCHG,		"SS_STSCHG"	},
+	{ SS_3VCARD,		"SS_3VCARD"	},
+	{ SS_XVCARD,		"SS_XVCARD"	},
+};
+
+static struct bittbl conf_bits[] = {	/* names for the cs_flags line of the proc file */
+	{ SS_PWR_AUTO,		"SS_PWR_AUTO"	},
+	{ SS_IOCARD,		"SS_IOCARD"	},
+	{ SS_RESET,		"SS_RESET"	},
+	{ SS_DMA_MODE,		"SS_DMA_MODE"	},
+	{ SS_SPKR_ENA,		"SS_SPKR_ENA"	},
+	{ SS_OUTPUT_ENA,	"SS_OUTPUT_ENA"	},
+	{ SS_DEBOUNCED,		"SS_DEBOUNCED"	},
+};
+
+static void	/* append "<prefix>: NAME NAME ...\n" for the bits set in val; advances *p */
+dump_bits(char **p, const char *prefix, unsigned int val, struct bittbl *bits, int sz)
+{
+	char *b = *p;
+	int i;
+
+	b += sprintf(b, "%-9s:", prefix);
+	for (i = 0; i < sz; i++)
+		if (val & bits[i].mask)
+			b += sprintf(b, " %s", bits[i].name);
+	*b++ = '\n';	/* overwrites sprintf's NUL; the output is not NUL-terminated here */
+	*p = b;
+}
+
+/* sa1100_pcmcia_proc_status()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the /proc/bus/pccard/??/status file.
+ *
+ * Returns: the number of characters added to the buffer
+ */
+static int
+sa1100_pcmcia_proc_status(char *buf, char **start, off_t pos,
+			  int count, int *eof, void *data)
+{
+	struct sa1100_pcmcia_socket *skt = data;	/* entry->data set by sa1100_pcmcia_proc_setup() */
+	unsigned int clock = cpufreq_get(0);
+	unsigned long mecr = MECR;	/* one snapshot, decoded per field below */
+	char *p = buf;	/* NOTE(review): start, pos, count and eof are not consulted */
+
+	p+=sprintf(p, "slot     : %d\n", skt->nr);
+
+	dump_bits(&p, "status", skt->status,
+		  status_bits, ARRAY_SIZE(status_bits));
+	dump_bits(&p, "csc_mask", skt->cs_state.csc_mask,
+		  status_bits, ARRAY_SIZE(status_bits));
+	dump_bits(&p, "cs_flags", skt->cs_state.flags,
+		  conf_bits, ARRAY_SIZE(conf_bits));
+
+	p+=sprintf(p, "Vcc      : %d\n", skt->cs_state.Vcc);
+	p+=sprintf(p, "Vpp      : %d\n", skt->cs_state.Vpp);
+	p+=sprintf(p, "IRQ      : %d (%d)\n", skt->cs_state.io_irq, skt->irq);
+
+	p+=sprintf(p, "I/O      : %u (%u)\n",	/* requested speed (value derived from the MECR field) */
+		calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS),
+		sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr)));
+
+	p+=sprintf(p, "attribute: %u (%u)\n",
+		calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS),
+		sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr)));
+
+	p+=sprintf(p, "common   : %u (%u)\n",
+		calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS),
+		sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr)));
+
+	return p-buf;
+}
+
+/* sa1100_pcmcia_proc_setup()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the proc_setup() operation for the in-kernel PCMCIA
+ * service (formerly SS_ProcSetup in Card Services).
+ *
+ * Returns: nothing; failure to create the entry is only logged
+ */
+static void
+sa1100_pcmcia_proc_setup(unsigned int sock, struct proc_dir_entry *base)
+{
+	struct proc_dir_entry *entry;
+
+	if ((entry = create_proc_entry("status", 0, base)) == NULL){
+		printk(KERN_ERR "unable to install \"status\" procfs entry\n");
+		return;
+	}
+	entry->read_proc = sa1100_pcmcia_proc_status;
+	entry->data = PCMCIA_SOCKET(sock);	/* handed back to the read routine as `data' */
+}
+#else
+#define sa1100_pcmcia_proc_setup	NULL
+#endif  /* defined(CONFIG_PROC_FS) */
+
+static struct pccard_operations sa11xx_pcmcia_operations = {	/* installed as cls->ops by the probe routine */
+	.owner			= THIS_MODULE,
+	.init			= sa1100_pcmcia_sock_init,
+	.suspend		= sa1100_pcmcia_suspend,
+	.register_callback	= sa1100_pcmcia_register_callback,
+	.inquire_socket		= sa1100_pcmcia_inquire_socket,
+	.get_status		= sa1100_pcmcia_get_status,
+	.get_socket		= sa1100_pcmcia_get_socket,
+	.set_socket		= sa1100_pcmcia_set_socket,
+	.set_io_map		= sa1100_pcmcia_set_io_map,
+	.set_mem_map		= sa1100_pcmcia_set_mem_map,
+	.proc_setup		= sa1100_pcmcia_proc_setup	/* NULL when CONFIG_PROC_FS is off */
+};
+
+int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)	/* 0 or request_irq() error */
+{
+	int i, res = 0;
+
+	for (i = 0; i < nr; i++) {
+		if (irqs[i].sock != skt->nr)	/* table may list other sockets' IRQs too */
+			continue;
+		res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
+				  SA_INTERRUPT, irqs[i].str, skt);
+		if (res)
+			break;
+		set_irq_type(irqs[i].irq, IRQT_NOEDGE);	/* stays quiet until sa11xx_enable_irqs() */
+	}
+
+	if (res) {
+		printk(KERN_ERR "PCMCIA: request for IRQ%d failed (%d)\n",
+			irqs[i].irq, res);
+
+		while (i--)	/* release what was claimed before the failing entry */
+			if (irqs[i].sock == skt->nr)
+				free_irq(irqs[i].irq, skt);
+	}
+	return res;
+}
+EXPORT_SYMBOL(sa11xx_request_irqs);
+
+void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)	/* undo sa11xx_request_irqs() */
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		if (irqs[i].sock == skt->nr)	/* only entries belonging to this socket */
+			free_irq(irqs[i].irq, skt);
+}
+EXPORT_SYMBOL(sa11xx_free_irqs);
+
+void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)	/* stop edge detection */
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		if (irqs[i].sock == skt->nr)	/* only entries belonging to this socket */
+			set_irq_type(irqs[i].irq, IRQT_NOEDGE);
+}
+EXPORT_SYMBOL(sa11xx_disable_irqs);
+
+void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)	/* trigger on both edges */
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		if (irqs[i].sock == skt->nr) {
+			set_irq_type(irqs[i].irq, IRQT_RISING);	/* NOTE(review): reason for the two-step setup is not shown here */
+			set_irq_type(irqs[i].irq, IRQT_BOTHEDGE);
+		}
+}
+EXPORT_SYMBOL(sa11xx_enable_irqs);
+
+static const char *skt_names[] = {	/* resource names, indexed by skt->nr in the probe routine */
+	"PCMCIA socket 0",
+	"PCMCIA socket 1",
+};
+
+/*
+ * sa11xx_drv_pcmcia_probe() - set up `nr' sockets for `dev', numbered
+ * from `first', using the board's low-level `ops'.  Returns 0 on
+ * success; otherwise every socket touched so far is unwound and the
+ * error code of the failing step is returned.
+ */
+int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr)
+{
+	struct pcmcia_socket_class_data *cls;
+	struct sa1100_pcmcia_socket *skt;	/* shared with the unwind loop below */
+	unsigned int cpu_clock;
+	int ret, i;
+
+	cls = kmalloc(sizeof(struct pcmcia_socket_class_data), GFP_KERNEL);
+	if (!cls) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memset(cls, 0, sizeof(struct pcmcia_socket_class_data));
+	cls->ops	= &sa11xx_pcmcia_operations;
+	cls->nsock	= nr;
+
+	/* Use the default MECR calculation if the board gave none. */
+	if (!ops->socket_get_timing)
+		ops->socket_get_timing = sa1100_pcmcia_default_mecr_timing;
+
+	cpu_clock = cpufreq_get(0);
+
+	/* Initialise the per-socket structure. */
+	for (i = 0; i < nr; i++) {
+		skt = PCMCIA_SOCKET(i);
+		memset(skt, 0, sizeof(*skt));
+
+		INIT_WORK(&skt->work, sa1100_pcmcia_task_handler, skt);
+
+		init_timer(&skt->poll_timer);
+		skt->poll_timer.function = sa1100_pcmcia_poll_event;
+		skt->poll_timer.data = (unsigned long)skt;
+		skt->poll_timer.expires = jiffies + SA1100_PCMCIA_POLL_PERIOD;
+
+		skt->nr		= first + i;
+		skt->irq	= NO_IRQ;
+		skt->dev	= dev;
+		skt->ops	= ops;
+
+		skt->res_skt.start	= _PCMCIA(skt->nr);
+		skt->res_skt.end	= _PCMCIA(skt->nr) + PCMCIASp - 1;
+		skt->res_skt.name	= skt_names[skt->nr];
+		skt->res_skt.flags	= IORESOURCE_MEM;
+
+		ret = request_resource(&iomem_resource, &skt->res_skt);
+		if (ret)
+			goto out_err_1;
+
+		skt->res_io.start	= _PCMCIAIO(skt->nr);
+		skt->res_io.end		= _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1;
+		skt->res_io.name	= "io";
+		skt->res_io.flags	= IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		ret = request_resource(&skt->res_skt, &skt->res_io);
+		if (ret)
+			goto out_err_2;
+
+		skt->res_mem.start	= _PCMCIAMem(skt->nr);
+		skt->res_mem.end	= _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1;
+		skt->res_mem.name	= "memory";
+		skt->res_mem.flags	= IORESOURCE_MEM;
+
+		ret = request_resource(&skt->res_skt, &skt->res_mem);
+		if (ret)
+			goto out_err_3;
+
+		skt->res_attr.start	= _PCMCIAAttr(skt->nr);
+		skt->res_attr.end	= _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1;
+		skt->res_attr.name	= "attribute";
+		skt->res_attr.flags	= IORESOURCE_MEM;
+
+		ret = request_resource(&skt->res_skt, &skt->res_attr);
+		if (ret)
+			goto out_err_4;
+
+		skt->virt_io = ioremap(skt->res_io.start, 0x10000);
+		if (skt->virt_io == NULL) {
+			ret = -ENOMEM;
+			goto out_err_5;
+		}
+
+		/* Initialise MECR to default values here: we are not
+		 * guaranteed to see a SetIOMap operation at runtime. */
+		sa1100_pcmcia_set_mecr(skt, cpu_clock);
+
+		ret = ops->hw_init(skt);
+		if (ret)
+			goto out_err_6;
+
+		skt->status = sa1100_pcmcia_skt_state(skt);
+		add_timer(&skt->poll_timer);
+	}
+
+	dev->class_data = cls;
+	return 0;
+
+	/* Unwind: entered via out_err_N with skt/i at the failing socket. */
+	do {
+		skt = PCMCIA_SOCKET(i);	/* earlier, fully set-up socket */
+
+		del_timer_sync(&skt->poll_timer);
+		flush_scheduled_work();
+
+		ops->hw_shutdown(skt);
+ out_err_6:
+		iounmap(skt->virt_io);
+ out_err_5:
+		release_resource(&skt->res_attr);
+ out_err_4:
+		release_resource(&skt->res_mem);
+ out_err_3:
+		release_resource(&skt->res_io);
+ out_err_2:
+		release_resource(&skt->res_skt);
+ out_err_1:
+		i--;
+	} while (i >= 0);	/* >= so that socket 0 is torn down as well */
+
+	kfree(cls);
+
+ out:
+	return ret;
+}
+EXPORT_SYMBOL(sa11xx_drv_pcmcia_probe);
+
+int sa11xx_drv_pcmcia_remove(struct device *dev)	/* tear down every socket of `dev'; always returns 0 */
+{
+	struct pcmcia_socket_class_data *cls = dev->class_data;
+	int i;
+
+	dev->class_data = NULL;
+
+	for (i = 0; i < cls->nsock; i++) {
+		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(cls->sock_offset + i);	/* NOTE(review): probe indexes by i alone and never sets sock_offset */
+
+		skt->ops->hw_shutdown(skt);	/* NOTE(review): runs while the poll timer may still fire - confirm ordering */
+
+		del_timer_sync(&skt->poll_timer);
+		flush_scheduled_work();	/* wait for any queued work to finish */
+
+		sa1100_pcmcia_config_skt(skt, &dead_socket);
+
+		iounmap(skt->virt_io);
+		skt->virt_io = NULL;
+		release_resource(&skt->res_attr);	/* children before the parent res_skt */
+		release_resource(&skt->res_mem);
+		release_resource(&skt->res_io);
+		release_resource(&skt->res_skt);
+	}
+
+	kfree(cls);
+
+	return 0;
+}
+EXPORT_SYMBOL(sa11xx_drv_pcmcia_remove);
+
+#ifdef CONFIG_CPU_FREQ
+
+/* sa1100_pcmcia_update_mecr()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Called by sa1100_pcmcia_notifier() on a core clock change: recompute
+ * the MECR BS_xx values of every probed socket for clock speed `clock'.
+ */
+static void sa1100_pcmcia_update_mecr(unsigned int clock)
+{
+	unsigned int sock;
+
+	for (sock = 0; sock < SA1100_PCMCIA_MAX_SOCK; ++sock) {
+		struct sa1100_pcmcia_socket *skt = PCMCIA_SOCKET(sock);
+		if (skt->ops)	/* never probed: no timing hook to call */
+			sa1100_pcmcia_set_mecr(skt, clock);
+	}
+}
+
+/* sa1100_pcmcia_notifier()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^
+ * When changing the processor core clock frequency, it is necessary
+ * to adjust the MECR timings accordingly. We've recorded the timings
+ * requested by Card Services, so this is just a matter of recomputing
+ * the MECR values: before the change when the clock is going up,
+ * after the change when it is going down.
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_notifier(struct notifier_block *nb, unsigned long val,
+		       void *data)
+{
+	struct cpufreq_freqs *freqs = data;
+
+	switch (val) {
+	case CPUFREQ_PRECHANGE:
+		if (freqs->new > freqs->old) {	/* speeding up: retime before the switch */
+			DEBUG(2, "%s(): new frequency %u.%uMHz > %u.%uMHz, "
+				"pre-updating\n", __FUNCTION__,
+			    freqs->new / 1000, (freqs->new / 100) % 10,
+			    freqs->old / 1000, (freqs->old / 100) % 10);
+			sa1100_pcmcia_update_mecr(freqs->new);
+		}
+		break;
+
+	case CPUFREQ_POSTCHANGE:
+		if (freqs->new < freqs->old) {	/* slowing down: retime after the switch */
+			DEBUG(2, "%s(): new frequency %u.%uMHz < %u.%uMHz, "
+				"post-updating\n", __FUNCTION__,
+			    freqs->new / 1000, (freqs->new / 100) % 10,
+			    freqs->old / 1000, (freqs->old / 100) % 10);
+			sa1100_pcmcia_update_mecr(freqs->new);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block sa1100_pcmcia_notifier_block = {	/* (un)registered in module init/exit */
+	.notifier_call	= sa1100_pcmcia_notifier
+};
+
+static int __init sa11xx_pcmcia_init(void)	/* only built when CONFIG_CPU_FREQ is set */
+{
+	int ret;
+
+	printk(KERN_INFO "SA11xx PCMCIA (CS release %s)\n", CS_RELEASE);
+
+	ret = cpufreq_register_notifier(&sa1100_pcmcia_notifier_block,
+					CPUFREQ_TRANSITION_NOTIFIER);
+	if (ret < 0)
+		printk(KERN_ERR "Unable to register CPU frequency change "
+			"notifier (%d)\n", ret);
+
+	return ret;	/* a registration failure fails module init */
+}
+module_init(sa11xx_pcmcia_init);
+
+static void __exit sa11xx_pcmcia_exit(void)	/* undoes sa11xx_pcmcia_init() */
+{
+	cpufreq_unregister_notifier(&sa1100_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+module_exit(sa11xx_pcmcia_exit);
+#endif
+
+MODULE_AUTHOR("John Dorsey <john+@cs.cmu.edu>");
+MODULE_DESCRIPTION("Linux PCMCIA Card Services: SA-11xx core socket driver");
+MODULE_LICENSE("Dual MPL/GPL");
diff -Nru a/drivers/pcmcia/sa11xx_core.h b/drivers/pcmcia/sa11xx_core.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/pcmcia/sa11xx_core.h	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,122 @@
+/*
+ * linux/drivers/pcmcia/sa11xx_core.h
+ *
+ * Copyright (C) 2000 John G Dorsey <john+@cs.cmu.edu>
+ *
+ * This file contains definitions for the low-level SA-1100 kernel PCMCIA
+ * interface. Please see linux/Documentation/arm/SA1100/PCMCIA for details.
+ */
+#ifndef _ASM_ARCH_PCMCIA
+#define _ASM_ARCH_PCMCIA
+
+#include <linux/proc_fs.h>
+/* include the world */
+#include <pcmcia/version.h>
+#include <pcmcia/cs_types.h>
+#include <pcmcia/cs.h>
+#include <pcmcia/ss.h>
+#include <pcmcia/bulkmem.h>
+#include <pcmcia/cistpl.h>
+#include "cs_internal.h"
+
+struct device;
+
+/* Ideally, we'd support up to MAX_SOCK sockets, but the SA-1100 only
+ * has support for two. This shows up in lots of hardwired ways, such
+ * as the fact that MECR only has enough bits to configure two sockets.
+ * Since it's so entrenched in the hardware, limiting the software
+ * in this way doesn't seem too terrible.
+ */
+#define SA1100_PCMCIA_MAX_SOCK   (2)
+
+struct pcmcia_state {
+  unsigned detect: 1,
+            ready: 1,
+             bvd1: 1,
+             bvd2: 1,
+           wrprot: 1,
+            vs_3v: 1,
+            vs_Xv: 1;
+};
+
+/*
+ * This structure encapsulates per-socket state which we might need to
+ * use when responding to a Card Services query of some kind.
+ */
+struct sa1100_pcmcia_socket {
+	/*
+	 * Info from low level handler
+	 */
+	struct device		*dev;
+	unsigned int		nr;
+	unsigned int		irq;
+
+	/*
+	 * Core PCMCIA state
+	 */
+	struct pcmcia_low_level *ops;
+
+	unsigned int		status;
+	socket_state_t		cs_state;
+	void			(*handler)(void *, unsigned int);
+	void			*handler_info;
+
+	unsigned short		spd_io[MAX_IO_WIN];
+	unsigned short		spd_mem[MAX_WIN];
+	unsigned short		spd_attr[MAX_WIN];
+
+	struct resource		res_skt;
+	struct resource		res_io;
+	struct resource		res_mem;
+	struct resource		res_attr;
+	void			*virt_io;
+
+	unsigned int		irq_state;
+
+	struct timer_list	poll_timer;
+	struct work_struct	work;
+};
+
+struct pcmcia_low_level {
+	struct module *owner;
+
+	int (*hw_init)(struct sa1100_pcmcia_socket *);
+	void (*hw_shutdown)(struct sa1100_pcmcia_socket *);
+
+	void (*socket_state)(struct sa1100_pcmcia_socket *, struct pcmcia_state *);
+	int (*configure_socket)(struct sa1100_pcmcia_socket *, const socket_state_t *);
+
+	/*
+	 * Enable card status IRQs on (re-)initialisation.  This can
+	 * be called at initialisation, power management event, or
+	 * pcmcia event.
+	 */
+	void (*socket_init)(struct sa1100_pcmcia_socket *);
+
+	/*
+	 * Disable card status IRQs and PCMCIA bus on suspend.
+	 */
+	void (*socket_suspend)(struct sa1100_pcmcia_socket *);
+
+	/*
+	 * Calculate MECR timing clock wait states
+	 */
+	unsigned int (*socket_get_timing)(struct sa1100_pcmcia_socket *,
+			unsigned int cpu_speed, unsigned int cmd_time);
+};
+
+struct pcmcia_irqs {
+	int sock;
+	int irq;
+	const char *str;
+};
+
+int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+
+extern int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr);
+extern int sa11xx_drv_pcmcia_remove(struct device *dev);
+
+#endif
diff -Nru a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
--- a/drivers/pcmcia/tcic.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/tcic.c	Mon Mar 31 13:41:07 2003
@@ -225,11 +225,6 @@
 	return 2*(ns-14)/cycle_time;
 }
 
-static int to_ns(int cycles)
-{
-    return (cycles*cycle_time)/2 + 14;
-}
-
 /*====================================================================*/
 
 static volatile u_int irq_hits;
@@ -384,6 +379,8 @@
 	.name = "tcic-pcmcia",
 	.bus = &platform_bus_type,
 	.devclass = &pcmcia_socket_class,
+	.suspend = pcmcia_socket_dev_suspend,
+	.resume = pcmcia_socket_dev_resume,
 };
 
 static struct platform_device tcic_device = {
@@ -806,44 +803,6 @@
   
 /*====================================================================*/
 
-static int tcic_get_io_map(unsigned int lsock, struct pccard_io_map *io)
-{
-    u_short psock = socket_table[lsock].psock;
-    u_short base, ioctl;
-    u_int addr;
-    
-    if (io->map > 1) return -EINVAL;
-    tcic_setw(TCIC_ADDR+2, TCIC_ADR2_INDREG | (psock << TCIC_SS_SHFT));
-    addr = TCIC_IWIN(psock, io->map);
-    tcic_setw(TCIC_ADDR, addr + TCIC_IBASE_X);
-    base = tcic_getw(TCIC_DATA);
-    tcic_setw(TCIC_ADDR, addr + TCIC_ICTL_X);
-    ioctl = tcic_getw(TCIC_DATA);
-
-    if (ioctl & TCIC_ICTL_TINY)
-	io->start = io->stop = base;
-    else {
-	io->start = base & (base-1);
-	io->stop = io->start + (base ^ (base-1));
-    }
-    io->speed = to_ns(ioctl & TCIC_ICTL_WSCNT_MASK);
-    io->flags  = (ioctl & TCIC_ICTL_ENA) ? MAP_ACTIVE : 0;
-    switch (ioctl & TCIC_ICTL_BW_MASK) {
-    case TCIC_ICTL_BW_DYN:
-	io->flags |= MAP_AUTOSZ; break;
-    case TCIC_ICTL_BW_16:
-	io->flags |= MAP_16BIT; break;
-    default:
-	break;
-    }
-    DEBUG(1, "tcic: GetIOMap(%d, %d) = %#2.2x, %d ns, "
-	  "%#4.4x-%#4.4x\n", lsock, io->map, io->flags,
-	  io->speed, io->start, io->stop);
-    return 0;
-} /* tcic_get_io_map */
-
-/*====================================================================*/
-
 static int tcic_set_io_map(unsigned int lsock, struct pccard_io_map *io)
 {
     u_short psock = socket_table[lsock].psock;
@@ -881,51 +840,6 @@
 
 /*====================================================================*/
 
-static int tcic_get_mem_map(unsigned int lsock, struct pccard_mem_map *mem)
-{
-    u_short psock = socket_table[lsock].psock;
-    u_short addr, ctl;
-    u_long base, mmap;
-    
-    if (mem->map > 3) return -EINVAL;
-    tcic_setw(TCIC_ADDR+2, TCIC_ADR2_INDREG | (psock << TCIC_SS_SHFT));
-    addr = TCIC_MWIN(psock, mem->map);
-    
-    tcic_setw(TCIC_ADDR, addr + TCIC_MBASE_X);
-    base = tcic_getw(TCIC_DATA);
-    if (base & TCIC_MBASE_4K_BIT) {
-	mem->sys_start = base & TCIC_MBASE_HA_MASK;
-	mem->sys_stop = mem->sys_start;
-    } else {
-	base &= TCIC_MBASE_HA_MASK;
-	mem->sys_start = (base & (base-1));
-	mem->sys_stop = mem->sys_start + (base ^ (base-1));
-    }
-    mem->sys_start = mem->sys_start << TCIC_MBASE_HA_SHFT;
-    mem->sys_stop = (mem->sys_stop << TCIC_MBASE_HA_SHFT) + 0x0fff;
-    
-    tcic_setw(TCIC_ADDR, addr + TCIC_MMAP_X);
-    mmap = tcic_getw(TCIC_DATA);
-    mem->flags = (mmap & TCIC_MMAP_REG) ? MAP_ATTRIB : 0;
-    mmap &= TCIC_MMAP_CA_MASK;
-    mem->card_start = mem->sys_start + (mmap << TCIC_MMAP_CA_SHFT);
-    mem->card_start &= 0x3ffffff;
-    
-    tcic_setw(TCIC_ADDR, addr + TCIC_MCTL_X);
-    ctl = tcic_getw(TCIC_DATA);
-    mem->flags |= (ctl & TCIC_MCTL_ENA) ? MAP_ACTIVE : 0;
-    mem->flags |= (ctl & TCIC_MCTL_B8) ? 0 : MAP_16BIT;
-    mem->flags |= (ctl & TCIC_MCTL_WP) ? MAP_WRPROT : 0;
-    mem->speed = to_ns(ctl & TCIC_MCTL_WSCNT_MASK);
-    
-    DEBUG(1, "tcic: GetMemMap(%d, %d) = %#2.2x, %d ns, "
-	  "%#5.5lx-%#5.5lx, %#5.5x\n", lsock, mem->map, mem->flags,
-	  mem->speed, mem->sys_start, mem->sys_stop, mem->card_start);
-    return 0;
-} /* tcic_get_mem_map */
-
-/*====================================================================*/
-  
 static int tcic_set_mem_map(unsigned int lsock, struct pccard_mem_map *mem)
 {
     u_short psock = socket_table[lsock].psock;
@@ -1007,9 +921,7 @@
 	.get_status	   = tcic_get_status,
 	.get_socket	   = tcic_get_socket,
 	.set_socket	   = tcic_set_socket,
-	.get_io_map	   = tcic_get_io_map,
 	.set_io_map	   = tcic_set_io_map,
-	.get_mem_map	   = tcic_get_mem_map,
 	.set_mem_map	   = tcic_set_mem_map,
 	.proc_setup	   = tcic_proc_setup,
 };
diff -Nru a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h
--- a/drivers/pcmcia/ti113x.h	Mon Mar 31 13:41:06 2003
+++ b/drivers/pcmcia/ti113x.h	Mon Mar 31 13:41:07 2003
@@ -185,9 +185,7 @@
 	yenta_get_status,
 	yenta_get_socket,
 	yenta_set_socket,
-	yenta_get_io_map,
 	yenta_set_io_map,
-	yenta_get_mem_map,
 	yenta_set_mem_map,
 	yenta_proc_setup
 };
@@ -230,9 +228,7 @@
 	yenta_get_status,
 	yenta_get_socket,
 	yenta_set_socket,
-	yenta_get_io_map,
 	yenta_set_io_map,
-	yenta_get_mem_map,
 	yenta_set_mem_map,
 	yenta_proc_setup
 };
@@ -272,9 +268,7 @@
 	yenta_get_status,
 	yenta_get_socket,
 	yenta_set_socket,
-	yenta_get_io_map,
 	yenta_set_io_map,
-	yenta_get_mem_map,
 	yenta_set_mem_map,
 	yenta_proc_setup
 };
diff -Nru a/drivers/pcmcia/yenta.c b/drivers/pcmcia/yenta.c
--- a/drivers/pcmcia/yenta.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/pcmcia/yenta.c	Mon Mar 31 13:41:07 2003
@@ -27,7 +27,7 @@
 #include "i82365.h"
 
 #if 0
-#define DEBUG(x,args...)	printk(__FUNCTION__ ": " x,##args)
+#define DEBUG(x,args...)	printk("%s: " x, __FUNCTION__, ##args)
 #else
 #define DEBUG(x,args...)
 #endif
@@ -300,29 +300,6 @@
 	return 0;
 }
 
-static int yenta_get_io_map(pci_socket_t *socket, struct pccard_io_map *io)
-{
-	int map;
-	unsigned char ioctl, addr;
-
-	map = io->map;
-	if (map > 1)
-		return -EINVAL;
-
-	io->start = exca_readw(socket, I365_IO(map)+I365_W_START);
-	io->stop = exca_readw(socket, I365_IO(map)+I365_W_STOP);
-
-	ioctl = exca_readb(socket, I365_IOCTL);
-	addr = exca_readb(socket, I365_ADDRWIN);
-	io->speed = to_ns(ioctl & I365_IOCTL_WAIT(map)) ? 1 : 0;
-	io->flags  = (addr & I365_ENA_IO(map)) ? MAP_ACTIVE : 0;
-	io->flags |= (ioctl & I365_IOCTL_0WS(map)) ? MAP_0WS : 0;
-	io->flags |= (ioctl & I365_IOCTL_16BIT(map)) ? MAP_16BIT : 0;
-	io->flags |= (ioctl & I365_IOCTL_IOCS16(map)) ? MAP_AUTOSZ : 0;
-
-	return 0;
-}
-
 static int yenta_set_io_map(pci_socket_t *socket, struct pccard_io_map *io)
 {
 	int map;
@@ -356,41 +333,6 @@
 	return 0;
 }
 
-static int yenta_get_mem_map(pci_socket_t *socket, struct pccard_mem_map *mem)
-{
-	int map;
-	unsigned char addr;
-	unsigned int start, stop, page, offset;
-
-	map = mem->map;
-	if (map > 4)
-		return -EINVAL;
-
-	addr = exca_readb(socket, I365_ADDRWIN);
-	mem->flags = (addr & I365_ENA_MEM(map)) ? MAP_ACTIVE : 0;
-
-	start = exca_readw(socket, I365_MEM(map) + I365_W_START);
-	mem->flags |= (start & I365_MEM_16BIT) ? MAP_16BIT : 0;
-	mem->flags |= (start & I365_MEM_0WS) ? MAP_0WS : 0;
-	start = (start & 0x0fff) << 12;
-
-	stop = exca_readw(socket, I365_MEM(map) + I365_W_STOP);
-	mem->speed = to_ns(stop >> 14);
-	stop = ((stop & 0x0fff) << 12) + 0x0fff;
-
-	offset = exca_readw(socket, I365_MEM(map) + I365_W_OFF);
-	mem->flags |= (offset & I365_MEM_WRPROT) ? MAP_WRPROT : 0;
-	mem->flags |= (offset & I365_MEM_REG) ? MAP_ATTRIB : 0;
-	offset = ((offset & 0x3fff) << 12) + start;
-	mem->card_start = offset & 0x3ffffff;
-
-	page = exca_readb(socket, CB_MEM_PAGE(map)) << 24;
-	mem->sys_start = start + page;
-	mem->sys_stop = start + page;
-
-	return 0;
-}
-
 static int yenta_set_mem_map(pci_socket_t *socket, struct pccard_mem_map *mem)
 {
 	int map;
@@ -572,7 +514,6 @@
 	socket->cap.pci_irq = socket->cb_irq;
 	socket->cap.irq_mask = yenta_probe_irq(socket, isa_irq_mask);
 	socket->cap.cb_dev = socket->dev;
-	socket->cap.bus = NULL;
 
 	printk("Yenta IRQ list %04x, PCI irq%d\n", socket->cap.irq_mask, socket->cb_irq);
 }
@@ -935,9 +876,7 @@
 	yenta_get_status,
 	yenta_get_socket,
 	yenta_set_socket,
-	yenta_get_io_map,
 	yenta_set_io_map,
-	yenta_get_mem_map,
 	yenta_set_mem_map,
 	yenta_proc_setup
 };
diff -Nru a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
--- a/drivers/s390/net/ctcmain.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/s390/net/ctcmain.c	Mon Mar 31 13:41:07 2003
@@ -2761,7 +2761,6 @@
 	dev->addr_len = 0;
 	dev->type = ARPHRD_SLIP;
 	dev->tx_queue_len = 100;
-	dev_init_buffers(dev);
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	return dev;
 }
diff -Nru a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
--- a/drivers/s390/net/netiucv.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/s390/net/netiucv.c	Mon Mar 31 13:41:07 2003
@@ -1630,7 +1630,6 @@
 	dev->addr_len            = 0;
 	dev->type                = ARPHRD_SLIP;
 	dev->tx_queue_len        = NETIUCV_QUEUELEN_DEFAULT;
-	dev_init_buffers(dev);
 	dev->flags	         = IFF_POINTOPOINT | IFF_NOARP;
 	return dev;
 }
diff -Nru a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
--- a/drivers/scsi/3w-xxxx.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/scsi/3w-xxxx.c	Mon Mar 31 13:41:08 2003
@@ -677,7 +677,7 @@
 			dprintk(KERN_WARNING "3w-xxxx: tw_chrdev_ioctl(): caught TW_AEN_LISTEN.\n");
 			memset(tw_ioctl->data_buffer, 0, tw_ioctl->data_buffer_length);
 
-			spin_lock_irqsave(&tw_dev->host->host_lock, flags);
+			spin_lock_irqsave(tw_dev->host->host_lock, flags);
 			if (tw_dev->aen_head == tw_dev->aen_tail) {
 				tw_aen_code = TW_AEN_QUEUE_EMPTY;
 			} else {
@@ -688,7 +688,7 @@
 					tw_dev->aen_head = tw_dev->aen_head + 1;
 				}
 			}
-			spin_unlock_irqrestore(&tw_dev->tw_lock, flags);
+			spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
 			memcpy(tw_ioctl->data_buffer, &tw_aen_code, sizeof(tw_aen_code));
 			break;
 		case TW_CMD_PACKET_WITH_DATA:
diff -Nru a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
--- a/drivers/scsi/pcmcia/nsp_cs.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/scsi/pcmcia/nsp_cs.c	Mon Mar 31 13:41:08 2003
@@ -62,7 +62,6 @@
 #include <pcmcia/cistpl.h>
 #include <pcmcia/cisreg.h>
 #include <pcmcia/ds.h>
-#include <pcmcia/bus_ops.h>
 
 #include "nsp_cs.h"
 
@@ -93,7 +92,6 @@
 	int	               ndev;
 	dev_node_t             node[8];
 	int                    stop;
-	struct bus_operations *bus;
 } scsi_info_t;
 
 
@@ -1948,7 +1946,6 @@
 	case CS_EVENT_CARD_INSERTION:
 		DEBUG(0, " event: insert\n");
 		link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
-		info->bus    =  args->bus;
 		nsp_cs_config(link);
 		break;
 
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/scsi/scsi_lib.c	Mon Mar 31 13:41:07 2003
@@ -1072,14 +1072,6 @@
 		if (shost->in_recovery || blk_queue_plugged(q))
 			return;
 
-		/*
-		 * get next queueable request.  We do this early to make sure
-		 * that the request is fully prepared even if we cannot 
-		 * accept it.  If there is no request, we'll detect this
-		 * lower down.
-		 */
-		req = elv_next_request(q);
-
 		if (sdev->device_busy >= sdev->queue_depth)
 			break;
 
@@ -1134,11 +1126,12 @@
 			sdev->starved = 0;
 
 		/*
-		 * If we couldn't find a request that could be queued, then we
-		 * can also quit.
+		 * get next queueable request.  We do this early to make sure
+		 * that the request is fully prepared even if we cannot 
+		 * accept it.  If there is no request, we'll detect this
+		 * lower down.
 		 */
-		if (blk_queue_empty(q))
-			break;
+		req = elv_next_request(q);
 
 		if (!req) {
 			/* If the device is busy, a returning I/O
diff -Nru a/drivers/serial/21285.c b/drivers/serial/21285.c
--- a/drivers/serial/21285.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/serial/21285.c	Mon Mar 31 13:41:08 2003
@@ -501,10 +501,11 @@
 	.index		= -1,
 };
 
-static void __init rs285_console_init(void)
+static int __init rs285_console_init(void)
 {
 	serial21285_setup_ports();
 	register_console(&serial21285_console);
+	return 0;
 }
 console_initcall(rs285_console_init);
 
diff -Nru a/drivers/serial/8250_cs.c b/drivers/serial/8250_cs.c
--- a/drivers/serial/8250_cs.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/serial/8250_cs.c	Mon Mar 31 13:41:07 2003
@@ -690,26 +690,25 @@
 	return 0;
 }
 
-/*====================================================================*/
+static struct pcmcia_driver serial_cs_driver = {
+	.owner		= THIS_MODULE,
+	.drv		= {
+		.name	= "serial_cs",
+	},
+	.attach		= serial_attach,
+	.detach		= serial_detach,
+};
 
 static int __init init_serial_cs(void)
 {
-	servinfo_t serv;
-	DEBUG(0, "%s\n", version);
-	CardServices(GetCardServicesInfo, &serv);
-	if (serv.Revision != CS_RELEASE_CODE) {
-		printk(KERN_NOTICE "serial_cs: Card Services release "
-		       "does not match!\n");
-		return -1;
-	}
-	register_pccard_driver(&dev_info, &serial_attach, &serial_detach);
-	return 0;
+	return pcmcia_register_driver(&serial_cs_driver);
 }
 
 static void __exit exit_serial_cs(void)
 {
-	DEBUG(0, "serial_cs: unloading\n");
-	unregister_pccard_driver(&dev_info);
+	pcmcia_unregister_driver(&serial_cs_driver);
+
+	/* XXX: this really needs to move into generic code.. */
 	while (dev_list != NULL)
 		serial_detach(dev_list);
 }
diff -Nru a/drivers/serial/Kconfig b/drivers/serial/Kconfig
--- a/drivers/serial/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/drivers/serial/Kconfig	Mon Mar 31 13:41:08 2003
@@ -372,14 +372,25 @@
 	bool "Use NEC V850E on-chip UART for console"
 	depends on V850E_NB85E_UART
 
+config SERIAL98
+	tristate "PC-9800 8251-based primary serial port support"
+	depends on X86_PC9800
+	help
+	  If you want to use standard primary serial ports on PC-9800, 
+	  say Y.  Otherwise, say N.
+
+config SERIAL98_CONSOLE
+        bool "Support for console on PC-9800 standard serial port"
+        depends on SERIAL98=y
+
 config SERIAL_CORE
 	tristate
-	default m if SERIAL_AMBA!=y && SERIAL_CLPS711X!=y && SERIAL_21285!=y && !SERIAL_SA1100 && !SERIAL_ANAKIN && !SERIAL_UART00 && SERIAL_8250!=y && SERIAL_MUX!=y && !SERIAL_ROCKETPORT && !SERIAL_SUNCORE && !V850E_NB85E_UART && (SERIAL_AMBA=m || SERIAL_CLPS711X=m || SERIAL_21285=m || SERIAL_8250=m || SERIAL_MUX=m)
-	default y if SERIAL_AMBA=y || SERIAL_CLPS711X=y || SERIAL_21285=y || SERIAL_SA1100 || SERIAL_ANAKIN || SERIAL_UART00 || SERIAL_8250=y || SERIAL_MUX=y || SERIAL_ROCKETPORT || SERIAL_SUNCORE || V850E_NB85E_UART
+	default m if SERIAL_AMBA!=y && SERIAL_CLPS711X!=y && SERIAL_21285!=y && !SERIAL_SA1100 && !SERIAL_ANAKIN && !SERIAL_UART00 && SERIAL_8250!=y && SERIAL_MUX!=y && !SERIAL_ROCKETPORT && !SERIAL_SUNCORE && !V850E_NB85E_UART && (SERIAL_AMBA=m || SERIAL_CLPS711X=m || SERIAL_21285=m || SERIAL_8250=m || SERIAL_MUX=m || SERIAL98=m)
+	default y if SERIAL_AMBA=y || SERIAL_CLPS711X=y || SERIAL_21285=y || SERIAL_SA1100 || SERIAL_ANAKIN || SERIAL_UART00 || SERIAL_8250=y || SERIAL_MUX=y || SERIAL_ROCKETPORT || SERIAL_SUNCORE || V850E_NB85E_UART || SERIAL98=y
 
 config SERIAL_CORE_CONSOLE
 	bool
-	depends on SERIAL_AMBA_CONSOLE || SERIAL_CLPS711X_CONSOLE || SERIAL_21285_CONSOLE || SERIAL_SA1100_CONSOLE || SERIAL_ANAKIN_CONSOLE || SERIAL_UART00_CONSOLE || SERIAL_8250_CONSOLE || SERIAL_MUX_CONSOLE || SERIAL_SUNCORE || V850E_NB85E_UART_CONSOLE
+	depends on SERIAL_AMBA_CONSOLE || SERIAL_CLPS711X_CONSOLE || SERIAL_21285_CONSOLE || SERIAL_SA1100_CONSOLE || SERIAL_ANAKIN_CONSOLE || SERIAL_UART00_CONSOLE || SERIAL_8250_CONSOLE || SERIAL_MUX_CONSOLE || SERIAL_SUNCORE || V850E_NB85E_UART_CONSOLE || SERIAL98_CONSOLE
 	default y
 
 config SERIAL_68328
diff -Nru a/drivers/serial/Makefile b/drivers/serial/Makefile
--- a/drivers/serial/Makefile	Mon Mar 31 13:41:07 2003
+++ b/drivers/serial/Makefile	Mon Mar 31 13:41:07 2003
@@ -27,3 +27,4 @@
 obj-$(CONFIG_SERIAL_68360) += 68360serial.o
 obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o
 obj-$(CONFIG_V850E_NB85E_UART) += nb85e_uart.o
+obj-$(CONFIG_SERIAL98) += serial98.o
diff -Nru a/drivers/usb/Makefile b/drivers/usb/Makefile
--- a/drivers/usb/Makefile	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/Makefile	Mon Mar 31 13:41:08 2003
@@ -14,6 +14,7 @@
 obj-$(CONFIG_USB_ACM)		+= class/
 obj-$(CONFIG_USB_AUDIO)		+= class/
 obj-$(CONFIG_USB_BLUETOOTH_TTY)	+= class/
+obj-$(CONFIG_USB_MIDI)		+= class/
 obj-$(CONFIG_USB_PRINTER)	+= class/
 
 obj-$(CONFIG_USB_STORAGE)	+= storage/
diff -Nru a/drivers/usb/class/audio.c b/drivers/usb/class/audio.c
--- a/drivers/usb/class/audio.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/class/audio.c	Mon Mar 31 13:41:08 2003
@@ -172,7 +172,6 @@
 
 /*****************************************************************************/
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff -Nru a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
--- a/drivers/usb/class/cdc-acm.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/class/cdc-acm.c	Mon Mar 31 13:41:07 2003
@@ -538,7 +538,7 @@
 
 		cfacm = dev->config + i;
 
-		dbg("probing config %d", cfacm->bConfigurationValue);
+		dbg("probing config %d", cfacm->desc.bConfigurationValue);
 
 		if (cfacm->desc.bNumInterfaces != 2 ||
 		    usb_interface_claimed(cfacm->interface + 0) ||
diff -Nru a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
--- a/drivers/usb/core/buffer.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/core/buffer.c	Mon Mar 31 13:41:08 2003
@@ -7,7 +7,6 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
diff -Nru a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
--- a/drivers/usb/core/hcd.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/core/hcd.c	Mon Mar 31 13:41:07 2003
@@ -1024,9 +1024,7 @@
 		 */
 		urb->transfer_flags |= URB_NO_DMA_MAP;
 		status = rh_urb_enqueue (hcd, urb);
-		if (status)
-			urb_unlink (urb);
-		return status;
+		goto done;
 	}
 
 	/* lower level hcd code should use *_dma exclusively,
@@ -1051,8 +1049,11 @@
 	}
 
 	status = hcd->driver->urb_enqueue (hcd, urb, mem_flags);
-	if (status)
+done:
+	if (status) {
+		usb_put_urb (urb);
 		urb_unlink (urb);
+	}
 	return status;
 }
 
diff -Nru a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
--- a/drivers/usb/core/hub.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/core/hub.c	Mon Mar 31 13:41:07 2003
@@ -135,8 +135,7 @@
 
 	default:		/* presumably an error */
 		/* Cause a hub reset after 10 consecutive errors */
-		dbg("hub '%s' status %d for interrupt transfer",
-			urb->dev->devpath, urb->status);
+		dev_dbg (&hub->intf->dev, "transfer --> %d\n", urb->status);
 		if ((++hub->nerrors < 10) || hub->error)
 			goto resubmit;
 		hub->error = urb->status;
@@ -158,10 +157,10 @@
 	spin_unlock_irqrestore(&hub_event_lock, flags);
 
 resubmit:
-	if ((status = usb_submit_urb (hub->urb, GFP_ATOMIC)) != 0)
-		err ("hub '%s-%s' status %d for interrupt resubmit",
-			urb->dev->bus->bus_name, urb->dev->devpath,
-			status);
+	if ((status = usb_submit_urb (hub->urb, GFP_ATOMIC)) != 0
+			/* ENODEV means we raced disconnect() */
+			&& status != -ENODEV)
+		dev_err (&hub->intf->dev, "resubmit --> %d\n", status);
 }
 
 /* USB 2.0 spec Section 11.24.2.3 */
@@ -648,12 +647,13 @@
 	struct usb_port_status *portsts;
 	int ret = -ENOMEM;
 
-	portsts = kmalloc(sizeof(*portsts), GFP_KERNEL);
+	portsts = kmalloc(sizeof(*portsts), GFP_NOIO);
 	if (portsts) {
 		ret = usb_get_port_status(hub, port + 1, portsts);
 		if (ret < 0)
-			err("%s(%s-%s) failed (err = %d)", __FUNCTION__,
-				hub->bus->bus_name, hub->devpath, ret);
+			dev_err (hubdev (hub),
+				"%s failed (err = %d)\n", __FUNCTION__,
+				ret);
 		else {
 			*status = le16_to_cpu(portsts->wPortStatus);
 			*change = le16_to_cpu(portsts->wPortChange); 
@@ -759,8 +759,8 @@
 
 	ret = usb_clear_port_feature(hub, port + 1, USB_PORT_FEAT_ENABLE);
 	if (ret)
-		err("cannot disable port %d of hub %s (err = %d)",
-			port + 1, hub->devpath, ret);
+		dev_err(hubdev(hub), "cannot disable port %d (err = %d)\n",
+			port + 1, ret);
 }
 
 /* USB 2.0 spec, 7.1.7.3 / fig 7-29:
@@ -983,12 +983,12 @@
 		spin_unlock_irqrestore(&hub_event_lock, flags);
 
 		if (hub->error) {
-			dbg("resetting hub %s for error %d",
-				dev->devpath, hub->error);
+			dev_dbg (&hub->intf->dev, "resetting for error %d\n",
+				hub->error);
 
 			if (usb_hub_reset(hub)) {
-				err("error resetting hub %s - disconnecting",
-					dev->devpath);
+				dev_dbg (&hub->intf->dev,
+					"can't reset; disconnecting\n");
 				up(&hub->khubd_sem);
 				usb_hub_disconnect(dev);
 				continue;
@@ -1022,33 +1022,37 @@
 				if (!(portstatus & USB_PORT_STAT_ENABLE)
 				    && (portstatus & USB_PORT_STAT_CONNECTION)
 				    && (dev->children[i])) {
-					err("already running hub %s port %i "
+					dev_err (&hub->intf->dev,
+					    "port %i "
 					    "disabled by hub (EMI?), "
-					    "re-enabling...",
+					    "re-enabling...\n",
-						dev->devpath, i + 1);
+						i + 1);
 					usb_hub_port_connect_change(hub,
 						i, portstatus, portchange);
 				}
 			}
 
 			if (portchange & USB_PORT_STAT_C_SUSPEND) {
-				dbg("hub %s port %d suspend change",
-					dev->devpath, i + 1);
+				dev_dbg (&hub->intf->dev,
+					"suspend change on port %d\n",
+					i + 1);
 				usb_clear_port_feature(dev,
 					i + 1,  USB_PORT_FEAT_C_SUSPEND);
 			}
 			
 			if (portchange & USB_PORT_STAT_C_OVERCURRENT) {
-				err("hub %s port %d over-current change",
-					dev->devpath, i + 1);
+				dev_err (&hub->intf->dev,
+					"over-current change on port %d\n",
+					i + 1);
 				usb_clear_port_feature(dev,
 					i + 1, USB_PORT_FEAT_C_OVER_CURRENT);
 				usb_hub_power_on(hub);
 			}
 
 			if (portchange & USB_PORT_STAT_C_RESET) {
-				dbg("hub %s port %d reset change",
-					dev->devpath, i + 1);
+				dev_dbg (&hub->intf->dev,
+					"reset change on port %d\n",
+					i + 1);
 				usb_clear_port_feature(dev,
 					i + 1, USB_PORT_FEAT_C_RESET);
 			}
@@ -1056,16 +1060,16 @@
 
 		/* deal with hub status changes */
 		if (usb_get_hub_status(dev, &hubsts) < 0)
-			err("get_hub_status %s failed", dev->devpath);
+			dev_err (&hub->intf->dev, "get_hub_status failed\n");
 		else {
 			hubstatus = le16_to_cpup(&hubsts.wHubStatus);
 			hubchange = le16_to_cpup(&hubsts.wHubChange);
 			if (hubchange & HUB_CHANGE_LOCAL_POWER) {
-				dbg("hub %s power change", dev->devpath);
+				dev_dbg (&hub->intf->dev, "power change\n");
 				usb_clear_hub_feature(dev, C_HUB_LOCAL_POWER);
 			}
 			if (hubchange & HUB_CHANGE_OVERCURRENT) {
-				dbg("hub %s overcurrent change", dev->devpath);
+				dev_dbg (&hub->intf->dev, "overcurrent change\n");
 				wait_ms(500);	/* Cool down */
 				usb_clear_hub_feature(dev, C_HUB_OVER_CURRENT);
                         	usb_hub_power_on(hub);
diff -Nru a/drivers/usb/core/message.c b/drivers/usb/core/message.c
--- a/drivers/usb/core/message.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/core/message.c	Mon Mar 31 13:41:06 2003
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <asm/byteorder.h>
 
 #include "hcd.h"	/* for usbcore internals */
diff -Nru a/drivers/usb/core/usb-debug.c b/drivers/usb/core/usb-debug.c
--- a/drivers/usb/core/usb-debug.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/core/usb-debug.c	Mon Mar 31 13:41:08 2003
@@ -5,7 +5,6 @@
  * face, but so that you can still use them..
  */
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff -Nru a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
--- a/drivers/usb/core/usb.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/core/usb.c	Mon Mar 31 13:41:07 2003
@@ -866,14 +866,11 @@
 }
 
 /**
- * usb_connect - connects a new device during enumeration (usbcore-internal)
- * @dev: partially enumerated device
- *
- * Connect a new USB device. This basically just initializes
- * the USB device information and sets up the topology - it's
- * up to the low-level driver to reset the port and actually
- * do the setup (the upper levels don't know how to do that).
+ * usb_connect - pick device address (usbcore-internal)
+ * @dev: newly detected device (in DEFAULT state)
  *
+ * Picks a device address.  It's up to the hub (or root hub) driver
+ * to handle and manage enumeration, starting from the DEFAULT state.
  * Only hub drivers (including virtual root hub drivers for host
  * controllers) should ever call this.
  */
@@ -983,7 +980,7 @@
 }
 
 /*
- * By the time we get here, the device has gotten a new device ID
+ * By the time we get here, the device has a newly chosen address
  * and is in the default state. We need to identify the thing and
  * get the ball rolling..
  *
diff -Nru a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
--- a/drivers/usb/host/ohci-hcd.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/host/ohci-hcd.c	Mon Mar 31 13:41:06 2003
@@ -94,7 +94,6 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>  /* for in_interrupt () */
 #include <linux/usb.h>
-#include <linux/version.h>
 #include "../core/hcd.h"
 
 #include <asm/io.h>
diff -Nru a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
--- a/drivers/usb/host/ohci-pci.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/host/ohci-pci.c	Mon Mar 31 13:41:08 2003
@@ -389,7 +389,7 @@
 	if (usb_disabled())
 		return -ENODEV;
 
-	printk (KERN_DEBUG "%s: block sizes: ed %d td %d\n", hcd_name,
+	printk (KERN_DEBUG "%s: block sizes: ed %Zd td %Zd\n", hcd_name,
 		sizeof (struct ed), sizeof (struct td));
 	return pci_module_init (&ohci_pci_driver);
 }
diff -Nru a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c
--- a/drivers/usb/image/mdc800.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/image/mdc800.c	Mon Mar 31 13:41:07 2003
@@ -85,7 +85,6 @@
  * (20/10/1999)
  */
 
-#include <linux/version.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/spinlock.h>
diff -Nru a/drivers/usb/media/ov511.c b/drivers/usb/media/ov511.c
--- a/drivers/usb/media/ov511.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/media/ov511.c	Mon Mar 31 13:41:06 2003
@@ -36,7 +36,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
diff -Nru a/drivers/usb/media/stv680.c b/drivers/usb/media/stv680.c
--- a/drivers/usb/media/stv680.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/media/stv680.c	Mon Mar 31 13:41:07 2003
@@ -60,7 +60,6 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
diff -Nru a/drivers/usb/misc/emi26.c b/drivers/usb/misc/emi26.c
--- a/drivers/usb/misc/emi26.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/misc/emi26.c	Mon Mar 31 13:41:06 2003
@@ -78,19 +78,28 @@
 	return response;
 }
 
+#define FW_LOAD_SIZE		1023
+
 static int emi26_load_firmware (struct usb_device *dev)
 {
 	int err;
 	int i;
 	int pos = 0;	/* Position in hex record */
 	__u32 addr;	/* Address to write */
-	__u8 buf[1023];
+	__u8 *buf;
+
+	buf = kmalloc(FW_LOAD_SIZE, GFP_KERNEL);
+	if (!buf) {
+		err( "%s - error loading firmware: error = %d", __FUNCTION__, -ENOMEM);
+		err = -ENOMEM;
+		goto wraperr;
+	}
 
 	/* Assert reset (stop the CPU in the EMI) */
 	err = emi26_set_reset(dev,1);
 	if (err < 0) {
 		err( "%s - error loading firmware: error = %d", __FUNCTION__, err);
-		return err;
+		goto wraperr;
 	}
 
 	/* 1. We need to put the loader for the FPGA into the EZ-USB */
@@ -98,7 +107,7 @@
 		err = emi26_writememory(dev, g_Loader[i].address, g_Loader[i].data, g_Loader[i].length, ANCHOR_LOAD_INTERNAL);
 		if (err < 0) {
 			err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-			return err;
+			goto wraperr;
 		}
 	}
 
@@ -113,7 +122,7 @@
 		addr = g_bitstream[pos].address;
 
 		/* intel hex records are terminated with type 0 element */
-		while ((g_bitstream[pos].type == 0) && (i + g_bitstream[pos].length < sizeof(buf))) {
+		while ((g_bitstream[pos].type == 0) && (i + g_bitstream[pos].length < FW_LOAD_SIZE)) {
 			memcpy(buf + i, g_bitstream[pos].data, g_bitstream[pos].length);
 			i += g_bitstream[pos].length;
 			pos++;
@@ -121,7 +130,7 @@
 		err = emi26_writememory(dev, addr, buf, i, ANCHOR_LOAD_FPGA);
 		if (err < 0) {
 			err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-			return err;
+			goto wraperr;
 		}
 	} while (i > 0);
 
@@ -129,7 +138,7 @@
 	err = emi26_set_reset(dev,1);
 	if (err < 0) {
 		err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-		return err;
+		goto wraperr;
 	}
 
 	/* 3. We need to put the loader for the firmware into the EZ-USB (again...) */
@@ -137,7 +146,7 @@
 		err = emi26_writememory(dev, g_Loader[i].address, g_Loader[i].data, g_Loader[i].length, ANCHOR_LOAD_INTERNAL);
 		if (err < 0) {
 			err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-			return err;
+			goto wraperr;
 		}
 	}
 
@@ -145,7 +154,7 @@
 	err = emi26_set_reset(dev,0);
 	if (err < 0) {
 		err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-		return err;
+		goto wraperr;
 	}
 
 	/* 4. We put the part of the firmware that lies in the external RAM into the EZ-USB */
@@ -154,7 +163,7 @@
 			err = emi26_writememory(dev, g_Firmware[i].address, g_Firmware[i].data, g_Firmware[i].length, ANCHOR_LOAD_EXTERNAL);
 			if (err < 0) {
 				err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-				return err;
+				goto wraperr;
 			}
 		}
 	}
@@ -163,7 +172,7 @@
 	err = emi26_set_reset(dev,1);
 	if (err < 0) {
 		err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-		return err;
+		goto wraperr;
 	}
 
 	for (i=0; g_Firmware[i].type == 0; i++) {
@@ -171,7 +180,7 @@
 			err = emi26_writememory(dev, g_Firmware[i].address, g_Firmware[i].data, g_Firmware[i].length, ANCHOR_LOAD_INTERNAL);
 			if (err < 0) {
 				err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-				return err;
+				goto wraperr;
 			}
 		}
 	}
@@ -180,12 +189,16 @@
 	err = emi26_set_reset(dev,0);
 	if (err < 0) {
 		err("%s - error loading firmware: error = %d", __FUNCTION__, err);
-		return err;
+		goto wraperr;
 	}
 
 	/* return 1 to fail the driver inialization
 	 * and give real driver change to load */
 	return 1;
+
+wraperr:
+	kfree(buf);
+	return err;
 }
 
 static __devinitdata struct usb_device_id id_table [] = {
diff -Nru a/drivers/usb/misc/speedtch.c b/drivers/usb/misc/speedtch.c
--- a/drivers/usb/misc/speedtch.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/misc/speedtch.c	Mon Mar 31 13:41:08 2003
@@ -148,27 +148,15 @@
 
 #define UDSL_SKB(x)		((struct udsl_control *)(x)->cb)
 
-struct atmsar_vcc_data {
-	struct atmsar_vcc_data *next;
-
-	/* general atmsar flags, per connection */
-	int flags;
-	int type;
-
-	/* connection specific non-atmsar data */
+struct udsl_vcc_data {
+	/* vpi/vci lookup */
+	struct list_head list;
+	short vpi;
+	int vci;
 	struct atm_vcc *vcc;
-	struct k_atm_aal_stats *stats;
-	unsigned short mtu;	/* max is actually  65k for AAL5... */
-
-	/* cell data */
-	unsigned int vp;
-	unsigned int vc;
-	unsigned char gfc;
-	unsigned char pti;
-	unsigned int headerFlags;
-	unsigned long atmHeader;
 
 	/* raw cell reassembly */
+	unsigned short mtu;
 	struct sk_buff *reasBuffer;
 };
 
@@ -186,7 +174,7 @@
 
 	/* atm device part */
 	struct atm_dev *atm_dev;
-	struct atmsar_vcc_data *atmsar_vcc_list;
+	struct list_head vcc_list;
 
 	/* receiving */
 	struct udsl_receiver all_receivers [UDSL_NUMBER_RCV_URBS];
@@ -258,73 +246,46 @@
 **  decode  **
 *************/
 
-#define ATM_HDR_VPVC_MASK		(ATM_HDR_VPI_MASK | ATM_HDR_VCI_MASK)
-#define ATMSAR_USE_53BYTE_CELL		0x1L
+static inline struct udsl_vcc_data *udsl_find_vcc (struct udsl_instance_data *instance, short vpi, int vci)
+{
+	struct udsl_vcc_data *vcc;
 
-struct sk_buff *atmsar_decode_rawcell (struct atmsar_vcc_data *list, struct sk_buff *skb,
-				       struct atmsar_vcc_data **ctx)
+	list_for_each_entry (vcc, &instance->vcc_list, list)
+		if ((vcc->vpi == vpi) && (vcc->vci == vci))
+			return vcc;
+	return NULL;
+}
+
+static struct sk_buff *udsl_decode_rawcell (struct udsl_instance_data *instance, struct sk_buff *skb, struct udsl_vcc_data **ctx)
 {
+	if (!instance || !skb || !ctx)
+		return NULL;
+	if (!skb->data || !skb->tail)
+		return NULL;
+
 	while (skb->len) {
 		unsigned char *cell = skb->data;
 		unsigned char *cell_payload;
-		struct atmsar_vcc_data *vcc = list;
-		unsigned long atmHeader =
-		    ((unsigned long) (cell[0]) << 24) | ((unsigned long) (cell[1]) << 16) |
-		    ((unsigned long) (cell[2]) << 8) | (cell[3] & 0xff);
-
-		dbg ("atmsar_decode_rawcell (0x%p, 0x%p, 0x%p) called", list, skb, ctx);
-		dbg ("atmsar_decode_rawcell skb->data %p, skb->tail %p", skb->data, skb->tail);
-
-		if (!list || !skb || !ctx)
-			return NULL;
-		if (!skb->data || !skb->tail)
-			return NULL;
+		struct udsl_vcc_data *vcc;
+		short vpi;
+		int vci;
+
+		vpi = ((cell[0] & 0x0f) << 4) | (cell[1] >> 4);
+		vci = ((cell[1] & 0x0f) << 12) | (cell[2] << 4) | (cell[3] >> 4);
+
+		dbg ("udsl_decode_rawcell (0x%p, 0x%p, 0x%p) called", instance, skb, ctx);
+		dbg ("udsl_decode_rawcell skb->data %p, skb->tail %p", skb->data, skb->tail);
 
 		/* here should the header CRC check be... */
 
-		/* look up correct vcc */
-		for (;
-		     vcc
-		     && ((vcc->atmHeader & ATM_HDR_VPVC_MASK) != (atmHeader & ATM_HDR_VPVC_MASK));
-		     vcc = vcc->next);
-
-		dbg ("atmsar_decode_rawcell found vcc %p for packet on vp %d, vc %d", vcc,
-			(int) ((atmHeader & ATM_HDR_VPI_MASK) >> ATM_HDR_VPI_SHIFT),
-			(int) ((atmHeader & ATM_HDR_VCI_MASK) >> ATM_HDR_VCI_SHIFT));
-
-		if (vcc && (skb->len >= (vcc->flags & ATMSAR_USE_53BYTE_CELL ? 53 : 52))) {
-			cell_payload = cell + (vcc->flags & ATMSAR_USE_53BYTE_CELL ? 5 : 4);
-
-			switch (vcc->type) {
-			case ATM_AAL0:
-				/* case ATM_AAL1: when we have a decode AAL1 function... */
-				{
-					struct sk_buff *tmp = dev_alloc_skb (vcc->mtu);
-
-					if (tmp) {
-						memcpy (tmp->tail, cell_payload, 48);
-						skb_put (tmp, 48);
-
-						if (vcc->stats)
-							atomic_inc (&vcc->stats->rx);
-
-						skb_pull (skb,
-							  (vcc->
-							   flags & ATMSAR_USE_53BYTE_CELL ? 53 :
-							   52));
-						dbg
-						    ("atmsar_decode_rawcell returns ATM_AAL0 pdu 0x%p with length %d",
-						     tmp, tmp->len);
-						return tmp;
-					};
-				}
-				break;
-			case ATM_AAL1:
-			case ATM_AAL2:
-			case ATM_AAL34:
-				/* not supported */
-				break;
-			case ATM_AAL5:
+		if (!(vcc = udsl_find_vcc (instance, vpi, vci))) {
+			dbg ("udsl_decode_rawcell: no vcc found for packet on vpi %d, vci %d", vpi, vci);
+			if (skb_pull (skb, 53) == NULL)	/* discard the unclaimed cell so the loop advances; bail out on a short buffer */
+				return NULL;
+		} else {
+			dbg ("udsl_decode_rawcell found vcc %p for packet on vpi %d, vci %d", vcc, vpi, vci);
+			if (skb->len >= 53) {
+				cell_payload = cell + 5;
 				if (!vcc->reasBuffer)
 					vcc->reasBuffer = dev_alloc_skb (vcc->mtu);
 
@@ -347,43 +308,36 @@
 						/* the aal5 buffer ends here, cut the buffer. */
 						/* buffer will always have at least one whole cell, so */
 						/* don't need to check return from skb_pull */
-						skb_pull (skb,
-							  (vcc->
-							   flags & ATMSAR_USE_53BYTE_CELL ? 53 :
-							   52));
+						skb_pull (skb, 53);
 						*ctx = vcc;
 						tmp = vcc->reasBuffer;
 						vcc->reasBuffer = NULL;
 
-						dbg
-						    ("atmsar_decode_rawcell returns ATM_AAL5 pdu 0x%p with length %d",
-						     tmp, tmp->len);
+						dbg ("udsl_decode_rawcell returns ATM_AAL5 pdu 0x%p with length %d", tmp, tmp->len);
 						return tmp;
 					}
 				}
-				break;
-			};
-			/* flush the cell */
-			/* buffer will always contain at least one whole cell, so don't */
-			/* need to check return value from skb_pull */
-			skb_pull (skb, (vcc->flags & ATMSAR_USE_53BYTE_CELL ? 53 : 52));
-		} else {
-			/* If data is corrupt and skb doesn't hold a whole cell, flush the lot */
-			if (skb_pull (skb, (list->flags & ATMSAR_USE_53BYTE_CELL ? 53 : 52)) ==
-			    NULL)
-				return NULL;
+				/* flush the cell */
+				/* buffer will always contain at least one whole cell, so don't */
+				/* need to check return value from skb_pull */
+				skb_pull (skb, 53);
+			} else {
+				/* If data is corrupt and skb doesn't hold a whole cell, flush the lot */
+				if (skb_pull (skb, 53) == NULL)
+					return NULL;
+			}
 		}
 	}
 
 	return NULL;
-};
+}
 
-struct sk_buff *atmsar_decode_aal5 (struct atmsar_vcc_data *ctx, struct sk_buff *skb)
+static struct sk_buff *udsl_decode_aal5 (struct udsl_vcc_data *ctx, struct sk_buff *skb)
 {
 	uint crc = 0xffffffff;
 	uint length, pdu_crc, pdu_length;
 
-	dbg ("atmsar_decode_aal5 (0x%p, 0x%p) called", ctx, skb);
+	dbg ("udsl_decode_aal5 (0x%p, 0x%p) called", ctx, skb);
 
 	if (skb->len && (skb->len % 48))
 		return NULL;
@@ -393,20 +347,18 @@
 	    (skb->tail[-4] << 24) + (skb->tail[-3] << 16) + (skb->tail[-2] << 8) + skb->tail[-1];
 	pdu_length = ((length + 47 + 8) / 48) * 48;
 
-	dbg ("atmsar_decode_aal5: skb->len = %d, length = %d, pdu_crc = 0x%x, pdu_length = %d",
-		skb->len, length, pdu_crc, pdu_length);
+	dbg ("udsl_decode_aal5: skb->len = %d, length = %d, pdu_crc = 0x%x, pdu_length = %d", skb->len, length, pdu_crc, pdu_length);
 
 	/* is skb long enough ? */
 	if (skb->len < pdu_length) {
-		if (ctx->stats)
-			atomic_inc (&ctx->stats->rx_err);
+		if (ctx->vcc->stats)
+			atomic_inc (&ctx->vcc->stats->rx_err);
 		return NULL;
 	}
 
 	/* is skb too long ? */
 	if (skb->len > pdu_length) {
-		dbg ("atmsar_decode_aal5: Warning: readjusting illeagl size %d -> %d",
-			skb->len, pdu_length);
+		dbg ("udsl_decode_aal5: Warning: readjusting illegal size %d -> %d", skb->len, pdu_length);
 		/* buffer is too long. we can try to recover
 		 * if we discard the first part of the skb.
 		 * the crc will decide whether this was ok
@@ -418,9 +370,9 @@
 
 	/* check crc */
 	if (pdu_crc != crc) {
-		dbg ("atmsar_decode_aal5: crc check failed!");
-		if (ctx->stats)
-			atomic_inc (&ctx->stats->rx_err);
+		dbg ("udsl_decode_aal5: crc check failed!");
+		if (ctx->vcc->stats)
+			atomic_inc (&ctx->vcc->stats->rx_err);
 		return NULL;
 	}
 
@@ -428,19 +380,20 @@
 	skb_trim (skb, length);
 
 	/* update stats */
-	if (ctx->stats)
-		atomic_inc (&ctx->stats->rx);
+	if (ctx->vcc->stats)
+		atomic_inc (&ctx->vcc->stats->rx);
 
-	dbg ("atmsar_decode_aal5 returns pdu 0x%p with length %d", skb, skb->len);
+	dbg ("udsl_decode_aal5 returns pdu 0x%p with length %d", skb, skb->len);
 	return skb;
-};
+}
 
 
 /*************
 **  encode  **
 *************/
 
-static void udsl_groom_skb (struct atm_vcc *vcc, struct sk_buff *skb) {
+static void udsl_groom_skb (struct atm_vcc *vcc, struct sk_buff *skb)
+{
 	struct udsl_control *ctrl = UDSL_SKB (skb);
 	unsigned int i, zero_padding;
 	unsigned char zero = 0;
@@ -480,7 +433,8 @@
 	ctrl->aal5_trailer [7] = crc;
 }
 
-unsigned int udsl_write_cells (unsigned int howmany, struct sk_buff *skb, unsigned char **target_p) {
+static unsigned int udsl_write_cells (unsigned int howmany, struct sk_buff *skb, unsigned char **target_p)
+{
 	struct udsl_control *ctrl = UDSL_SKB (skb);
 	unsigned char *target = *target_p;
 	unsigned int nc, ne, i;
@@ -569,7 +523,7 @@
 	unsigned char *data_start;
 	struct sk_buff *skb;
 	struct urb *urb;
-	struct atmsar_vcc_data *atmsar_vcc = NULL;
+	struct udsl_vcc_data *atmsar_vcc = NULL;
 	struct sk_buff *new = NULL, *tmp = NULL;
 	int err;
 
@@ -597,40 +551,28 @@
 			dbg ("skb->len = %d", skb->len);
 			PACKETDEBUG (skb->data, skb->len);
 
-			while ((new =
-				atmsar_decode_rawcell (instance->atmsar_vcc_list, skb,
-						       &atmsar_vcc)) != NULL) {
+			while ((new = udsl_decode_rawcell (instance, skb, &atmsar_vcc))) {
 				dbg ("(after cell processing)skb->len = %d", new->len);
 
-				switch (atmsar_vcc->type) {
-				case ATM_AAL5:
-					tmp = new;
-					new = atmsar_decode_aal5 (atmsar_vcc, new);
-
-					/* we can't send NULL skbs upstream, the ATM layer would try to close the vcc... */
-					if (new) {
-						dbg ("(after aal5 decap) skb->len = %d", new->len);
-						if (new->len && atm_charge (atmsar_vcc->vcc, new->truesize)) {
-							PACKETDEBUG (new->data, new->len);
-							atmsar_vcc->vcc->push (atmsar_vcc->vcc, new);
-						} else {
-							dbg
-							    ("dropping incoming packet : rx_inuse = %d, vcc->sk->rcvbuf = %d, skb->true_size = %d",
-							     atomic_read (&atmsar_vcc->vcc->rx_inuse),
-							     atmsar_vcc->vcc->sk->rcvbuf, new->truesize);
-							dev_kfree_skb (new);
-						}
+				tmp = new;
+				new = udsl_decode_aal5 (atmsar_vcc, new);
+
+				/* we can't send NULL skbs upstream, the ATM layer would try to close the vcc... */
+				if (new) {
+					dbg ("(after aal5 decap) skb->len = %d", new->len);
+					if (new->len && atm_charge (atmsar_vcc->vcc, new->truesize)) {
+						PACKETDEBUG (new->data, new->len);
+						atmsar_vcc->vcc->push (atmsar_vcc->vcc, new);
 					} else {
-						dbg ("atmsar_decode_aal5 returned NULL!");
-						dev_kfree_skb (tmp);
+						dbg
+						    ("dropping incoming packet : rx_inuse = %d, vcc->sk->rcvbuf = %d, skb->true_size = %d",
+						     atomic_read (&atmsar_vcc->vcc->rx_inuse),
+						     atmsar_vcc->vcc->sk->rcvbuf, new->truesize);
+						dev_kfree_skb (new);
 					}
-					break;
-				default:
-					/* not supported. we delete the skb. */
-					printk (KERN_INFO
-						"SpeedTouch USB: illegal vcc type. Dropping packet.\n");
-					dev_kfree_skb (new);
-					break;
+				} else {
+					dbg ("udsl_decode_aal5 returned NULL!");
+					dev_kfree_skb (tmp);
 				}
 			}
 
@@ -901,95 +843,6 @@
 **  ATM  **
 **********/
 
-#define ATMSAR_DEF_MTU_AAL0		48
-#define ATMSAR_DEF_MTU_AAL1		47
-#define ATMSAR_DEF_MTU_AAL2		0  /* not supported */
-#define ATMSAR_DEF_MTU_AAL34		0  /* not supported */
-#define ATMSAR_DEF_MTU_AAL5		65535  /* max mtu ..    */
-
-struct atmsar_vcc_data *atmsar_open (struct atmsar_vcc_data **list, struct atm_vcc *vcc, uint type,
-				     ushort vpi, ushort vci, unchar pti, unchar gfc, uint flags)
-{
-	struct atmsar_vcc_data *new;
-
-	if (!vcc)
-		return NULL;
-
-	new = kmalloc (sizeof (struct atmsar_vcc_data), GFP_KERNEL);
-
-	if (!new)
-		return NULL;
-
-	memset (new, 0, sizeof (struct atmsar_vcc_data));
-	new->vcc = vcc;
-	new->stats = vcc->stats;
-	new->type = type;
-	new->next = NULL;
-	new->gfc = gfc;
-	new->vp = vpi;
-	new->vc = vci;
-	new->pti = pti;
-
-	switch (type) {
-	case ATM_AAL0:
-		new->mtu = ATMSAR_DEF_MTU_AAL0;
-		break;
-	case ATM_AAL1:
-		new->mtu = ATMSAR_DEF_MTU_AAL1;
-		break;
-	case ATM_AAL2:
-		new->mtu = ATMSAR_DEF_MTU_AAL2;
-		break;
-	case ATM_AAL34:
-		/* not supported */
-		new->mtu = ATMSAR_DEF_MTU_AAL34;
-		break;
-	case ATM_AAL5:
-		new->mtu = ATMSAR_DEF_MTU_AAL5;
-		break;
-	}
-
-	new->atmHeader = ((unsigned long) gfc << ATM_HDR_GFC_SHIFT)
-	    | ((unsigned long) vpi << ATM_HDR_VPI_SHIFT)
-	    | ((unsigned long) vci << ATM_HDR_VCI_SHIFT)
-	    | ((unsigned long) pti << ATM_HDR_PTI_SHIFT);
-	new->flags = flags;
-	new->next = NULL;
-	new->reasBuffer = NULL;
-
-	new->next = *list;
-	*list = new;
-
-	dbg ("Allocated new SARLib vcc 0x%p with vp %d vc %d", new, vpi, vci);
-
-	return new;
-}
-
-void atmsar_close (struct atmsar_vcc_data **list, struct atmsar_vcc_data *vcc)
-{
-	struct atmsar_vcc_data *work;
-
-	if (*list == vcc) {
-		*list = (*list)->next;
-	} else {
-		for (work = *list; work && work->next && (work->next != vcc); work = work->next);
-
-		/* return if not found */
-		if (work->next != vcc)
-			return;
-
-		work->next = work->next->next;
-	}
-
-	if (vcc->reasBuffer) {
-		dev_kfree_skb (vcc->reasBuffer);
-	}
-
-	dbg ("Allocated SARLib vcc 0x%p with vp %d vc %d", vcc, vcc->vp, vcc->vc);
-
-	kfree (vcc);
-}
-
 static void udsl_atm_dev_close (struct atm_dev *dev)
 {
 	struct udsl_instance_data *instance = dev->dev_data;
@@ -1061,11 +914,10 @@
 	return 0;
 }
 
-#define ATMSAR_SET_PTI		0x2L
-
 static int udsl_atm_open (struct atm_vcc *vcc, short vpi, int vci)
 {
 	struct udsl_instance_data *instance = vcc->dev->dev_data;
+	struct udsl_vcc_data *new;
 
 	dbg ("udsl_atm_open called");
 
@@ -1074,63 +926,97 @@
 		return -ENODEV;
 	}
 
-	/* at the moment only AAL5 support */
+	if ((vpi == ATM_VPI_ANY) || (vci == ATM_VCI_ANY))
+		return -EINVAL;
+
+	/* only support AAL5 */
 	if (vcc->qos.aal != ATM_AAL5)
 		return -EINVAL;
 
-	MOD_INC_USE_COUNT;
+	down (&instance->serialize); /* vs self, udsl_atm_close */
 
-	vcc->dev_data =
-	    atmsar_open (&(instance->atmsar_vcc_list), vcc, ATM_AAL5, vpi, vci, 0, 0,
-			 ATMSAR_USE_53BYTE_CELL | ATMSAR_SET_PTI);
-	if (!vcc->dev_data) {
-		MOD_DEC_USE_COUNT;
-		return -ENOMEM;	/* this is the only reason atmsar_open can fail... */
+	if (udsl_find_vcc (instance, vpi, vci)) {
+		up (&instance->serialize);
+		return -EADDRINUSE;
+	}
+
+	if (!(new = kmalloc (sizeof (struct udsl_vcc_data), GFP_KERNEL))) {
+		up (&instance->serialize);
+		return -ENOMEM;
 	}
 
+	memset (new, 0, sizeof (struct udsl_vcc_data));
+	new->vcc = vcc;
+	new->vpi = vpi;
+	new->vci = vci;
+	new->mtu = UDSL_MAX_AAL5_MRU;
+
+	vcc->dev_data = new;
 	vcc->vpi = vpi;
 	vcc->vci = vci;
+
+	tasklet_disable (&instance->receive_tasklet);
+	list_add (&new->list, &instance->vcc_list);
+	tasklet_enable (&instance->receive_tasklet);
+
 	set_bit (ATM_VF_ADDR, &vcc->flags);
 	set_bit (ATM_VF_PARTIAL, &vcc->flags);
 	set_bit (ATM_VF_READY, &vcc->flags);
 
-	((struct atmsar_vcc_data *)vcc->dev_data)->mtu = UDSL_MAX_AAL5_MRU;
+	up (&instance->serialize);
+
+	dbg ("Allocated new SARLib vcc 0x%p with vpi %d vci %d", new, vpi, vci);
+
+	MOD_INC_USE_COUNT;
 
 	if (instance->firmware_loaded)
 		udsl_fire_receivers (instance);
 
 	dbg ("udsl_atm_open successful");
+
 	return 0;
 }
 
 static void udsl_atm_close (struct atm_vcc *vcc)
 {
 	struct udsl_instance_data *instance = vcc->dev->dev_data;
+	struct udsl_vcc_data *vcc_data = vcc->dev_data;
 
 	dbg ("udsl_atm_close called");
 
-	if (!instance) {
-		dbg ("NULL instance!");
+	if (!instance || !vcc_data) {
+		dbg ("NULL data!");
 		return;
 	}
 
-	/* freeing resources */
-	/* cancel all sends on this vcc */
+	dbg ("Deallocating SARLib vcc 0x%p with vpi %d vci %d", vcc_data, vcc_data->vpi, vcc_data->vci);
+
 	udsl_cancel_send (instance, vcc);
 
-	atmsar_close (&(instance->atmsar_vcc_list), vcc->dev_data);
+	down (&instance->serialize); /* vs self, udsl_atm_open */
+
+	tasklet_disable (&instance->receive_tasklet);
+	list_del (&vcc_data->list);
+	tasklet_enable (&instance->receive_tasklet);
+
+	if (vcc_data->reasBuffer)
+		kfree_skb (vcc_data->reasBuffer);
+	vcc_data->reasBuffer = NULL;
+
+	kfree (vcc_data);
 	vcc->dev_data = NULL;
-	clear_bit (ATM_VF_PARTIAL, &vcc->flags);
 
-	/* freeing address */
 	vcc->vpi = ATM_VPI_UNSPEC;
 	vcc->vci = ATM_VCI_UNSPEC;
+	clear_bit (ATM_VF_READY, &vcc->flags);
+	clear_bit (ATM_VF_PARTIAL, &vcc->flags);
 	clear_bit (ATM_VF_ADDR, &vcc->flags);
 
+	up (&instance->serialize);
+
 	MOD_DEC_USE_COUNT;
 
 	dbg ("udsl_atm_close successful");
-	return;
 }
 
 static int udsl_atm_ioctl (struct atm_dev *dev, unsigned int cmd, void *arg)
@@ -1208,6 +1094,8 @@
 	init_MUTEX (&instance->serialize);
 
 	instance->usb_dev = dev;
+
+	INIT_LIST_HEAD (&instance->vcc_list);
 
 	spin_lock_init (&instance->spare_receivers_lock);
 	INIT_LIST_HEAD (&instance->spare_receivers);
diff -Nru a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
--- a/drivers/usb/misc/usbtest.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/misc/usbtest.c	Mon Mar 31 13:41:08 2003
@@ -881,6 +881,8 @@
 
 	init_completion (&completion);
 	urb = simple_alloc_urb (testdev_to_usbdev (dev), pipe, size);
+	if (!urb)
+		return -ENOMEM;
 	if (async)
 		urb->transfer_flags |= URB_ASYNC_UNLINK;
 	urb->context = &completion;
@@ -1404,6 +1406,11 @@
 
 	/* re-enumerated usb test device firmware */
 	{ USB_DEVICE (0xfff0, 0xfff0),
+		.driver_info = (unsigned long) &fw_info,
+		},
+
+	/* "Gadget Zero" firmware runs under Linux */
+	{ USB_DEVICE (0x0525, 0xa4a0),
 		.driver_info = (unsigned long) &fw_info,
 		},
 
diff -Nru a/drivers/usb/net/cdc-ether.c b/drivers/usb/net/cdc-ether.c
--- a/drivers/usb/net/cdc-ether.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/net/cdc-ether.c	Mon Mar 31 13:41:06 2003
@@ -269,22 +269,8 @@
 static int CDCEther_start_xmit( struct sk_buff *skb, struct net_device *net )
 {
 	ether_dev_t	*ether_dev = net->priv;
-	int 	count;
 	int 	res;
 
-	// If we are told to transmit an ethernet frame that fits EXACTLY 
-	// into an integer number of USB packets, we force it to send one 
-	// more byte so the device will get a runt USB packet signalling the 
-	// end of the ethernet frame
-	if ( (skb->len) ^ (ether_dev->data_ep_out_size) ) {
-		// It was not an exact multiple
-		// no need to add anything extra
-		count = skb->len;
-	} else {
-		// Add one to make it NOT an exact multiple
-		count = skb->len + 1;
-	}
-
 	// Tell the kernel, "No more frames 'til we are done
 	// with this one.'
 	netif_stop_queue( net );
@@ -299,7 +285,10 @@
 			write_bulk_callback, ether_dev );
 
 	// Tell the URB how much it will be transporting today
-	ether_dev->tx_urb->transfer_buffer_length = count;
+	ether_dev->tx_urb->transfer_buffer_length = skb->len;
+
+	/* Deal with the zero length problem, I hope */
+	ether_dev->tx_urb->transfer_flags |= URB_ZERO_PACKET;
 	
 	// Send the URB on its merry way.
 	if ((res = usb_submit_urb(ether_dev->tx_urb, GFP_ATOMIC)))  {
diff -Nru a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
--- a/drivers/usb/serial/kobil_sct.c	Mon Mar 31 13:41:08 2003
+++ b/drivers/usb/serial/kobil_sct.c	Mon Mar 31 13:41:08 2003
@@ -406,8 +406,6 @@
 	int result = 0;
 	int todo = 0;
 	struct kobil_private * priv;
-	int i;
-	char *data;
 
 	if (count == 0) {
 		dbg("%s - port %d write request of 0 bytes", __FUNCTION__, port->number);
@@ -421,19 +419,6 @@
 		return -ENOMEM;
 	}
 
-	// BEGIN DEBUG
-	data = (unsigned char *) kmalloc((3 * count + 10) * sizeof(char), GFP_KERNEL);  
-	if (! data) {
-		return (-1);
-	}
-	memset(data, 0, (3 * count + 10));
-	for (i = 0; i < count; i++) { 
-		sprintf(data +3*i, "%02X ", buf[i]); 
-	} 
-	dbg(" %d --> %s", port->number, data );
-	kfree(data);
-	// END DEBUG
-
 	// Copy data to buffer
 	if (from_user) {
 		if (copy_from_user(priv->buf + priv->filled, buf, count)) {
@@ -442,6 +427,8 @@
 	} else {
 		memcpy (priv->buf + priv->filled, buf, count);
 	}
+
+	usb_serial_debug_data (__FILE__, __FUNCTION__, count, priv->buf + priv->filled);
 
 	priv->filled = priv->filled + count;
 
diff -Nru a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
--- a/drivers/usb/storage/isd200.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/storage/isd200.c	Mon Mar 31 13:41:07 2003
@@ -405,11 +405,14 @@
 {
 	union ata_cdb ata;
 	struct scsi_cmnd srb;
+	struct scsi_device srb_dev;
 	struct isd200_info *info = (struct isd200_info *)us->extra;
 	int status;
 
 	memset(&ata, 0, sizeof(ata));
 	memset(&srb, 0, sizeof(srb));
+	memset(&srb_dev, 0, sizeof(srb_dev));
+	srb.device = &srb_dev;
 
 	ata.generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
 	ata.generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -479,6 +482,7 @@
 	}
 
 	memcpy(srb.cmnd, &ata, sizeof(ata.generic));
+	srb.cmd_len = sizeof(ata.generic);
 	status = usb_stor_Bulk_transport(&srb, us);
 	if (status == USB_STOR_TRANSPORT_GOOD)
 		status = ISD200_GOOD;
@@ -538,6 +542,7 @@
 	/* send the command to the transport layer */
 	srb->resid = 0;
 	memcpy(srb->cmnd, ataCdb, sizeof(ataCdb->generic));
+	srb->cmd_len = sizeof(ataCdb->generic);
 	transferStatus = usb_stor_Bulk_transport(srb, us);
 
 	/* if the command gets aborted by the higher layers, we need to
diff -Nru a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
--- a/drivers/usb/storage/scsiglue.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/storage/scsiglue.c	Mon Mar 31 13:41:07 2003
@@ -209,21 +209,14 @@
 	return result;
 }
 
-/* This resets the device port, and simulates the device
- * disconnect/reconnect for all drivers which have claimed
- * interfaces, including ourself. */
+/* This resets the device port */
+/* It refuses to work if there's more than one interface in
+   this device, so that other users are not affected. */
 /* This is always called with scsi_lock(srb->host) held */
 
-/* FIXME: This needs to be re-examined in the face of the new
- * hotplug system -- this will implicitly cause a detach/reattach of
- * usb-storage, which is not what we want now.
- *
- * Can we just skip over usb-storage in the while loop?
- */
 static int usb_storage_bus_reset( Scsi_Cmnd *srb )
 {
 	struct us_data *us;
-	int i;
 	int result;
 
 	/* we use the usb_reset_device() function to handle this for us */
@@ -231,36 +224,25 @@
 	scsi_unlock(srb->device->host);
 	us = (struct us_data *)srb->device->host->hostdata[0];
 
-	/* attempt to reset the port */
-	result = usb_reset_device(us->pusb_dev);
-	US_DEBUGP("usb_reset_device returns %d\n", result);
-	if (result < 0) {
-		scsi_lock(srb->device->host);
-		return FAILED;
+	/* The USB subsystem doesn't handle synchronisation between
+	   a device's several drivers. Therefore we reset only devices
+	   with one interface which we of course own.
+	*/
+	
+	//FIXME: needs locking against config changes
+	
+	if ( us->pusb_dev->actconfig->desc.bNumInterfaces == 1) {
+		/* attempt to reset the port */
+		result = usb_reset_device(us->pusb_dev);
+		US_DEBUGP("usb_reset_device returns %d\n", result);
+	} else {
+		result = -EBUSY;
+		US_DEBUGP("cannot reset a multiinterface device. failing to reset.\n");
 	}
 
-	/* FIXME: This needs to lock out driver probing while it's working
-	 * or we can have race conditions */
-	/* This functionality really should be provided by the khubd thread */
-	for (i = 0; i < us->pusb_dev->actconfig->desc.bNumInterfaces; i++) {
- 		struct usb_interface *intf =
-			&us->pusb_dev->actconfig->interface[i];
-
-		/* if this is an unclaimed interface, skip it */
-		if (!intf->driver) {
-			continue;
-		}
-
-		US_DEBUGP("Examining driver %s...", intf->driver->name);
-
-		/* simulate a disconnect and reconnect for all interfaces */
-		US_DEBUGPX("simulating disconnect/reconnect.\n");
-		usb_device_remove (&intf->dev);
-		usb_device_probe (&intf->dev);
-	}
 	US_DEBUGP("bus_reset() complete\n");
 	scsi_lock(srb->device->host);
-	return SUCCESS;
+	return result < 0 ? FAILED : SUCCESS;
 }
 
 /***********************************************************************
diff -Nru a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
--- a/drivers/usb/storage/transport.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/storage/transport.c	Mon Mar 31 13:41:07 2003
@@ -126,6 +126,7 @@
 	us->current_urb->actual_length = 0;
 	us->current_urb->error_count = 0;
 	us->current_urb->transfer_flags = URB_ASYNC_UNLINK;
+	us->current_urb->status = 0;
 
 	/* submit the URB */
 	status = usb_submit_urb(us->current_urb, GFP_NOIO);
@@ -900,7 +901,7 @@
 	bcb.DataTransferLength = cpu_to_le32(transfer_length);
 	bcb.Flags = srb->sc_data_direction == SCSI_DATA_READ ? 1 << 7 : 0;
 	bcb.Tag = srb->serial_number;
-	bcb.Lun = srb->cmnd[1] >> 5;
+	bcb.Lun = srb->device->lun;
 	if (us->flags & US_FL_SCM_MULT_TARG)
 		bcb.Lun |= srb->device->id << 4;
 	bcb.Length = srb->cmd_len;
diff -Nru a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
--- a/drivers/usb/storage/usb.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/usb/storage/usb.c	Mon Mar 31 13:41:06 2003
@@ -360,7 +360,7 @@
 		}
 
 		else if (us->srb->device->lun > us->max_lun) {
-			US_DEBUGP("Bad LUN (%d/%d)\n",
+			US_DEBUGP("Bad LUN (%d:%d)\n",
 				  us->srb->device->id, us->srb->device->lun);
 			us->srb->result = DID_BAD_TARGET << 16;
 		}
@@ -475,8 +475,6 @@
  */
 static void usb_stor_deallocate_urbs(struct us_data *ss)
 {
-	int result;
-
 	/* free the scatter-gather request block */
 	if (ss->current_sg) {
 		kfree(ss->current_sg);
@@ -486,8 +484,6 @@
 	/* free up the main URB for this device */
 	if (ss->current_urb) {
 		US_DEBUGP("-- releasing main URB\n");
-		result = usb_unlink_urb(ss->current_urb);
-		US_DEBUGP("-- usb_unlink_urb() returned %d\n", result);
 		usb_free_urb(ss->current_urb);
 		ss->current_urb = NULL;
 	}
diff -Nru a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
--- a/drivers/usb/usb-skeleton.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/usb/usb-skeleton.c	Mon Mar 31 13:41:07 2003
@@ -72,13 +72,15 @@
 MODULE_PARM_DESC(debug, "Debug enabled or not");
 
 
-/* Define these values to match your device */
+/* Define these values to match your devices */
 #define USB_SKEL_VENDOR_ID	0xfff0
 #define USB_SKEL_PRODUCT_ID	0xfff0
 
 /* table of devices that work with this driver */
 static struct usb_device_id skel_table [] = {
 	{ USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) },
+	/* "Gadget Zero" firmware runs under Linux */
+	{ USB_DEVICE(0x0525, 0xa4a0) },
 	{ }					/* Terminating entry */
 };
 
@@ -707,7 +709,7 @@
 	/* register this driver with the USB subsystem */
 	result = usb_register(&skel_driver);
 	if (result < 0) {
-		err("usb_register failed for the "__FILE__" driver. Error number %d",
+		err("usb_register failed. Error number %d",
 		    result);
 		return -1;
 	}
diff -Nru a/drivers/video/Makefile b/drivers/video/Makefile
--- a/drivers/video/Makefile	Mon Mar 31 13:41:06 2003
+++ b/drivers/video/Makefile	Mon Mar 31 13:41:06 2003
@@ -31,7 +31,7 @@
 obj-$(CONFIG_FB_ANAKIN)           += anakinfb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o
 obj-$(CONFIG_FB_CLPS711X)         += clps711xfb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o
 obj-$(CONFIG_FB_CYBER)            += cyberfb.o
-obj-$(CONFIG_FB_CYBER2000)        += cyber2000fb.o
+obj-$(CONFIG_FB_CYBER2000)        += cyber2000fb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o
 obj-$(CONFIG_FB_SGIVW)            += sgivwfb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o
 obj-$(CONFIG_FB_3DFX)             += tdfxfb.o cfbimgblt.o
 obj-$(CONFIG_FB_MAC)              += macfb.o macmodes.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o 
diff -Nru a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
--- a/drivers/video/cyber2000fb.c	Mon Mar 31 13:41:06 2003
+++ b/drivers/video/cyber2000fb.c	Mon Mar 31 13:41:06 2003
@@ -55,12 +55,6 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-#include <video/fbcon.h>
-#include <video/fbcon-cfb8.h>
-#include <video/fbcon-cfb16.h>
-#include <video/fbcon-cfb24.h>
-#include <video/fbcon-cfb32.h>
-
 #include "cyber2000fb.h"
 
 struct cfb_info {
@@ -147,167 +141,114 @@
 /*
  * Hardware Cyber2000 Acceleration
  */
-static void cyber2000_accel_wait(struct cfb_info *cfb)
-{
-	int count = 100000;
-
-	while (cyber2000fb_readb(CO_REG_CONTROL, cfb) & CO_CTRL_BUSY) {
-		if (!count--) {
-			debug_printf("accel_wait timed out\n");
-			cyber2000fb_writeb(0, CO_REG_CONTROL, cfb);
-			return;
-		}
-		udelay(1);
-	}
-}
-
-static void cyber2000_accel_setup(struct display *display)
-{
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
-
-	cfb->dispsw->setup(display);
-}
-
 static void
-cyber2000_accel_bmove(struct display *display, int sy, int sx, int dy, int dx,
-		      int height, int width)
+cyber2000fb_fillrect(struct fb_info *info, struct fb_fillrect *rect)
 {
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
-	struct fb_var_screeninfo *var = &display->var;
-	u_long src, dst;
-	u_int fh, fw, cmd = CO_CMD_L_PATTERN_FGCOL;
-
-	fw    = fontwidth(display);
-	sx    *= fw;
-	dx    *= fw;
-	width *= fw;
-	width -= 1;
-
-	if (sx < dx) {
-		sx += width;
-		dx += width;
-		cmd |= CO_CMD_L_INC_LEFT;
-	}
+	struct cfb_info *cfb = (struct cfb_info *)info;
+	unsigned long dst, col;
 
-	fh     = fontheight(display);
-	sy     *= fh;
-	dy     *= fh;
-	height *= fh;
-	height -= 1;
-
-	if (sy < dy) {
-		sy += height;
-		dy += height;
-		cmd |= CO_CMD_L_INC_UP;
+	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
+		cfb_fillrect(info, rect);
+		return;
 	}
 
-	src    = sx + sy * var->xres_virtual;
-	dst    = dx + dy * var->xres_virtual;
+	cyber2000fb_writeb(0, CO_REG_CONTROL, cfb);
+	cyber2000fb_writew(rect->width - 1, CO_REG_PIXWIDTH, cfb);
+	cyber2000fb_writew(rect->height - 1, CO_REG_PIXHEIGHT, cfb);
 
-	cyber2000_accel_wait(cfb);
-	cyber2000fb_writeb(0x00, CO_REG_CONTROL, cfb);
-	cyber2000fb_writew(width, CO_REG_PIXWIDTH, cfb);
-	cyber2000fb_writew(height, CO_REG_PIXHEIGHT, cfb);
+	col = rect->color;
+	if (cfb->fb.var.bits_per_pixel > 8)
+		col = ((u32 *)cfb->fb.pseudo_palette)[col];
+	cyber2000fb_writel(col, CO_REG_FGCOLOUR, cfb);
 
-	if (var->bits_per_pixel == 24) {
+	dst = rect->dx + rect->dy * cfb->fb.var.xres_virtual;
+	if (cfb->fb.var.bits_per_pixel == 24) {
 		cyber2000fb_writeb(dst, CO_REG_X_PHASE, cfb);
 		dst *= 3;
-		src *= 3;
 	}
 
-	cyber2000fb_writel(src, CO_REG_SRC1_PTR, cfb);
 	cyber2000fb_writel(dst, CO_REG_DEST_PTR, cfb);
 	cyber2000fb_writeb(CO_FG_MIX_SRC, CO_REG_FGMIX, cfb);
-	cyber2000fb_writew(cmd, CO_REG_CMD_L, cfb);
-	cyber2000fb_writew(CO_CMD_H_FGSRCMAP|CO_CMD_H_BLITTER, CO_REG_CMD_H, cfb);
+	cyber2000fb_writew(CO_CMD_L_PATTERN_FGCOL, CO_REG_CMD_L, cfb);
+	cyber2000fb_writew(CO_CMD_H_BLITTER, CO_REG_CMD_H, cfb);
 }
 
 static void
-cyber2000_accel_clear(struct vc_data *conp, struct display *display, int sy,
-		      int sx, int height, int width)
+cyber2000fb_copyarea(struct fb_info *info, struct fb_copyarea *region)
 {
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
-	struct fb_var_screeninfo *var = &display->var;
-	u_long dst;
-	u_int fw, fh;
-	u32 bgx = attr_bgcol_ec(display, conp);
-
-	fw = fontwidth(display);
-	fh = fontheight(display);
-
-	dst    = sx * fw + sy * var->xres_virtual * fh;
-	width  = width * fw - 1;
-	height = height * fh - 1;
-
-	cyber2000_accel_wait(cfb);
-	cyber2000fb_writeb(0x00, CO_REG_CONTROL, cfb);
-	cyber2000fb_writew(width, CO_REG_PIXWIDTH, cfb);
-	cyber2000fb_writew(height, CO_REG_PIXHEIGHT, cfb);
+	struct cfb_info *cfb = (struct cfb_info *)info;
+	unsigned int cmd = CO_CMD_L_PATTERN_FGCOL;
+	unsigned long src, dst;
 
-	if (var->bits_per_pixel == 24) {
-		cyber2000fb_writeb(dst, CO_REG_X_PHASE, cfb);
-		dst *= 3;
+	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
+		cfb_copyarea(info, region);
+		return;
 	}
 
-	if (var->bits_per_pixel == 16)
-		bgx = ((u16 *)display->dispsw_data)[bgx];
-	else if (var->bits_per_pixel >= 24)
-		bgx = ((u32 *)display->dispsw_data)[bgx];
-
-	cyber2000fb_writel(bgx, CO_REG_FGCOLOUR, cfb);
-	cyber2000fb_writel(dst, CO_REG_DEST_PTR, cfb);
-	cyber2000fb_writeb(CO_FG_MIX_SRC, CO_REG_FGMIX, cfb);
-	cyber2000fb_writew(CO_CMD_L_PATTERN_FGCOL, CO_REG_CMD_L, cfb);
-	cyber2000fb_writew(CO_CMD_H_BLITTER, CO_REG_CMD_H, cfb);
-}
+	if (region->sx < region->dx) {
+		region->sx += region->width - 1;
+		region->dx += region->width - 1;
+		cmd |= CO_CMD_L_INC_LEFT;
+	}
 
-static void
-cyber2000_accel_putc(struct vc_data *conp, struct display *display, int c,
-		     int yy, int xx)
-{
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
+	if (region->sy < region->dy) {
+		region->sy += region->height - 1;
+		region->dy += region->height - 1;
+		cmd |= CO_CMD_L_INC_UP;
+	}
 
-	cyber2000_accel_wait(cfb);
-	cfb->dispsw->putc(conp, display, c, yy, xx);
+	cyber2000fb_writeb(0, CO_REG_CONTROL, cfb);
+	cyber2000fb_writew(region->width - 1, CO_REG_PIXWIDTH, cfb);
+	cyber2000fb_writew(region->height - 1, CO_REG_PIXHEIGHT, cfb);
+
+	src = region->sx + region->sy * cfb->fb.var.xres_virtual;
+	dst = region->dx + region->dy * cfb->fb.var.xres_virtual;
+	if (cfb->fb.var.bits_per_pixel == 24) {
+		cyber2000fb_writeb(dst, CO_REG_X_PHASE, cfb);
+		src *= 3;
+		dst *= 3;
+	}
+	cyber2000fb_writel(src, CO_REG_SRC1_PTR, cfb);
+	cyber2000fb_writel(dst, CO_REG_DEST_PTR, cfb);
+	cyber2000fb_writew(CO_FG_MIX_SRC, CO_REG_FGMIX, cfb);
+	cyber2000fb_writew(cmd, CO_REG_CMD_L, cfb);
+	cyber2000fb_writew(CO_CMD_H_FGSRCMAP | CO_CMD_H_BLITTER,
+			   CO_REG_CMD_H, cfb);
 }
 
 static void
-cyber2000_accel_putcs(struct vc_data *conp, struct display *display,
-		      const unsigned short *s, int count, int yy, int xx)
+cyber2000fb_imageblit(struct fb_info *info, struct fb_image *image)
 {
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
+	struct cfb_info *cfb = (struct cfb_info *)info;
 
-	cyber2000_accel_wait(cfb);
-	cfb->dispsw->putcs(conp, display, s, count, yy, xx);
+//	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
+		cfb_imageblit(info, image);
+		return;
+//	}
 }
 
-static void cyber2000_accel_revc(struct display *display, int xx, int yy)
+static int cyber2000fb_sync(struct fb_info *info)
 {
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
-
-	cyber2000_accel_wait(cfb);
-	cfb->dispsw->revc(display, xx, yy);
-}
+	struct cfb_info *cfb = (struct cfb_info *)info;
+	int count = 100000;
 
-static void
-cyber2000_accel_clear_margins(struct vc_data *conp, struct display *display,
-			      int bottom_only)
-{
-	struct cfb_info *cfb = (struct cfb_info *)display->fb_info;
+	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT))
+		return 0;
 
-	cfb->dispsw->clear_margins(conp, display, bottom_only);
+	while (cyber2000fb_readb(CO_REG_CONTROL, cfb) & CO_CTRL_BUSY) {
+		if (!count--) {
+			debug_printf("accel_wait timed out\n");
+			cyber2000fb_writeb(0, CO_REG_CONTROL, cfb);
+			break;
+		}
+		udelay(1);
+	}
+	return 0;
 }
 
-static struct display_switch fbcon_cyber_accel = {
-	.setup		= cyber2000_accel_setup,
-	.bmove		= cyber2000_accel_bmove,
-	.clear		= cyber2000_accel_clear,
-	.putc		= cyber2000_accel_putc,
-	.putcs		= cyber2000_accel_putcs,
-	.revc		= cyber2000_accel_revc,
-	.clear_margins	= cyber2000_accel_clear_margins,
-	.fontwidthmask	= FONTWIDTH(8)|FONTWIDTH(16)
-};
+/*
+ * ===========================================================================
+ */
 
 static inline u32 convert_bitfield(u_int val, struct fb_bitfield *bf)
 {
@@ -324,7 +265,7 @@
 		      u_int transp, struct fb_info *info)
 {
 	struct cfb_info *cfb = (struct cfb_info *)info;
-	struct fb_var_screeninfo *var = &cfb->display->var;
+	struct fb_var_screeninfo *var = &cfb->fb.var;
 	u32 pseudo_val;
 	int ret = 1;
 
@@ -332,7 +273,6 @@
 	default:
 		return 1;
 
-#ifdef FBCON_HAS_CFB8
 	/*
 	 * Pseudocolour:
 	 *         8     8
@@ -359,7 +299,6 @@
 		cyber2000fb_writeb(green, 0x3c9, cfb);
 		cyber2000fb_writeb(blue, 0x3c9, cfb);
 		return 0;
-#endif
 
 	/*
 	 * Direct colour:
@@ -455,13 +394,8 @@
 	/*
 	 * Now set our pseudo palette for the CFB16/24/32 drivers.
 	 */
-	if (regno < 16) {
-		if (var->bits_per_pixel == 16)
-			((u16 *)cfb->fb.pseudo_palette)[regno] = pseudo_val;
-		else
-			((u32 *)cfb->fb.pseudo_palette)[regno] = pseudo_val;
-		ret = 0;
-	}
+	if (regno < 16)
+		((u32 *)cfb->fb.pseudo_palette)[regno] = pseudo_val;
 
 	return ret;
 }
@@ -800,20 +734,16 @@
 }
 
 /*
- * Decode the info required for the hardware.
- * This involves the PLL parameters for the dot clock,
- * CRTC registers, and accelerator settings.
+ *    Set the User Defined Part of the Display
  */
 static int
-cyber2000fb_decode_var(struct fb_var_screeninfo *var, struct cfb_info *cfb,
-		       struct par_info *hw)
+cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
+	struct cfb_info *cfb = (struct cfb_info *)info;
+	struct par_info hw;
 	unsigned int mem;
 	int err;
 
-	hw->width = var->xres_virtual;
-	hw->ramdac = RAMDAC_VREFEN | RAMDAC_DAC8BIT;
-
 	var->transp.msb_right	= 0;
 	var->red.msb_right	= 0;
 	var->green.msb_right	= 0;
@@ -822,10 +752,6 @@
 	switch (var->bits_per_pixel) {
 #ifdef FBCON_HAS_CFB8
 	case 8:	/* PSEUDOCOLOUR, 256 */
-		hw->co_pixfmt		= CO_PIXFMT_8BPP;
-		hw->pitch		= hw->width >> 3;
-		hw->extseqmisc		= EXT_SEQ_MISC_8;
-
 		var->transp.offset	= 0;
 		var->transp.length	= 0;
 		var->red.offset		= 0;
@@ -838,13 +764,8 @@
 #endif
 #ifdef FBCON_HAS_CFB16
 	case 16:/* DIRECTCOLOUR, 64k or 32k */
-		hw->co_pixfmt		= CO_PIXFMT_16BPP;
-		hw->pitch		= hw->width >> 2;
-
 		switch (var->green.length) {
 		case 6: /* RGB565, 64k */
-			hw->extseqmisc		= EXT_SEQ_MISC_16_RGB565;
-
 			var->transp.offset	= 0;
 			var->transp.length	= 0;
 			var->red.offset		= 11;
@@ -857,8 +778,6 @@
 
 		default:
 		case 5: /* RGB555, 32k */
-			hw->extseqmisc		= EXT_SEQ_MISC_16_RGB555;
-
 			var->transp.offset	= 0;
 			var->transp.length	= 0;
 			var->red.offset		= 10;
@@ -870,8 +789,6 @@
 			break;
 
 		case 4: /* RGB444, 4k + transparency? */
-			hw->extseqmisc		= EXT_SEQ_MISC_16_RGB444;
-
 			var->transp.offset	= 12;
 			var->transp.length	= 4;
 			var->red.offset		= 8;
@@ -886,12 +803,6 @@
 #endif
 #ifdef FBCON_HAS_CFB24
 	case 24:/* TRUECOLOUR, 16m */
-		hw->co_pixfmt		= CO_PIXFMT_24BPP;
-		hw->width		*= 3;
-		hw->pitch		= hw->width >> 3;
-		hw->ramdac		|= (RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
-		hw->extseqmisc		= EXT_SEQ_MISC_24_RGB888;
-
 		var->transp.offset	= 0;
 		var->transp.length	= 0;
 		var->red.offset		= 16;
@@ -904,11 +815,6 @@
 #endif
 #ifdef FBCON_HAS_CFB32
 	case 32:/* TRUECOLOUR, 16m */
-		hw->co_pixfmt		= CO_PIXFMT_32BPP;
-		hw->pitch		= hw->width >> 1;
-		hw->ramdac		|= (RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
-		hw->extseqmisc		= EXT_SEQ_MISC_32;
-
 		var->transp.offset	= 24;
 		var->transp.length	= 8;
 		var->red.offset		= 16;
@@ -933,127 +839,108 @@
 	if (var->xres > var->xres_virtual)
 		var->xres = var->xres_virtual;
 
-	err = cyber2000fb_decode_clock(hw, cfb, var);
+	err = cyber2000fb_decode_clock(&hw, cfb, var);
 	if (err)
 		return err;
 
-	err = cyber2000fb_decode_crtc(hw, cfb, var);
+	err = cyber2000fb_decode_crtc(&hw, cfb, var);
 	if (err)
 		return err;
 
-	hw->width -= 1;
-	hw->fetch = hw->pitch;
-	if (!(cfb->mem_ctl2 & MEM_CTL2_64BIT))
-		hw->fetch <<= 1;
-	hw->fetch += 1;
-
 	return 0;
 }
 
-/*
- *    Set the User Defined Part of the Display
- */
-static int
-cyber2000fb_set_var(struct fb_var_screeninfo *var, int con,
-		    struct fb_info *info)
+static int cyber2000fb_set_par(struct fb_info *info)
 {
 	struct cfb_info *cfb = (struct cfb_info *)info;
-	struct display *display;
+	struct fb_var_screeninfo *var = &cfb->fb.var;
 	struct par_info hw;
-	int err, chgvar;
-
-	/*
-	 * CONUPDATE and SMOOTH_XPAN are equal.  However,
-	 * SMOOTH_XPAN is only used internally by fbcon.
-	 */
-	if (var->vmode & FB_VMODE_CONUPDATE) {
-		var->vmode |= FB_VMODE_YWRAP;
-		var->xoffset = cfb->display->var.xoffset;
-		var->yoffset = cfb->display->var.yoffset;
-	}
-
-	err = cyber2000fb_decode_var(var, cfb, &hw);
-	if (err)
-		return err;
-
-	if (var->activate & FB_ACTIVATE_TEST)
-		return 0;
+	unsigned int mem;
 
-	if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW)
-		return -EINVAL;
+	hw.width = var->xres_virtual;
+	hw.ramdac = RAMDAC_VREFEN | RAMDAC_DAC8BIT;
 
-	if (con < 0) {
-		display = cfb->fb.disp;
-	} else {
-		display = fb_display + con;
-	}
+	switch (var->bits_per_pixel) {
+	case 8:
+		hw.co_pixfmt		= CO_PIXFMT_8BPP;
+		hw.pitch		= hw.width >> 3;
+		hw.extseqmisc		= EXT_SEQ_MISC_8;
+		break;
 
-	chgvar = cfb->fb.var.xres != var->xres ||
-		 cfb->fb.var.yres != var->yres ||
-		 cfb->fb.var.xres_virtual != var->xres_virtual ||
-		 cfb->fb.var.yres_virtual != var->yres_virtual ||
-		 cfb->fb.var.bits_per_pixel != var->bits_per_pixel;
+	case 16:
+		hw.co_pixfmt		= CO_PIXFMT_16BPP;
+		hw.pitch		= hw.width >> 2;
 
-	if (memcmp(&cfb->fb.var.red, &var->red, sizeof(var->red)) ||
-	    memcmp(&cfb->fb.var.green, &var->green, sizeof(var->green)) ||
-	    memcmp(&cfb->fb.var.blue, &var->blue, sizeof(var->blue)))
-		chgvar = 1;
+		switch (var->green.length) {
+		case 6: /* RGB565, 64k */
+			hw.extseqmisc	= EXT_SEQ_MISC_16_RGB565;
+			break;
+		case 5: /* RGB555, 32k */
+			hw.extseqmisc	= EXT_SEQ_MISC_16_RGB555;
+			break;
+		case 4: /* RGB444, 4k + transparency? */
+			hw.extseqmisc	= EXT_SEQ_MISC_16_RGB444;
+			break;
+		default:
+			BUG();
+		}
+		break;
+	case 24:/* TRUECOLOUR, 16m */
+		hw.co_pixfmt		= CO_PIXFMT_24BPP;
+		hw.width		*= 3;
+		hw.pitch		= hw.width >> 3;
+		hw.ramdac		|= (RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
+		hw.extseqmisc		= EXT_SEQ_MISC_24_RGB888;
+		break;
 
-	if (con >= 0 && chgvar == 0)
-		return 0;
+	case 32:/* TRUECOLOUR, 16m */
+		hw.co_pixfmt		= CO_PIXFMT_32BPP;
+		hw.pitch		= hw.width >> 1;
+		hw.ramdac		|= (RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
+		hw.extseqmisc		= EXT_SEQ_MISC_32;
+		break;
 
-	if (con < 0)
-		chgvar = 0;
+	default:
+		BUG();
+	}
 
 	/*
-	 * If we are setting all the virtual consoles, also set the
-	 * defaults used to create new consoles.
+	 * Sigh, this is absolutely disgusting, but caused by
+	 * the way the fbcon developers want to separate out
+	 * the "checking" and the "setting" of the video mode.
+	 *
+	 * If the mode is not suitable for the hardware here,
+	 * we can't prevent it being set by returning an error.
+	 *
+	 * In theory, since NetWinders contain just one VGA card,
+	 * we should never end up hitting this problem.
 	 */
-	err = var->activate;
-	var->activate = FB_ACTIVATE_NOW;
-	if (err & FB_ACTIVATE_ALL)
-		cfb->fb.disp->var = *var;
+	BUG_ON(cyber2000fb_decode_clock(&hw, cfb, var) != 0);
+	BUG_ON(cyber2000fb_decode_crtc(&hw, cfb, var) != 0);
+
+	hw.width -= 1;
+	hw.fetch = hw.pitch;
+	if (!(cfb->mem_ctl2 & MEM_CTL2_64BIT))
+		hw.fetch <<= 1;
+	hw.fetch += 1;
 
-	cfb->fb.var = *var;
 	cfb->fb.fix.line_length	= var->xres_virtual * var->bits_per_pixel / 8;
 
-	switch (var->bits_per_pixel) {
-#ifdef FBCON_HAS_CFB8
-	case 8:	/* PSEUDOCOLOUR, 256 */
-		cfb->dispsw		= &fbcon_cfb8;
-		display->dispsw_data	= NULL;
-		break;
-#endif
-#ifdef FBCON_HAS_CFB16
-	case 16:/* DIRECTCOLOUR */
-		cfb->dispsw		= &fbcon_cfb16;
-		display->dispsw_data	= cfb->fb.pseudo_palette;
-		break;
-#endif
-#ifdef FBCON_HAS_CFB24
-	case 24:/* TRUECOLOUR, 16m */
-		cfb->dispsw		= &fbcon_cfb24;
-		display->dispsw_data	= cfb->fb.pseudo_palette;
-		break;
-#endif
-#ifdef FBCON_HAS_CFB32
-	case 32:/* TRUECOLOUR, 16m */
-		cfb->dispsw		= &fbcon_cfb32;
-		display->dispsw_data	= cfb->fb.pseudo_palette;
-		break;
-#endif
-	default:/* in theory this should never happen */
-		printk(KERN_WARNING "%s: no support for %dbpp\n",
-		       cfb->fb.fix.id, var->bits_per_pixel);
-		cfb->dispsw = &fbcon_dummy;
-		break;
-	}
+	/*
+	 * Same here - if the size of the video mode exceeds the
+	 * available RAM, we can't prevent this mode being set.
+	 *
+	 * In theory, since NetWinders contain just one VGA card,
+	 * we should never end up hitting this problem.
+	 */
+	mem = cfb->fb.fix.line_length * var->yres_virtual;
+	BUG_ON(mem > cfb->fb.fix.smem_len);
 
 	/*
-	 * 8bpp displays are always pseudo colour.
-	 * 16bpp and above are direct colour or true colour, depending
-	 * on whether the RAMDAC palettes are bypassed.  (Direct colour
-	 * has palettes, true colour does not.)
+	 * 8bpp displays are always pseudo colour.  16bpp and above
+	 * are direct colour or true colour, depending on whether
+	 * the RAMDAC palettes are bypassed.  (Direct colour has
+	 * palettes, true colour does not.)
 	 */
 	if (var->bits_per_pixel == 8)
 		cfb->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR;
@@ -1062,20 +948,8 @@
 	else
 		cfb->fb.fix.visual = FB_VISUAL_DIRECTCOLOR;
 
-	if (var->accel_flags & FB_ACCELF_TEXT && cfb->dispsw != &fbcon_dummy)
-		display->dispsw = &fbcon_cyber_accel;
-	else
-		display->dispsw = cfb->dispsw;
-
-	display->can_soft_blank = 1;
-	display->inverse	= 0;
-
 	cyber2000fb_set_timing(cfb, &hw);
 	cyber2000fb_update_start(cfb, var);
-	fb_set_cmap(&cfb->fb.cmap, 1, &cfb->fb);
-
-	if (chgvar && cfb->fb.changevar)
-		cfb->fb.changevar(con);
 
 	return 0;
 }
@@ -1085,85 +959,22 @@
  *    Pan or Wrap the Display
  */
 static int
-cyber2000fb_pan_display(struct fb_var_screeninfo *var, int con,
-			struct fb_info *info)
+cyber2000fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct cfb_info *cfb = (struct cfb_info *)info;
-	u_int y_bottom;
-
-	y_bottom = var->yoffset;
-
-	if (!(var->vmode & FB_VMODE_YWRAP))
-		y_bottom += var->yres;
-
-	if (var->xoffset > (var->xres_virtual - var->xres))
-		return -EINVAL;
-	if (y_bottom > cfb->display->var.yres_virtual)
-		return -EINVAL;
 
 	if (cyber2000fb_update_start(cfb, var))
 		return -EINVAL;
 
-	cfb->display->var.xoffset = var->xoffset;
-	cfb->display->var.yoffset = var->yoffset;
+	cfb->fb.var.xoffset = var->xoffset;
+	cfb->fb.var.yoffset = var->yoffset;
+
 	if (var->vmode & FB_VMODE_YWRAP) {
-		cfb->display->var.vmode |= FB_VMODE_YWRAP;
+		cfb->fb.var.vmode |= FB_VMODE_YWRAP;
 	} else {
-		cfb->display->var.vmode &= ~FB_VMODE_YWRAP;
-	}
-
-	return 0;
-}
-
-
-/*
- *    Update the `var' structure (called by fbcon.c)
- *
- *    This call looks only at yoffset and the FB_VMODE_YWRAP flag in `var'.
- *    Since it's called by a kernel driver, no range checking is done.
- */
-static int cyber2000fb_updatevar(int con, struct fb_info *info)
-{
-	struct cfb_info *cfb = (struct cfb_info *)info;
-
-	return cyber2000fb_update_start(cfb, &fb_display[con].var);
-}
-
-static int cyber2000fb_switch(int con, struct fb_info *info)
-{
-	struct cfb_info *cfb = (struct cfb_info *)info;
-	struct display *display = cfb->display;
-	struct fb_cmap *cmap;
-
-	if (display) {
-		/*
-		 * Save the old colormap and video mode.
-		 */
-		if (display->cmap.len)
-			fb_copy_cmap(&cfb->fb.cmap, &display->cmap, 0);
+		cfb->fb.var.vmode &= ~FB_VMODE_YWRAP;
 	}
 
-	cfb->display = display = fb_display + con;
-
-	/*
-	 * Install the new colormap and change the video mode.  By default,
-	 * fbcon sets all the colormaps and video modes to the default
-	 * values at bootup.
-	 *
-	 * Really, we want to set the colourmap size depending on the
-	 * depth of the new video mode.  For now, we leave it at its
-	 * default 256 entry.
-	 */
-	if (display->cmap.len)
-		cmap = &display->cmap;
-	else
-		cmap = fb_default_cmap(1 << display->var.bits_per_pixel);
-
-	fb_copy_cmap(cmap, &cfb->fb.cmap, 0);
-
-	display->var.activate = FB_ACTIVATE_NOW;
-	cyber2000fb_set_var(&display->var, con, &cfb->fb);
-
 	return 0;
 }
 
@@ -1243,12 +1054,16 @@
 
 static struct fb_ops cyber2000fb_ops = {
 	.owner		= THIS_MODULE,
-	.fb_set_var	= cyber2000fb_set_var,
-	.fb_get_cmap	= gen_get_cmap,
-	.fb_set_cmap	= gen_set_cmap,
+	.fb_check_var	= cyber2000fb_check_var,
+	.fb_set_par	= cyber2000fb_set_par,
 	.fb_setcolreg	= cyber2000fb_setcolreg,
-	.fb_pan_display	= cyber2000fb_pan_display,
 	.fb_blank	= cyber2000fb_blank,
+	.fb_pan_display	= cyber2000fb_pan_display,
+	.fb_fillrect	= cyber2000fb_fillrect,
+	.fb_copyarea	= cyber2000fb_copyarea,
+	.fb_imageblit	= cyber2000fb_imageblit,
+	.fb_cursor	= soft_cursor,
+	.fb_sync	= cyber2000fb_sync,
 };
 
 /*
@@ -1295,7 +1110,7 @@
 
 void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var)
 {
-	memcpy(var, &cfb->display->var, sizeof(struct fb_var_screeninfo));
+	memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo));
 }
 
 /*
@@ -1313,8 +1128,6 @@
 		info->info            = int_cfb_info;
 
 		strncpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name));
-
-		MOD_INC_USE_COUNT;
 	}
 
 	return int_cfb_info != NULL;
@@ -1325,7 +1138,6 @@
  */
 void cyber2000fb_detach(int idx)
 {
-	MOD_DEC_USE_COUNT;
 }
 
 EXPORT_SYMBOL(cyber2000fb_attach);
@@ -1411,13 +1223,13 @@
 {
 	struct cfb_info *cfb;
 
-	cfb = kmalloc(sizeof(struct cfb_info) + sizeof(struct display) +
+	cfb = kmalloc(sizeof(struct cfb_info) +
 		       sizeof(u32) * 16, GFP_KERNEL);
 
 	if (!cfb)
 		return NULL;
 
-	memset(cfb, 0, sizeof(struct cfb_info) + sizeof(struct display));
+	memset(cfb, 0, sizeof(struct cfb_info));
 
 	cfb->id			= id;
 
@@ -1467,17 +1279,10 @@
 	cfb->fb.var.width	= -1;
 	cfb->fb.var.accel_flags	= FB_ACCELF_TEXT;
 
-	strcpy(cfb->fb.modename, cfb->fb.fix.id);
-	strcpy(cfb->fb.fontname, default_font);
-
 	cfb->fb.fbops		= &cyber2000fb_ops;
-	cfb->fb.changevar	= NULL;
-	cfb->fb.switch_con	= cyber2000fb_switch;
-	cfb->fb.updatevar	= cyber2000fb_updatevar;
 	cfb->fb.flags		= FBINFO_FLAG_DEFAULT;
 	cfb->fb.node		= NODEV;
-	cfb->fb.disp		= (struct display *)(cfb + 1);
-	cfb->fb.pseudo_palette	= (void *)(cfb->fb.disp + 1);
+	cfb->fb.pseudo_palette	= (void *)(cfb + 1);
 
 	fb_alloc_cmap(&cfb->fb.cmap, NR_PALETTE, 0);
 
@@ -1575,7 +1380,7 @@
 	if (cfb->fb.var.yres_virtual < cfb->fb.var.yres)
 		cfb->fb.var.yres_virtual = cfb->fb.var.yres;
 
-	cyber2000fb_set_var(&cfb->fb.var, -1, &cfb->fb);
+//	fb_set_var(&cfb->fb.var, -1, &cfb->fb);
 
 	/*
 	 * Calculate the hsync and vsync frequencies.  Note that
@@ -1614,8 +1419,7 @@
 	 * Restore the old video mode and the palette.
 	 * We also need to tell fbcon to redraw the console.
 	 */
-	cfb->fb.var.activate = FB_ACTIVATE_NOW;
-	cyber2000fb_set_var(&cfb->fb.var, -1, &cfb->fb);
+	cyber2000fb_set_par(&cfb->fb);
 }
 
 #ifdef CONFIG_ARCH_SHARK
diff -Nru a/drivers/video/fbmem.c b/drivers/video/fbmem.c
--- a/drivers/video/fbmem.c	Mon Mar 31 13:41:05 2003
+++ b/drivers/video/fbmem.c	Mon Mar 31 13:41:05 2003
@@ -1111,13 +1111,11 @@
 #elif defined(__mips__)
 	pgprot_val(vma->vm_page_prot) &= ~_CACHE_MASK;
 	pgprot_val(vma->vm_page_prot) |= _CACHE_UNCACHED;
-#elif defined(__arm__)
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 #elif defined(__sh__)
 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_CACHABLE;
 #elif defined(__hppa__)
 	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-#elif defined(__ia64__)
+#elif defined(__ia64__) || defined(__arm__)
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 #else
 #warning What do we have to do here??
diff -Nru a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c
--- a/drivers/video/matrox/i2c-matroxfb.c	Mon Mar 31 13:41:07 2003
+++ b/drivers/video/matrox/i2c-matroxfb.c	Mon Mar 31 13:41:07 2003
@@ -111,7 +111,8 @@
 	b->mask.data = data;
 	b->mask.clock = clock;
 	b->adapter = matrox_i2c_adapter_template;
-	sprintf(b->adapter.name, name, minor(minfo->fbcon.node));
+	snprintf(b->adapter.dev.name, DEVICE_NAME_SIZE, name,
+		minor(minfo->fbcon.node));
 	b->adapter.data = b;
 	b->adapter.algo_data = &b->bac;
 	b->bac = matrox_i2c_algo_template;
@@ -159,22 +160,22 @@
 	switch (ACCESS_FBINFO(chip)) {
 		case MGA_2064:
 		case MGA_2164:
-			err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0 on i2c-matroxfb");
+			err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0");
 			break;
 		default:
-			err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0 on i2c-matroxfb");
+			err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0");
 			break;
 	}
 	if (err)
 		goto fail_ddc1;
 	if (ACCESS_FBINFO(devflags.dualhead)) {
-		err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1 on i2c-matroxfb");
+		err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1");
 		if (err == -ENODEV) {
 			printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n");
 		} else if (err)
 			printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n");
 		/* Register maven bus even on G450/G550 */
-		err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u on i2c-matroxfb");
+		err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u");
 		if (err)
 			printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n");
 	}
diff -Nru a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c	Mon Mar 31 13:41:07 2003
+++ b/fs/aio.c	Mon Mar 31 13:41:07 2003
@@ -522,7 +522,7 @@
 /*	Lookup an ioctx id.  ioctx_list is lockless for reads.
  *	FIXME: this is O(n) and is only suitable for development.
  */
-static struct kioctx *lookup_ioctx(unsigned long ctx_id)
+struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
 	struct kioctx *ioctx;
 	struct mm_struct *mm;
@@ -984,9 +984,9 @@
 	return -EINVAL;
 }
 
-static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
+int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
 				  struct iocb *iocb));
-static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
+int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
 			 struct iocb *iocb)
 {
 	struct kiocb *req;
diff -Nru a/fs/block_dev.c b/fs/block_dev.c
--- a/fs/block_dev.c	Mon Mar 31 13:41:09 2003
+++ b/fs/block_dev.c	Mon Mar 31 13:41:09 2003
@@ -653,15 +653,16 @@
 	struct gendisk *disk = bdev->bd_disk;
 
 	down(&bdev->bd_sem);
-	switch (kind) {
-	case BDEV_FILE:
-	case BDEV_FS:
-		sync_blockdev(bd_inode->i_bdev);
-		break;
-	}
 	lock_kernel();
-	if (!--bdev->bd_openers)
+	if (!--bdev->bd_openers) {
+		switch (kind) {
+		case BDEV_FILE:
+		case BDEV_FS:
+			sync_blockdev(bd_inode->i_bdev);
+			break;
+		}
 		kill_bdev(bdev);
+	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
diff -Nru a/fs/buffer.c b/fs/buffer.c
--- a/fs/buffer.c	Mon Mar 31 13:41:07 2003
+++ b/fs/buffer.c	Mon Mar 31 13:41:07 2003
@@ -123,7 +123,9 @@
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
 	DEFINE_WAIT(wait);
 
-	get_bh(bh);
+	if (atomic_read(&bh->b_count) == 0)
+		buffer_error();
+
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 		if (buffer_locked(bh)) {
@@ -131,7 +133,6 @@
 			io_schedule();
 		}
 	} while (buffer_locked(bh));
-	put_bh(bh);
 	finish_wait(wqh, &wait);
 }
 
diff -Nru a/fs/char_dev.c b/fs/char_dev.c
--- a/fs/char_dev.c	Mon Mar 31 13:41:06 2003
+++ b/fs/char_dev.c	Mon Mar 31 13:41:06 2003
@@ -115,7 +115,15 @@
 }
 
 /*
- * Register a single major with a specified minor range
+ * Register a single major with a specified minor range.
+ *
+ * If major == 0 this function will dynamically allocate a major and return
+ * its number.
+ *
+ * If major > 0 this function will attempt to reserve the passed range of
+ * minors and will return zero on success.
+ *
+ * Returns a -ve errno on failure.
  */
 int register_chrdev_region(unsigned int major, unsigned int baseminor,
 			   int minorct, const char *name,
@@ -125,23 +133,27 @@
 	int ret = 0;
 	int i;
 
+	cd = kmalloc(sizeof(struct char_device_struct), GFP_KERNEL);
+	if (cd == NULL)
+		return -ENOMEM;
+
+	write_lock_irq(&chrdevs_lock);
+
 	/* temporary */
 	if (major == 0) {
-		read_lock(&chrdevs_lock);
-		for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--)
+		for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
 			if (chrdevs[i] == NULL)
 				break;
-		read_unlock(&chrdevs_lock);
+		}
 
-		if (i == 0)
-			return -EBUSY;
-		ret = major = i;
+		if (i == 0) {
+			ret = -EBUSY;
+			goto out;
+		}
+		major = i;
+		ret = major;
 	}
 
-	cd = kmalloc(sizeof(struct char_device_struct), GFP_KERNEL);
-	if (cd == NULL)
-		return -ENOMEM;
-
 	cd->major = major;
 	cd->baseminor = baseminor;
 	cd->minorct = minorct;
@@ -150,7 +162,6 @@
 
 	i = major_to_index(major);
 
-	write_lock(&chrdevs_lock);
 	for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
 		if ((*cp)->major > major ||
 		    ((*cp)->major == major && (*cp)->baseminor >= baseminor))
@@ -162,8 +173,10 @@
 		cd->next = *cp;
 		*cp = cd;
 	}
-	write_unlock(&chrdevs_lock);
-
+out:
+	write_unlock_irq(&chrdevs_lock);
+	if (ret < 0)
+		kfree(cd);
 	return ret;
 }
 
@@ -174,7 +187,8 @@
 }
 
 /* todo: make void - error printk here */
-int unregister_chrdev(unsigned int major, const char * name)
+int unregister_chrdev_region(unsigned int major, unsigned int baseminor,
+			     int minorct, const char *name)
 {
 	struct char_device_struct *cd, **cp;
 	int ret = 0;
@@ -182,9 +196,11 @@
 
 	i = major_to_index(major);
 
-	write_lock(&chrdevs_lock);
+	write_lock_irq(&chrdevs_lock);
 	for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
-		if ((*cp)->major == major)
+		if ((*cp)->major == major &&
+		    (*cp)->baseminor == baseminor &&
+		    (*cp)->minorct == minorct)
 			break;
 	if (!*cp || strcmp((*cp)->name, name))
 		ret = -EINVAL;
@@ -193,9 +209,14 @@
 		*cp = cd->next;
 		kfree(cd);
 	}
-	write_unlock(&chrdevs_lock);
+	write_unlock_irq(&chrdevs_lock);
 
 	return ret;
+}
+
+int unregister_chrdev(unsigned int major, const char *name)
+{
+	return unregister_chrdev_region(major, 0, 256, name);
 }
 
 /*
diff -Nru a/fs/cramfs/inode.c b/fs/cramfs/inode.c
--- a/fs/cramfs/inode.c	Mon Mar 31 13:41:06 2003
+++ b/fs/cramfs/inode.c	Mon Mar 31 13:41:06 2003
@@ -43,6 +43,7 @@
 static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode)
 {
 	struct inode * inode = new_inode(sb);
+	static struct timespec zerotime = { 0, 0 };
 
 	if (inode) {
 		inode->i_mode = cramfs_inode->mode;
@@ -51,7 +52,8 @@
 		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
 		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_gid = cramfs_inode->gid;
-		inode->i_mtime = inode->i_atime = inode->i_ctime = 0;
+		/* Struct copy intentional */
+		inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
 		inode->i_ino = CRAMINO(cramfs_inode);
 		/* inode->i_nlink is left 1 - arguably wrong for directories,
 		   but it's the best we can do without reading the directory
diff -Nru a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c	Mon Mar 31 13:41:07 2003
+++ b/fs/exec.c	Mon Mar 31 13:41:07 2003
@@ -38,7 +38,6 @@
 #include <linux/binfmts.h>
 #include <linux/swap.h>
 #include <linux/utsname.h>
-#define __NO_VERSION__
 #include <linux/module.h>
 #include <linux/namei.h>
 #include <linux/proc_fs.h>
diff -Nru a/fs/ext3/fsync.c b/fs/ext3/fsync.c
--- a/fs/ext3/fsync.c	Mon Mar 31 13:41:08 2003
+++ b/fs/ext3/fsync.c	Mon Mar 31 13:41:08 2003
@@ -34,7 +34,7 @@
  * akpm: A new design for ext3_sync_file().
  *
  * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
- * There cannot be a transaction open by this task. (AKPM: quotas?)
+ * There cannot be a transaction open by this task.
  * Another task could have dirtied this inode.  Its data can be in any
  * state in the journalling system.
  *
@@ -50,19 +50,28 @@
 int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
-	int ret;
 
 	J_ASSERT(ext3_journal_current_handle() == 0);
 
 	/*
-	 * fsync_inode_buffers() just walks private_list and waits
-	 * on them.  It's a no-op for full data journalling because
-	 * private_list will be empty.
-	 * Really, we only need to start I/O on the dirty buffers -
-	 * we'll end up waiting on them in commit.
+	 * data=writeback:
+	 *  The caller's filemap_fdatawrite()/wait will sync the data.
+	 *  ext3_force_commit() will sync the metadata
+	 *
+	 * data=ordered:
+	 *  The caller's filemap_fdatawrite() will write the data and
+	 *  ext3_force_commit() will wait on the buffers.  Then the caller's
+	 *  filemap_fdatawait() will wait on the pages (but all IO is complete)
+	 *  Not pretty, but it works.
+	 *
+	 * data=journal:
+	 *  filemap_fdatawrite won't do anything (the buffers are clean).
+	 *  ext3_force_commit will write the file data into the journal and
+	 *  will wait on that.
+	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
+	 *  (they were dirtied by commit).  But that's OK - the blocks are
+	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	ret = sync_mapping_buffers(inode->i_mapping);
 	ext3_force_commit(inode->i_sb);
-
-	return ret;
+	return 0;
 }
diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c
--- a/fs/ext3/super.c	Mon Mar 31 13:41:08 2003
+++ b/fs/ext3/super.c	Mon Mar 31 13:41:08 2003
@@ -1207,8 +1207,6 @@
 		goto failed_mount;
 	}
 
-	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
-
 	hblock = bdev_hardsect_size(sb->s_bdev);
 	if (sb->s_blocksize != blocksize) {
 		/*
@@ -1239,6 +1237,8 @@
 			goto failed_mount;
 		}
 	}
+
+	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
 
 	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
 		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
diff -Nru a/fs/inode.c b/fs/inode.c
--- a/fs/inode.c	Mon Mar 31 13:41:08 2003
+++ b/fs/inode.c	Mon Mar 31 13:41:08 2003
@@ -1260,7 +1260,7 @@
 		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
 
 	mempages >>= (14 - PAGE_SHIFT);
-	mempages *= sizeof(struct list_head);
+	mempages *= sizeof(struct hlist_head);
 	for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
 		;
 
diff -Nru a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
--- a/fs/jbd/checkpoint.c	Mon Mar 31 13:41:06 2003
+++ b/fs/jbd/checkpoint.c	Mon Mar 31 13:41:06 2003
@@ -260,7 +260,6 @@
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int result;
-	int target;
 	int batch_count = 0;
 	struct buffer_head *bhs[NR_BATCH];
 
@@ -284,8 +283,6 @@
 	 * AKPM: check this code.  I had a feeling a while back that it
 	 * degenerates into a busy loop at unmount time.
 	 */
-	target = (journal->j_last - journal->j_first) / 4;
-
 	spin_lock(&journal_datalist_lock);
 repeat:
 	transaction = journal->j_checkpoint_transactions;
@@ -443,11 +440,8 @@
 			struct journal_head *last_jh = jh->b_cpprev;
 			struct journal_head *next_jh = jh;
 			do {
-				struct buffer_head *bh;
-
 				jh = next_jh;
 				next_jh = jh->b_cpnext;
-				bh = jh2bh(jh);
 				ret += __try_to_free_cp_buf(jh);
 			} while (jh != last_jh);
 		}
diff -Nru a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
--- a/fs/jfs/jfs_dtree.c	Mon Mar 31 13:41:08 2003
+++ b/fs/jfs/jfs_dtree.c	Mon Mar 31 13:41:08 2003
@@ -2978,7 +2978,6 @@
 	int d_namleft, len, outlen;
 	unsigned long dirent_buf;
 	char *name_ptr;
-	int dtlhdrdatalen;
 	u32 dir_index;
 	int do_index = 0;
 	uint loop_count = 0;
@@ -2998,7 +2997,6 @@
 		 *                      -1 = End of directory
 		 */
 		do_index = 1;
-		dtlhdrdatalen = DTLHDRDATALEN;
 
 		dir_index = (u32) filp->f_pos;
 
@@ -3083,8 +3081,6 @@
 		 * pn > 0:              Real entries, pn=1 -> leftmost page
 		 * pn = index = -1:     No more entries
 		 */
-		dtlhdrdatalen = DTLHDRDATALEN_LEGACY;
-
 		dtpos = filp->f_pos;
 		if (dtpos == 0) {
 			/* build "." entry */
diff -Nru a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
--- a/fs/jfs/jfs_extent.c	Mon Mar 31 13:41:07 2003
+++ b/fs/jfs/jfs_extent.c	Mon Mar 31 13:41:07 2003
@@ -91,7 +91,7 @@
 {
 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 	s64 nxlen, nxaddr, xoff, hint, xaddr = 0;
-	int rc, nbperpage;
+	int rc;
 	int xflag;
 
 	/* This blocks if we are low on resources */
@@ -103,9 +103,6 @@
 	/* validate extent length */
 	if (xlen > MAXXLEN)
 		xlen = MAXXLEN;
-
-	/* get the number of blocks per page */
-	nbperpage = sbi->nbperpage;
 
 	/* get the page's starting extent offset */
 	xoff = pno << sbi->l2nbperpage;
diff -Nru a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
--- a/fs/jfs/jfs_imap.c	Mon Mar 31 13:41:06 2003
+++ b/fs/jfs/jfs_imap.c	Mon Mar 31 13:41:06 2003
@@ -2966,7 +2966,6 @@
 	struct buffer_head *bh;
 	struct inode *ip;
 	tid_t tid;
-	int rc;
 
 	/* if AIT2 ipmap2 is bad, do not try to update it */
 	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
@@ -2974,7 +2973,7 @@
 	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
 	if (ip == NULL) {
 		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
-		if ((rc = readSuper(sb, &bh)))
+		if (readSuper(sb, &bh))
 			return;
 		j_sb = (struct jfs_superblock *)bh->b_data;
 		j_sb->s_flag |= JFS_BAD_SAIT;
@@ -2988,7 +2987,7 @@
 	/* start transaction */
 	tid = txBegin(sb, COMMIT_FORCE);
 	/* update the inode map addressing structure to point to it */
-	if ((rc = xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0))) {
+	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
 		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
 		txAbort(tid, 1);
 		goto cleanup;
@@ -2997,7 +2996,7 @@
 	/* update the inode map's inode to reflect the extension */
 	ip->i_size += PSIZE;
 	ip->i_blocks += LBLK2PBLK(sb, xlen);
-	rc = txCommit(tid, 1, &ip, COMMIT_FORCE);
+	txCommit(tid, 1, &ip, COMMIT_FORCE);
       cleanup:
 	txEnd(tid);
 	diFreeSpecial(ip);
diff -Nru a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
--- a/fs/jfs/jfs_txnmgr.c	Mon Mar 31 13:41:07 2003
+++ b/fs/jfs/jfs_txnmgr.c	Mon Mar 31 13:41:07 2003
@@ -380,8 +380,8 @@
 
 	tblk = tid_to_tblock(t);
 
-	if ((tblk->next == 0) && (current != jfsCommitTask)) {
-		/* Save one tblk for jfsCommit thread */
+	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
+		/* Don't let a non-forced transaction take the last tblk */
 		jfs_info("txBegin: waiting for free tid");
 		INCREMENT(TxStat.txBegin_freetid);
 		TXN_SLEEP(&TxAnchor.freewait);
@@ -1553,12 +1553,10 @@
 void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	   struct tlock * tlck)
 {
-	struct inode *ip;
 	struct metapage *mp;
 	struct pxd_lock *pxdlock;
 	pxd_t *pxd;
 
-	ip = tlck->ip;
 	mp = tlck->mp;
 
 	/* initialize as REDOPAGE/NOREDOPAGE record format */
@@ -2894,7 +2892,6 @@
 	struct inode *ip;
 	struct jfs_inode_info *jfs_ip;
 	struct jfs_log *log = JFS_SBI(sb)->log;
-	int rc;
 	tid_t tid;
 
 	set_bit(log_QUIESCE, &log->flag);
@@ -2914,7 +2911,7 @@
 		TXN_UNLOCK();
 		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
 		down(&jfs_ip->commit_sem);
-		rc = txCommit(tid, 1, &ip, 0);
+		txCommit(tid, 1, &ip, 0);
 		txEnd(tid);
 		up(&jfs_ip->commit_sem);
 		/*
@@ -2994,8 +2991,7 @@
 				 * when it is committed
 				 */
 				TXN_UNLOCK();
-				tid = txBegin(ip->i_sb,
-					      COMMIT_INODE | COMMIT_FORCE);
+				tid = txBegin(ip->i_sb, COMMIT_INODE);
 				rc = txCommit(tid, 1, &ip, 0);
 				txEnd(tid);
 				up(&jfs_ip->commit_sem);
diff -Nru a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
--- a/fs/jfs/jfs_xtree.c	Mon Mar 31 13:41:07 2003
+++ b/fs/jfs/jfs_xtree.c	Mon Mar 31 13:41:07 2003
@@ -3268,14 +3268,13 @@
 void xtInitRoot(tid_t tid, struct inode *ip)
 {
 	xtpage_t *p;
-	struct tlock *tlck;
 
 	/*
 	 * acquire a transaction lock on the root
 	 *
 	 * action:
 	 */
-	tlck = txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
+	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
 		      tlckXTREE | tlckNEW);
 	p = &JFS_IP(ip)->i_xtroot;
 
diff -Nru a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
--- a/fs/nfs/nfs4proc.c	Mon Mar 31 13:41:08 2003
+++ b/fs/nfs/nfs4proc.c	Mon Mar 31 13:41:08 2003
@@ -1529,7 +1529,6 @@
 renew_release(struct rpc_task *task)
 {
 	kfree(task->tk_calldata);
-	task->tk_calldata = NULL;
 }
 
 int
diff -Nru a/fs/nfs/read.c b/fs/nfs/read.c
--- a/fs/nfs/read.c	Mon Mar 31 13:41:06 2003
+++ b/fs/nfs/read.c	Mon Mar 31 13:41:06 2003
@@ -252,8 +252,7 @@
 void
 nfs_readpage_result(struct rpc_task *task)
 {
-	struct nfs_read_data	*data = (struct nfs_read_data *) task->tk_calldata;
-	struct nfs_fattr	*fattr = &data->fattr;
+	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
 	unsigned int count = data->res.count;
 
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
@@ -270,18 +269,10 @@
 							req->wb_pgbase + count,
 							req->wb_bytes - count);
 
-				if (data->res.eof ||
-				    ((fattr->valid & NFS_ATTR_FATTR) &&
-				     ((req_offset(req) + count) >= fattr->size)))
-					SetPageUptodate(page);
-				else
-					if (count < req->wb_bytes)
-						SetPageError(page);
 				count = 0;
-			} else {
+			} else
 				count -= PAGE_CACHE_SIZE;
-				SetPageUptodate(page);
-			}
+			SetPageUptodate(page);
 		} else
 			SetPageError(page);
 		unlock_page(page);
diff -Nru a/fs/nfsd/export.c b/fs/nfsd/export.c
--- a/fs/nfsd/export.c	Mon Mar 31 13:41:07 2003
+++ b/fs/nfsd/export.c	Mon Mar 31 13:41:07 2003
@@ -496,13 +496,19 @@
 {
 	svc_export *exp;
 
-	read_lock(&dparent_lock);
+	dget(dentry);
 	exp = exp_get_by_name(clp, mnt, dentry, reqp);
+
 	while (exp == NULL && dentry != dentry->d_parent) {
-		dentry = dentry->d_parent;
+		struct dentry *parent;
+		read_lock(&dparent_lock);
+		parent = dget(dentry->d_parent);
+		dput(dentry);
+		dentry = parent;
+		read_unlock(&dparent_lock);
 		exp = exp_get_by_name(clp, mnt, dentry, reqp);
 	}
-	read_unlock(&dparent_lock);
+	dput(dentry);
 	return exp;
 }
 
diff -Nru a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
--- a/fs/nfsd/nfs4xdr.c	Mon Mar 31 13:41:08 2003
+++ b/fs/nfsd/nfs4xdr.c	Mon Mar 31 13:41:08 2003
@@ -231,6 +231,7 @@
 	p += XDR_QUADLEN(nbytes);		\
 } while (0)
 
+/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
 #define READ_BUF(nbytes)  do {			\
 	if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) {	\
 		p = argp->p;			\
@@ -244,15 +245,15 @@
 u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
 	/* We want more bytes than seem to be available.
-	 * Maybe we need a new page, may wehave just run out
+	 * Maybe we need a new page, maybe we have just run out
 	 */
 	int avail = (char*)argp->end - (char*)argp->p;
 	u32 *p;
 	if (avail + argp->pagelen < nbytes)
 		return NULL;
-	if (avail + PAGE_SIZE > nbytes) /* need more than a page !! */
+	if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
 		return NULL;
-	/* ok, we can do it with the tail plus the next page */
+	/* ok, we can do it with the current plus the next page */
 	if (nbytes <= sizeof(argp->tmp))
 		p = argp->tmp;
 	else {
@@ -851,16 +852,16 @@
 		v++;
 		write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
 		argp->pagelist++;
-		if (argp->pagelen >= PAGE_SIZE) {
+		if (len >= PAGE_SIZE) {
 			write->wr_vec[v].iov_len = PAGE_SIZE;
 			argp->pagelen -= PAGE_SIZE;
 		} else {
 			write->wr_vec[v].iov_len = argp->pagelen;
-			argp->pagelen = 0;
+			argp->pagelen -= len;
 		}
 	}
 	argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
-	argp->p = (u32*)  (write->wr_vec[v].iov_base + len);
+	argp->p = (u32*)  (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
 	write->wr_vec[v].iov_len = len;
 	write->wr_vlen = v+1;
 
@@ -1690,7 +1691,8 @@
 {
 	u32 eof;
 	int v, pn;
-	unsigned long maxcount, len;
+	unsigned long maxcount; 
+	long len;
 	ENCODE_HEAD;
 
 	if (nfserr)
@@ -1730,6 +1732,10 @@
 	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
 
 	resp->xbuf->page_len = maxcount;
+
+	/* read zero bytes -> don't set up tail */
+	if(!maxcount)
+		return 0;        
 
 	/* set up page for remaining responses */
 	svc_take_page(resp->rqstp);
diff -Nru a/fs/proc/base.c b/fs/proc/base.c
--- a/fs/proc/base.c	Mon Mar 31 13:41:07 2003
+++ b/fs/proc/base.c	Mon Mar 31 13:41:07 2003
@@ -565,10 +565,10 @@
 }
 
 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
-			    char * buffer, int buflen)
+			    char *buffer, int buflen)
 {
 	struct inode * inode;
-	char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
+	char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
 	int len;
 
 	if (!tmp)
@@ -576,13 +576,17 @@
 		
 	inode = dentry->d_inode;
 	path = d_path(dentry, mnt, tmp, PAGE_SIZE);
+	len = PTR_ERR(path);
+	if (IS_ERR(path))
+		goto out;
 	len = tmp + PAGE_SIZE - 1 - path;
 
-	if (len < buflen)
-		buflen = len;
-	copy_to_user(buffer, path, buflen);
+	if (len > buflen)
+		len = buflen;
+	copy_to_user(buffer, path, len);
+ out:
 	free_page((unsigned long)tmp);
-	return buflen;
+	return len;
 }
 
 static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
diff -Nru a/fs/quota.c b/fs/quota.c
--- a/fs/quota.c	Mon Mar 31 13:41:08 2003
+++ b/fs/quota.c	Mon Mar 31 13:41:08 2003
@@ -221,12 +221,17 @@
 	uint cmds, type;
 	struct super_block *sb = NULL;
 	struct block_device *bdev;
+	char *tmp;
 	int ret = -ENODEV;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
-	bdev = lookup_bdev(special);
+	tmp = getname(special);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
+	bdev = lookup_bdev(tmp);
+	putname(tmp);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 	sb = get_super(bdev);
diff -Nru a/fs/sysfs/inode.c b/fs/sysfs/inode.c
--- a/fs/sysfs/inode.c	Mon Mar 31 13:41:08 2003
+++ b/fs/sysfs/inode.c	Mon Mar 31 13:41:08 2003
@@ -97,7 +97,7 @@
 				 atomic_read(&victim->d_count));
 
 			simple_unlink(dir->d_inode,victim);
-
+			d_delete(victim);
 		}
 		/*
 		 * Drop reference from sysfs_get_dentry() above.
diff -Nru a/fs/xfs/Makefile b/fs/xfs/Makefile
--- a/fs/xfs/Makefile	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/Makefile	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+# Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of version 2 of the GNU General Public License as
@@ -29,13 +29,7 @@
 #
 # http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 #
-# Makefile for XFS on Linux.
-#
-
 
-# This needs -I. because everything does #include <xfs.h> instead of "xfs.h".
-# The code is wrong, local files should be included using "xfs.h", not <xfs.h>
-# but I am not going to change every file at the moment.
 EXTRA_CFLAGS +=	 -Ifs/xfs -funsigned-char
 
 ifeq ($(CONFIG_XFS_DEBUG),y)
@@ -48,17 +42,22 @@
 obj-$(CONFIG_XFS_FS)		+= xfs.o
 
 
-xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
-
-xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \
+xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
+				   xfs_dquot.o \
 				   xfs_dquot_item.o \
 				   xfs_trans_dquot.o \
 				   xfs_qm_syscalls.o \
-				   xfs_qm.o
+				   xfs_qm_bhv.o \
+				   xfs_qm.o)
 
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
+endif
+
+xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
 xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
-xfs-$(CONFIG_FS_POSIX_CAP)	+= xfs_cap.o
-xfs-$(CONFIG_FS_POSIX_MAC)	+= xfs_mac.o
+xfs-$(CONFIG_XFS_POSIX_CAP)	+= xfs_cap.o
+xfs-$(CONFIG_XFS_POSIX_MAC)	+= xfs_mac.o
 xfs-$(CONFIG_PROC_FS)		+= linux/xfs_stats.o
 xfs-$(CONFIG_SYSCTL)		+= linux/xfs_sysctl.o
 
@@ -127,6 +126,7 @@
 				   xfs_iops.o \
 				   xfs_lrw.o \
 				   xfs_super.o \
+				   xfs_vfs.o \
 				   xfs_vnode.o)
 
 # Objects in support/
@@ -138,6 +138,10 @@
 				   mrlock.o \
 				   qsort.o \
 				   uuid.o)
+
+# Quota and DMAPI stubs
+xfs-y				+= xfs_dmops.o \
+				   xfs_qmops.o
 
 # If both xfs and kdb modules are built in then xfsidbg is built in.  If xfs is
 # a module and kdb modules are being compiled then xfsidbg must be a module, to
diff -Nru a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
--- a/fs/xfs/linux/xfs_aops.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_aops.c	Mon Mar 31 13:41:08 2003
@@ -78,6 +78,7 @@
 				  XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp));
 	XFS_BUF_SET_FSPRIVATE(bp, NULL);
 	XFS_BUF_CLR_IODONE_FUNC(bp);
+	XFS_BUF_UNDATAIO(bp);
 	xfs_biodone(bp);
 }
 
@@ -431,6 +432,7 @@
 	size = nblocks;		/* NB: using 64bit number here */
 	size <<= block_bits;	/* convert fsb's to byte range */
 
+	XFS_BUF_DATAIO(pb);
 	XFS_BUF_SET_SIZE(pb, size);
 	XFS_BUF_SET_OFFSET(pb, offset);
 	XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)->v_fbhv);
diff -Nru a/fs/xfs/linux/xfs_behavior.c b/fs/xfs/linux/xfs_behavior.c
--- a/fs/xfs/linux/xfs_behavior.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_behavior.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -30,41 +30,77 @@
  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  *
  */
+#include <xfs.h>
 
 /*
  * Source file used to associate/disassociate behaviors with virtualized
- * objects.  See behavior.h for more information about behaviors, etc.
+ * objects.  See xfs_behavior.h for more information about behaviors, etc.
  *
  * The implementation is split between functions in this file and macros
- * in behavior.h.
+ * in xfs_behavior.h.
  */
-#include <xfs.h>
-
-kmem_zone_t	*bhv_global_zone;
 
 /*
- * Global initialization function called out of main.
+ * Insert a new behavior descriptor into a behavior chain.
+ *
+ * The behavior chain is ordered based on the 'position' number which
+ * lives in the first field of the ops vector (higher numbers first).
+ *
+ * Attempts to insert duplicate ops result in an EINVAL return code.
+ * Otherwise, return 0 to indicate success.
  */
-void
-bhv_global_init(void)
+int
+bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp)
 {
+	bhv_desc_t	*curdesc, *prev;
+	int		position;
+
+	/*
+	 * Validate the position value of the new behavior.
+	 */
+	position = BHV_POSITION(bdp);
+	ASSERT(position >= BHV_POSITION_BASE && position <= BHV_POSITION_TOP);
+
 	/*
-	 * Initialize a behavior zone used by subsystems using behaviors
-	 * but without any private data.  In the UNIKERNEL case, this zone
-	 * is used only for behaviors that are not yet isolated to a single
-	 * cell.  The only such user is in pshm.c in which a dummy vnode is
-	 * obtained in support of vce avoidance logic.
+	 * Find location to insert behavior.  Check for duplicates.
 	 */
-	bhv_global_zone = kmem_zone_init(sizeof(bhv_desc_t), "bhv_global_zone");
+	prev = NULL;
+	for (curdesc = bhp->bh_first;
+	     curdesc != NULL;
+	     curdesc = curdesc->bd_next) {
+
+		/* Check for duplication. */
+		if (curdesc->bd_ops == bdp->bd_ops) {
+			ASSERT(0);
+			return EINVAL;
+		}
+
+		/* Find correct position */
+		if (position >= BHV_POSITION(curdesc)) {
+			ASSERT(position != BHV_POSITION(curdesc));
+			break;		/* found it */
+		}
+
+		prev = curdesc;
+	}
+
+	if (prev == NULL) {
+		/* insert at front of chain */
+		bdp->bd_next = bhp->bh_first;
+		bhp->bh_first = bdp;
+	} else {
+		/* insert after prev */
+		bdp->bd_next = prev->bd_next;
+		prev->bd_next = bdp;
+	}
+
+        return 0;
 }
 
 /*
  * Remove a behavior descriptor from a position in a behavior chain;
  * the postition is guaranteed not to be the first position.
  * Should only be called by the bhv_remove() macro.
- *
- * The act of modifying the chain is done atomically w.r.t. ops-in-progress
- * (see comment at top of behavior.h for more info on synchronization).
  */
 void
 bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp)
@@ -86,7 +122,6 @@
 
 	ASSERT(curdesc == bdp);
 	prev->bd_next = bdp->bd_next;	/* remove from after prev */
-					/* atomic wrt oip's */
 }
 
 /*
@@ -110,20 +145,28 @@
 }
 
 /*
- * Look for a specific ops vector on the specified behavior chain.
- * Return the associated behavior descriptor.  Or NULL, if not found.
- *
- * The caller has not read locked the behavior chain, so acquire the
- * lock before traversing the chain.
+ * Looks for the first behavior within a specified range of positions.
+ * Return the associated behavior descriptor.  Or NULL, if none found.
  */
 bhv_desc_t *
-bhv_lookup_unlocked(bhv_head_t *bhp, void *ops)
+bhv_lookup_range(bhv_head_t *bhp, int low, int high)
 {
-	bhv_desc_t	*bdp;
+	bhv_desc_t	*curdesc;
+
+	for (curdesc = bhp->bh_first;
+	     curdesc != NULL;
+	     curdesc = curdesc->bd_next) {
 
-	bdp = bhv_lookup(bhp, ops);
+		int	position = BHV_POSITION(curdesc);
 
-	return bdp;
+		if (position <= high) {
+			if (position >= low)
+				return curdesc;
+			return NULL;
+		}
+	}
+
+	return NULL;
 }
 
 /*
@@ -134,49 +177,36 @@
  * lock before traversing the chain.
  */
 bhv_desc_t *
-bhv_base_unlocked(bhv_head_t *bhp)
+bhv_base(bhv_head_t *bhp)
 {
 	bhv_desc_t	*curdesc;
 
 	for (curdesc = bhp->bh_first;
 	     curdesc != NULL;
 	     curdesc = curdesc->bd_next) {
-		if (curdesc->bd_next == NULL)
+
+		if (curdesc->bd_next == NULL) {
 			return curdesc;
+		}
 	}
+
 	return NULL;
 }
 
-#define BHVMAGIC (void *)0xf00d
-
-/* ARGSUSED */
 void
 bhv_head_init(
 	bhv_head_t *bhp,
 	char *name)
 {
 	bhp->bh_first = NULL;
-	bhp->bh_lockp = BHVMAGIC;
-}
-
-
-/* ARGSUSED */
-void
-bhv_head_reinit(
-	bhv_head_t *bhp)
-{
-	ASSERT(bhp->bh_first == NULL);
-	ASSERT(bhp->bh_lockp == BHVMAGIC);
 }
 
-
 void
 bhv_insert_initial(
 	bhv_head_t *bhp,
 	bhv_desc_t *bdp)
 {
 	ASSERT(bhp->bh_first == NULL);
-	ASSERT(bhp->bh_lockp == BHVMAGIC);
 	(bhp)->bh_first = bdp;
 }
 
@@ -185,7 +215,4 @@
 	bhv_head_t *bhp)
 {
 	ASSERT(bhp->bh_first == NULL);
-	ASSERT(bhp->bh_lockp == BHVMAGIC);
-	bhp->bh_lockp = NULL;
 }
-
diff -Nru a/fs/xfs/linux/xfs_behavior.h b/fs/xfs/linux/xfs_behavior.h
--- a/fs/xfs/linux/xfs_behavior.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/linux/xfs_behavior.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -91,8 +91,8 @@
  *     active object
  *
  */
-
-typedef void	bhv_head_lock_t;
+ 
+struct bhv_head_lock;
 
 /*
  * Behavior head.  Head of the chain of behaviors.
@@ -100,7 +100,7 @@
  */
 typedef struct bhv_head {
 	struct bhv_desc *bh_first;	/* first behavior in chain */
-	bhv_head_lock_t *bh_lockp;	/* pointer to lock info struct */
+	struct bhv_head_lock *bh_lockp;	/* pointer to lock info struct */
 } bhv_head_t;
 
 /*
@@ -128,10 +128,8 @@
 typedef bhv_identity_t bhv_position_t;
 
 #define BHV_IDENTITY_INIT(id,pos)	{id, pos}
-
 #define BHV_IDENTITY_INIT_POSITION(pos) BHV_IDENTITY_INIT(0, pos)
 
-
 /*
  * Define boundaries of position values.
  */
@@ -154,7 +152,7 @@
 
 extern void bhv_head_init(bhv_head_t *, char *);
 extern void bhv_head_destroy(bhv_head_t *);
-extern void bhv_head_reinit(bhv_head_t *);
+extern int  bhv_insert(bhv_head_t *, bhv_desc_t *);
 extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *);
 
 /*
@@ -196,7 +194,11 @@
  */
 extern void		bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp);
 extern bhv_desc_t *	bhv_lookup(bhv_head_t *bhp, void *ops);
-extern bhv_desc_t *	bhv_lookup_unlocked(bhv_head_t *bhp, void *ops);
-extern bhv_desc_t *	bhv_base_unlocked(bhv_head_t *bhp);
+extern bhv_desc_t *	bhv_lookup_range(bhv_head_t *bhp, int low, int high);
+extern bhv_desc_t *	bhv_base(bhv_head_t *bhp);
+
+/* No bhv locking on Linux */
+#define bhv_lookup_unlocked	bhv_lookup
+#define bhv_base_unlocked	bhv_base
 
 #endif /* __XFS_BEHAVIOR_H__ */
diff -Nru a/fs/xfs/linux/xfs_file.c b/fs/xfs/linux/xfs_file.c
--- a/fs/xfs/linux/xfs_file.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/linux/xfs_file.c	Mon Mar 31 13:41:07 2003
@@ -299,7 +299,9 @@
 	int		error;
 
 	if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) {
-		error = -xfs_dm_send_mmap_event(vma, 0);
+		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
+
+		error = -XFS_SEND_MMAP(mp, vma, 0);
 		if (error)
 			return error;
 	}
@@ -345,8 +347,10 @@
 
 	if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) {
 		if ((vma->vm_flags & VM_MAYSHARE) &&
-		    (newflags & PROT_WRITE) && !(vma->vm_flags & PROT_WRITE)){
-			error = xfs_dm_send_mmap_event(vma, VM_WRITE);
+		    (newflags & PROT_WRITE) && !(vma->vm_flags & PROT_WRITE)) {
+			xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
+
+			error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
 		    }
 	}
 	return error;
diff -Nru a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
--- a/fs/xfs/linux/xfs_globals.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/linux/xfs_globals.c	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -40,7 +40,7 @@
 /*
  * System memory size - used to scale certain data structures in XFS.
  */
-unsigned long	xfs_physmem;
+unsigned long xfs_physmem;
 
 /*
  * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
@@ -51,24 +51,14 @@
 /*
  * Used to serialize atomicIncWithWrap.
  */
-spinlock_t Atomic_spin = SPIN_LOCK_UNLOCKED;
+spinlock_t xfs_atomic_spin = SPIN_LOCK_UNLOCKED;
 
 /*
  * Global system credential structure.
  */
 cred_t sys_cred_val, *sys_cred = &sys_cred_val;
 
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct xfs_qm	*xfs_Gqm;
-mutex_t		xfs_Gqm_lock;
-
 /* Export XFS symbols used by xfsidbg */
-EXPORT_SYMBOL(xfs_Gqm);
 EXPORT_SYMBOL(xfs_next_bit);
 EXPORT_SYMBOL(xfs_contig_bits);
 EXPORT_SYMBOL(xfs_bmbt_get_all);
diff -Nru a/fs/xfs/linux/xfs_globals.h b/fs/xfs/linux/xfs_globals.h
--- a/fs/xfs/linux/xfs_globals.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/linux/xfs_globals.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -39,11 +39,10 @@
 
 extern uint64_t xfs_panic_mask;		/* set to cause more panics */
 
-extern unsigned long	xfs_physmem;
+extern unsigned long xfs_physmem;
 
-extern struct cred *sys_cred;
+extern spinlock_t xfs_atomic_spin;
 
-extern struct xfs_qm	*xfs_Gqm;
-extern mutex_t		xfs_Gqm_lock;
+extern struct cred *sys_cred;
 
 #endif	/* __XFS_GLOBALS_H__ */
diff -Nru a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
--- a/fs/xfs/linux/xfs_ioctl.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_ioctl.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -130,7 +130,7 @@
 		int		lock_mode;
 
 		/* need to get access to the xfs_inode to read the generation */
-		bhv = VNODE_TO_FIRST_BHV(vp);
+		bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
 		ASSERT(bhv);
 		ip = XFS_BHVTOI(bhv);
 		ASSERT(ip);
diff -Nru a/fs/xfs/linux/xfs_iomap.c b/fs/xfs/linux/xfs_iomap.c
--- a/fs/xfs/linux/xfs_iomap.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/linux/xfs_iomap.c	Mon Mar 31 13:41:07 2003
@@ -258,11 +258,10 @@
 	 * the ilock across a disk read.
 	 */
 
-	if (XFS_IS_QUOTA_ON(mp) && XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
-			return XFS_ERROR(error);
-		}
-	}
+	error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+	if (error)
+		return XFS_ERROR(error);
+
 	maps = min(XFS_WRITE_IMAPS, *nmaps);
 	nimaps = maps;
 
@@ -291,7 +290,7 @@
 	 * determine if reserving space on
 	 * the data or realtime partition.
 	 */
-	if ((rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+	if ((rt = XFS_IS_REALTIME_INODE(ip))) {
 		int	sbrtextsize, iprtextsize;
 
 		sbrtextsize = mp->m_sb.sb_rextsize;
@@ -333,11 +332,9 @@
 		goto error_out; /* Don't return in above if .. trans ..,
 					need lock to return */
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (xfs_trans_reserve_blkquota(tp, ip, resblks)) {
-			error = (EDQUOT);
-			goto error1;
-		}
+	if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) {
+		error = (EDQUOT);
+		goto error1;
 	}
 	nimaps = 1;
 
@@ -422,11 +419,9 @@
 	 * the ilock across a disk read.
 	 */
 
-	if (XFS_IS_QUOTA_ON(mp) && XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED))) {
-			return XFS_ERROR(error);
-		}
-	}
+	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+	if (error)
+		return XFS_ERROR(error);
 
 retry:
 	isize = ip->i_d.di_size;
@@ -538,11 +533,8 @@
 	 * Make sure that the dquots are there.
 	 */
 
-	if (XFS_IS_QUOTA_ON(mp) && XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, 0))) {
-			return XFS_ERROR(error);
-		}
-	}
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		return XFS_ERROR(error);
 
 	offset_fsb = map->br_startoff;
 	count_fsb = map->br_blockcount;
diff -Nru a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h
--- a/fs/xfs/linux/xfs_linux.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/linux/xfs_linux.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -44,6 +44,7 @@
 #include <linux/major.h>
 #include <linux/pagemap.h>
 #include <linux/vfs.h>
+#include <linux/seq_file.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
@@ -159,6 +160,15 @@
 
 #define SYNCHRONIZE()	barrier()
 #define __return_address __builtin_return_address(0)
+
+/*
+ * IRIX (BSD) quotactl makes use of separate commands for user/group,
+ * whereas on Linux the syscall encodes this information into the cmd 
+ * field (see the QCMD macro in quota.h).  These macros help keep the
+ * code portable - they are not visible from the syscall interface.
+ */
+#define Q_XSETGQLIM	XQM_CMD(0x8)	/* set groups disk limits */
+#define Q_XGETGQUOTA	XQM_CMD(0x9)	/* get groups disk limits */
 
 /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
 /* we may well need to fine-tune this if it ever becomes an issue.  */
diff -Nru a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
--- a/fs/xfs/linux/xfs_lrw.c	Mon Mar 31 13:41:05 2003
+++ b/fs/xfs/linux/xfs_lrw.c	Mon Mar 31 13:41:06 2003
@@ -121,7 +121,8 @@
 	xfs_mount_t		*mp;
 	vnode_t			*vp;
 	unsigned long		seg;
-	int			direct = filp->f_flags & O_DIRECT;
+	int			direct = (filp->f_flags & O_DIRECT);
+	int			invisible = (filp->f_mode & FINVIS);
 
 	ip = XFS_BHVTOI(bdp);
 	vp = BHV_TO_VNODE(bdp);
@@ -180,13 +181,12 @@
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
-	    !(filp->f_mode & FINVIS)) {
+	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !invisible) {
 		int error;
 		vrwlock_t locktype = VRWLOCK_READ;
 
-		error = xfs_dm_send_data_event(DM_EVENT_READ, bdp, *offp,
-				size, FILP_DELAY_FLAG(filp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, size,
+				      FILP_DELAY_FLAG(filp), &locktype);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			return -error;
@@ -198,7 +198,7 @@
 
 	XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);
 
-	if (!(filp->f_mode & FINVIS))
+	if (!invisible)
 		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
 
 	return ret;
@@ -217,11 +217,13 @@
 	ssize_t			ret;
 	xfs_fsize_t		n;
 	xfs_inode_t		*ip;
+	xfs_mount_t		*mp;
 	vnode_t			*vp;
 	int			invisible = (filp->f_mode & FINVIS);
 
 	ip = XFS_BHVTOI(bdp);
 	vp = BHV_TO_VNODE(bdp);
+	mp = ip->i_mount;
 	vn_trace_entry(vp, "xfs_sendfile", (inst_t *)__return_address);
 
 	XFS_STATS_INC(xfsstats.xs_read_calls);
@@ -241,8 +243,8 @@
 		vrwlock_t locktype = VRWLOCK_READ;
 		int error;
 
-		error = xfs_dm_send_data_event(DM_EVENT_READ, bdp, *offp,
-				count, FILP_DELAY_FLAG(filp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, count,
+				      FILP_DELAY_FLAG(filp), &locktype);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			return -error;
@@ -493,7 +495,8 @@
 	vnode_t			*vp;
 	unsigned long		seg;
 	int			iolock;
-	int			direct = file->f_flags & O_DIRECT;
+	int			direct = (file->f_flags & O_DIRECT);
+	int			invisible = (file->f_mode & FINVIS);
 	int			eventsent = 0;
 	vrwlock_t		locktype;
 
@@ -573,13 +576,13 @@
 	}
 
 	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
-	    !(file->f_mode & FINVIS) && !eventsent)) {
+	    !invisible && !eventsent)) {
 		loff_t		savedsize = *offset;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
-		error = xfs_dm_send_data_event(DM_EVENT_WRITE, bdp,
-				*offset, size,
-				FILP_DELAY_FLAG(file), &locktype);
+		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, bdp,
+				      *offset, size,
+				      FILP_DELAY_FLAG(file), &locktype);
 		if (error) {
 			xfs_iunlock(xip, iolock);
 			return -error;
@@ -588,12 +591,11 @@
 		eventsent = 1;
 
 		/*
-		 * The iolock was dropped and reaquired in
-		 * xfs_dm_send_data_event so we have to recheck the size
-		 *  when appending.  We will only "goto start;" once,
-		 *  since having sent the event prevents another call
-		 *  to xfs_dm_send_data_event, which is what
-		 *  allows the size to change in the first place.
+		 * The iolock was dropped and reacquired in XFS_SEND_DATA
+		 * so we have to recheck the size when appending.
+		 * We will only "goto start;" once, since having sent the
+		 * event prevents another call to XFS_SEND_DATA, which is
+		 * what allows the size to change in the first place.
 		 */
 		if ((file->f_flags & O_APPEND) &&
 		    savedsize != xip->i_d.di_size) {
@@ -608,10 +610,8 @@
 	 *
 	 * We must update xfs' times since revalidate will overcopy xfs.
 	 */
-	if (size) {
-		if (!(file->f_mode & FINVIS))
-			xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	}
+	if (size && !invisible)
+		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
 	/*
 	 * If the offset is beyond the size of the file, we have a couple
@@ -658,11 +658,10 @@
 	ret = generic_file_write_nolock(file, iovp, segs, offset);
 
 	if ((ret == -ENOSPC) &&
-	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
-	    !(file->f_mode & FINVIS)) {
+	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !invisible) {
 
 		xfs_rwunlock(bdp, locktype);
-		error = dm_send_namesp_event(DM_EVENT_NOSPACE, bdp,
+		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, bdp,
 				DM_RIGHT_NULL, bdp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (error)
diff -Nru a/fs/xfs/linux/xfs_stats.c b/fs/xfs/linux/xfs_stats.c
--- a/fs/xfs/linux/xfs_stats.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_stats.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -61,7 +61,6 @@
 		{ "xstrat",		XFSSTAT_END_WRITE_CONVERT	},
 		{ "rw",			XFSSTAT_END_READ_WRITE_OPS	},
 		{ "attr",		XFSSTAT_END_ATTRIBUTE_OPS	},
-		{ "qm",			XFSSTAT_END_QUOTA_OPS		},
 		{ "icluster",		XFSSTAT_END_INODE_CLUSTER	},
 		{ "vnodes",		XFSSTAT_END_VNODE_OPS		},
 	};
@@ -95,50 +94,17 @@
 	return len;
 }
 
-STATIC int
-xfs_read_xfsquota(
-	char		*buffer,
-	char		**start,
-	off_t		offset,
-	int		count,
-	int		*eof,
-	void		*data)
-{
-	int		len;
-
-	/* maximum; incore; ratio free to inuse; freelist */
-	len = sprintf(buffer, "%d\t%d\t%d\t%u\n",
-			ndquot,
-			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-			xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
-
-	if (offset >= len) {
-		*start = buffer;
-		*eof = 1;
-		return 0;
-	}
-	*start = buffer + offset;
-	if ((len -= offset) > count)
-		return count;
-	*eof = 1;
-
-	return len;
-}
-
 void
 xfs_init_procfs(void)
 {
 	if (!proc_mkdir("fs/xfs", 0))
 		return;
 	create_proc_read_entry("fs/xfs/stat", 0, 0, xfs_read_xfsstats, NULL);
-	create_proc_read_entry("fs/xfs/xqm", 0, 0, xfs_read_xfsquota, NULL);
 }
 
 void
 xfs_cleanup_procfs(void)
 {
 	remove_proc_entry("fs/xfs/stat", NULL);
-	remove_proc_entry("fs/xfs/xqm", NULL);
 	remove_proc_entry("fs/xfs", NULL);
 }
diff -Nru a/fs/xfs/linux/xfs_stats.h b/fs/xfs/linux/xfs_stats.h
--- a/fs/xfs/linux/xfs_stats.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/linux/xfs_stats.h	Mon Mar 31 13:41:06 2003
@@ -107,16 +107,7 @@
 	__uint32_t		xs_attr_set;
 	__uint32_t		xs_attr_remove;
 	__uint32_t		xs_attr_list;
-# define XFSSTAT_END_QUOTA_OPS		(XFSSTAT_END_ATTRIBUTE_OPS+8)
-	__uint32_t		xs_qm_dqreclaims;
-	__uint32_t		xs_qm_dqreclaim_misses;
-	__uint32_t		xs_qm_dquot_dups;
-	__uint32_t		xs_qm_dqcachemisses;
-	__uint32_t		xs_qm_dqcachehits;
-	__uint32_t		xs_qm_dqwants;
-	__uint32_t		xs_qm_dqshake_reclaims;
-	__uint32_t		xs_qm_dqinact_reclaims;
-# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_QUOTA_OPS+3)
+# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
 	__uint32_t		xs_iflush_count;
 	__uint32_t		xs_icluster_flushcnt;
 	__uint32_t		xs_icluster_flushinode;
diff -Nru a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
--- a/fs/xfs/linux/xfs_super.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_super.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -31,107 +31,68 @@
  */
 
 #include <xfs.h>
-#include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/major.h>
 #include <linux/init.h>
-#include <linux/ctype.h>
-#include <linux/seq_file.h>
 #include <linux/mount.h>
 #include "xfs_version.h"
 
-/* xfs_vfs[ops].c */
-extern int  xfs_init(void);
-extern void xfs_cleanup(void);
-
-/* For kernels which have the s_maxbytes field - set it */
-#ifdef MAX_NON_LFS
-# define set_max_bytes(sb)	((sb)->s_maxbytes = XFS_MAX_FILE_OFFSET)
-#else
-# define set_max_bytes(sb)	do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define set_posix_acl(sb)	((sb)->s_flags |= MS_POSIXACL)
-#else
-# define set_posix_acl(sb)	do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_QUOTA
-STATIC struct quotactl_ops linvfs_qops = {
-	.get_xstate		= linvfs_getxstate,
-	.set_xstate		= linvfs_setxstate,
-	.get_xquota		= linvfs_getxquota,
-	.set_xquota		= linvfs_setxquota,
-};
-# define set_quota_ops(sb)	((sb)->s_qcop = &linvfs_qops)
-#else
-# define set_quota_ops(sb)	do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_DMAPI
-int dmapi_init(void);
-void dmapi_uninit(void);
-#else
-#define dmapi_init()
-#define dmapi_uninit()
-#endif
-
+STATIC struct quotactl_ops linvfs_qops;
 STATIC struct super_operations linvfs_sops;
 STATIC struct export_operations linvfs_export_ops;
+STATIC kmem_cache_t * linvfs_inode_cachep;
 
 #define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE "logbsize"	/* size of XFS log buffers */
+#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
 #define MNTOPT_LOGDEV	"logdev"	/* log device */
 #define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
-#define MNTOPT_DMAPI	"dmapi"		/* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_XDSM	"xdsm"		/* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
 #define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
 #define MNTOPT_INO64	"ino64"		/* force inodes into 64-bit range */
 #define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
 #define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
 #define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
-#define MNTOPT_NORECOVERY "norecovery"	/* don't run XFS recovery */
-#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
-#define MNTOPT_QUOTA	"quota"		/* disk quotas */
-#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
-#define MNTOPT_UQUOTA	"usrquota"	/* user quota enabled */
-#define MNTOPT_GQUOTA	"grpquota"	/* group quota enabled */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_NOUUID	"nouuid"	/* Ignore FS uuid */
-#define MNTOPT_NOLOGFLUSH  "nologflush"	/* Don't use hard flushes in
-					   log writing */
+#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
 #define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_NOLOGFLUSH   "nologflush"   /* don't hard flush on log writes */
+#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 
-STATIC int
+STATIC struct xfs_mount_args *
+args_allocate(
+	struct super_block	*sb)
+{
+	struct xfs_mount_args	*args;
+
+	args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+	args->logbufs = args->logbufsize = -1;
+	strncpy(args->fsname, sb->s_id, MAXNAMELEN);
+
+	/* Copy the already-parsed mount(2) flags we're interested in */
+	if (sb->s_flags & MS_NOATIME)
+		args->flags |= XFSMNT_NOATIME;
+
+	/* Default to 32 bit inodes on Linux all the time */
+	args->flags |= XFSMNT_32BITINODES;
+
+	return args;
+}
+
+int
 xfs_parseargs(
+	struct bhv_desc		*bhv,
 	char			*options,
-	int			flags,
-	struct xfs_mount_args	*args)
+	struct xfs_mount_args	*args,
+	int			update)
 {
+	struct vfs		*vfsp = bhvtovfs(bhv);
 	char			*this_char, *value, *eov;
 	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
-	int			logbufs, logbufsize;
 	int			iosize;
 
-	/* Default to 32 bit inodes on linux all the time */
-	args->flags |= XFSMNT_32BITINODES;
-
-	/* Copy the already-parsed mount(2) flags we're interested in */
-	if (flags & MS_NOATIME)
-		args->flags |= XFSMNT_NOATIME;
-
-	if (!options) {
-		args->logbufs = args->logbufsize = -1;
+	if (!options)
 		return 0;
-	}
 
-	logbufs = logbufsize = -1;
 	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
 
 	while ((this_char = strsep(&options, ",")) != NULL) {
@@ -146,22 +107,23 @@
 					MNTOPT_LOGBUFS);
 				return -EINVAL;
 			}
-			logbufs = simple_strtoul(value, &eov, 10);
+			args->logbufs = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-			int	in_kilobytes = 0;
+			int	last, in_kilobytes = 0;
 
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
 					MNTOPT_LOGBSIZE);
 				return -EINVAL;
 			}
-			if (toupper(value[strlen(value)-1]) == 'K') {
+			last = strlen(value) - 1;
+			if (value[last] == 'K' || value[last] == 'k') {
 				in_kilobytes = 1;
-				value[strlen(value)-1] = '\0';
+				value[last] = '\0';
 			}
-			logbufsize = simple_strtoul(value, &eov, 10);
+			args->logbufsize = simple_strtoul(value, &eov, 10);
 			if (in_kilobytes)
-				logbufsize = logbufsize * 1024;
+				args->logbufsize <<= 10;
 		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
@@ -176,17 +138,6 @@
 				return -EINVAL;
 			}
 			strncpy(args->mtpt, value, MAXNAMELEN);
-#if CONFIG_XFS_DMAPI
-		} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
-			args->flags |= XFSMNT_DMAPI;
-		} else if (!strcmp(this_char, MNTOPT_XDSM)) {
-			args->flags |= XFSMNT_DMAPI;
-#else
-		} else if (!strcmp(this_char, MNTOPT_DMAPI) ||
-			   !strcmp(this_char, MNTOPT_XDSM)) {
-			printk("XFS: this kernel does not support dmapi/xdsm.\n");
-			return -EINVAL;
-#endif
 		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
@@ -210,28 +161,12 @@
 		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
 			args->flags |= XFSMNT_NORECOVERY;
 		} else if (!strcmp(this_char, MNTOPT_INO64)) {
-#ifdef XFS_BIG_FILESYSTEMS
 			args->flags |= XFSMNT_INO64;
-#else
+#ifndef XFS_BIG_FILESYSTEMS
 			printk("XFS: %s option not allowed on this system\n",
 				MNTOPT_INO64);
 			return -EINVAL;
 #endif
-		} else if (!strcmp(this_char, MNTOPT_UQUOTA)) {
-			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_QUOTA)) {
-			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-			args->flags |= XFSMNT_UQUOTA;
-			args->flags &= ~XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF)) {
-			args->flags |= XFSMNT_UQUOTA;
-			args->flags &= ~XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_GQUOTA)) {
-			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-			args->flags |= XFSMNT_GQUOTA;
-			args->flags &= ~XFSMNT_GQUOTAENF;
 		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
 			args->flags |= XFSMNT_NOALIGN;
 		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
@@ -264,7 +199,7 @@
 	}
 
 	if (args->flags & XFSMNT_NORECOVERY) {
-		if ((flags & MS_RDONLY) == 0) {
+		if ((vfsp->vfs_flag & VFS_RDONLY) == 0) {
 			printk("XFS: no-recovery mounts must be read-only.\n");
 			return -EINVAL;
 		}
@@ -292,22 +227,21 @@
 		if (dsunit) {
 			args->sunit = dsunit;
 			args->flags |= XFSMNT_RETERR;
-		} else
+		} else {
 			args->sunit = vol_dsunit;
+		}
 		dswidth ? (args->swidth = dswidth) :
 			  (args->swidth = vol_dswidth);
-	} else
+	} else {
 		args->sunit = args->swidth = 0;
-
-	args->logbufs = logbufs;
-	args->logbufsize = logbufsize;
+	}
 
 	return 0;
 }
 
-STATIC int
+int
 xfs_showargs(
-	struct vfs		*vfsp,
+	struct bhv_desc		*bhv,
 	struct seq_file		*m)
 {
 	static struct proc_xfs_info {
@@ -322,7 +256,7 @@
 		{ 0, NULL }
 	};
 	struct proc_xfs_info	*xfs_infop;
-	struct xfs_mount	*mp = XFS_BHVTOM(vfsp->vfs_fbhv);
+	struct xfs_mount	*mp = XFS_BHVTOM(bhv);
 	char b[BDEVNAME_SIZE];
 
 	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
@@ -330,18 +264,6 @@
 			seq_puts(m, xfs_infop->str);
 	}
 
-	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-		(mp->m_qflags & XFS_UQUOTA_ENFD) ?
-			seq_puts(m, "," MNTOPT_UQUOTA) :
-			seq_puts(m, "," MNTOPT_UQUOTANOENF);
-	}
-
-	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		(mp->m_qflags & XFS_GQUOTA_ENFD) ?
-			seq_puts(m, "," MNTOPT_GQUOTA) :
-			seq_puts(m, "," MNTOPT_GQUOTANOENF);
-	}
-
 	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
 		seq_printf(m, "," MNTOPT_BIOSIZE "=%d", mp->m_writeio_log);
 
@@ -368,9 +290,6 @@
 		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-	if (vfsp->vfs_flag & VFS_DMI)
-		seq_puts(m, "," MNTOPT_DMAPI);
-
 	return 0;
 }
 
@@ -402,21 +321,21 @@
 
 STATIC __inline__ void
 xfs_revalidate_inode(
-	xfs_mount_t	*mp,
-	vnode_t		*vp,
-	xfs_inode_t	*ip)
+	xfs_mount_t		*mp,
+	vnode_t			*vp,
+	xfs_inode_t		*ip)
 {
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	struct inode		*inode = LINVFS_GET_IP(vp);
 
 	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
 	inode->i_nlink	= ip->i_d.di_nlink;
 	inode->i_uid	= ip->i_d.di_uid;
 	inode->i_gid 	= ip->i_d.di_gid;
 	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
-		inode->i_rdev	= NODEV;
+		inode->i_rdev = NODEV;
 	} else {
 		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
-		inode->i_rdev	= XFS_DEV_TO_KDEVT(dev);
+		inode->i_rdev = XFS_DEV_TO_KDEVT(dev);
 	}
 	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_generation = ip->i_d.di_gen;
@@ -435,23 +354,24 @@
 
 void
 xfs_initialize_vnode(
-	bhv_desc_t	*bdp,
-	vnode_t		*vp,
-	bhv_desc_t	*inode_bhv,
-	int		unlock)
+	bhv_desc_t		*bdp,
+	vnode_t			*vp,
+	bhv_desc_t		*inode_bhv,
+	int			unlock)
 {
-	xfs_inode_t	*ip = XFS_BHVTOI(inode_bhv);
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	xfs_inode_t		*ip = XFS_BHVTOI(inode_bhv);
+	struct inode		*inode = LINVFS_GET_IP(vp);
 
-	if (vp->v_fbhv == NULL) {
+	if (!inode_bhv->bd_vobj) {
 		vp->v_vfsp = bhvtovfs(bdp);
-		bhv_desc_init(&(ip->i_bhv_desc), ip, vp, &xfs_vnodeops);
-		bhv_insert_initial(VN_BHV_HEAD(vp), &(ip->i_bhv_desc));
+		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
+		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
 	}
 
 	vp->v_type = IFTOVT(ip->i_d.di_mode);
+
 	/* Have we been called during the new inode create process,
-	 * in which case we are too early to fill in the linux inode.
+	 * in which case we are too early to fill in the Linux inode.
 	 */
 	if (vp->v_type == VNON)
 		return;
@@ -497,7 +417,7 @@
 	xfs_buftarg_t		*btp)
 {
 	pagebuf_delwri_flush(btp, PBDF_WAIT, NULL);
-	kfree(btp);
+	kmem_free(btp, sizeof(*btp));
 }
 
 void
@@ -548,8 +468,6 @@
 	return btp;
 }
 
-STATIC kmem_cache_t * linvfs_inode_cachep;
-
 STATIC __inline__ unsigned int gfp_mask(void)
 {
 	/* If we're not in a transaction, FS activity is ok */
@@ -557,7 +475,6 @@
 	return GFP_KERNEL;
 }
 
-
 STATIC struct inode *
 linvfs_alloc_inode(
 	struct super_block	*sb)
@@ -606,119 +523,7 @@
 destroy_inodecache( void )
 {
 	if (kmem_cache_destroy(linvfs_inode_cachep))
-		printk(KERN_INFO
-			"linvfs_inode_cache: not all structures were freed\n");
-}
-
-static int
-linvfs_fill_super(
-	struct super_block	*sb,
-	void			*data,
-	int			silent)
-{
-	vfs_t			*vfsp;
-	vfsops_t		*vfsops;
-	vnode_t			*rootvp;
-	struct inode		*ip;
-	struct xfs_mount_args	*args;
-	struct statfs		statvfs;
-	int			error = EINVAL;
-
-	args = kmalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
-	if (!args)
-		return  -EINVAL;
-	memset(args, 0, sizeof(struct xfs_mount_args));
-	args->slcount = args->stimeout = args->ctimeout = -1;
-	strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-	if (xfs_parseargs((char *)data, sb->s_flags, args))
-		goto out_null;
-
-	/*  Kludge in XFS until we have other VFS/VNODE FSs  */
-	vfsops = &xfs_vfsops;
-
-	/*  Set up the vfs_t structure	*/
-	vfsp = vfs_allocate();
-	if (!vfsp) {
-		error = ENOMEM;
-		goto out_null;
-	}
-
-	if (sb->s_flags & MS_RDONLY)
-		vfsp->vfs_flag |= VFS_RDONLY;
-
-	vfsp->vfs_super = sb;
-	set_max_bytes(sb);
-	set_quota_ops(sb);
-	sb->s_op = &linvfs_sops;
-	sb->s_export_op = &linvfs_export_ops;
-
-	sb_min_blocksize(sb, BBSIZE);
-
-	LINVFS_SET_VFS(sb, vfsp);
-
-	VFSOPS_MOUNT(vfsops, vfsp, args, NULL, error);
-	if (error)
-		goto fail_vfsop;
-
-	VFS_STATVFS(vfsp, &statvfs, NULL, error);
-	if (error)
-		goto fail_unmount;
-
-	sb->s_magic = XFS_SB_MAGIC;
-	sb->s_dirt = 1;
-	sb->s_blocksize = statvfs.f_bsize;
-	sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
-	set_posix_acl(sb);
-
-	VFS_ROOT(vfsp, &rootvp, error);
-	if (error)
-		goto fail_unmount;
-
-	ip = LINVFS_GET_IP(rootvp);
-
-	sb->s_root = d_alloc_root(ip);
-	if (!sb->s_root)
-		goto fail_vnrele;
-	if (is_bad_inode(sb->s_root->d_inode))
-		goto fail_vnrele;
-
-	/* Don't set the VFS_DMI flag until here because we don't want
-	 * to send events while replaying the log.
-	 */
-	if (args->flags & XFSMNT_DMAPI) {
-		vfsp->vfs_flag |= VFS_DMI;
-		VFSOPS_DMAPI_MOUNT(vfsops, vfsp, args->mtpt, args->fsname,
-				   error);
-
-		if (error) {
-			if (atomic_read(&sb->s_active) == 1)
-				vfsp->vfs_flag &= ~VFS_DMI;
-			goto fail_vnrele;
-		}
-	}
-
-	vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
-
-	kfree(args);
-	return 0;
-
-fail_vnrele:
-	if (sb->s_root) {
-		dput(sb->s_root);
-		sb->s_root = NULL;
-	} else {
-		VN_RELE(rootvp);
-	}
-
-fail_unmount:
-	VFS_UNMOUNT(vfsp, 0, NULL, error);
-
-fail_vfsop:
-	vfs_deallocate(vfsp);
-
-out_null:
-	kfree(args);
-	return -error;
+		printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
 }
 
 /*
@@ -789,8 +594,7 @@
 	sb->s_dirt = 0;
 	if (sb->s_flags & MS_RDONLY)
 		return;
-	VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR,
-		NULL, error);
+	VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
 }
 
 STATIC int
@@ -811,30 +615,21 @@
 	int			*flags,
 	char			*options)
 {
-	struct xfs_mount_args	*args;
-	vfs_t			*vfsp;
-	xfs_mount_t		*mp;
-	int			error = 0;
-
-	vfsp = LINVFS_GET_VFS(sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
+	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	xfs_mount_t		*mp = XFS_VFSTOM(vfsp);
+	struct xfs_mount_args	*args = args_allocate(sb);
+	int			error;
 
-	args = kmalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
-	if (!args)
-		return -ENOMEM;
-	memset(args, 0, sizeof(struct xfs_mount_args));
-	args->slcount = args->stimeout = args->ctimeout = -1;
-	if (xfs_parseargs(options, *flags, args)) {
-		error = -EINVAL;
+	VFS_PARSEARGS(vfsp, options, args, 1, error);
+	if (error)
 		goto out;
-	}
 
 	if (args->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
 	else
 		mp->m_flags &= ~XFS_MOUNT_NOATIME;
 
-	set_posix_acl(sb);
+	set_posix_acl_flag(sb);
 	linvfs_write_super(sb);
 
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
@@ -842,14 +637,14 @@
 
 	if (*flags & MS_RDONLY) {
 		sb->s_flags |= MS_RDONLY;
-		XFS_log_write_unmount_ro(vfsp->vfs_fbhv);
+		XFS_log_write_unmount_ro(&mp->m_bhv);
 		vfsp->vfs_flag |= VFS_RDONLY;
 	} else {
 		vfsp->vfs_flag &= ~VFS_RDONLY;
 	}
 
 out:
-	kfree(args);
+	kmem_free(args, sizeof(*args));
 	return error;
 }
 
@@ -950,6 +745,151 @@
 	return result;
 }
 
+STATIC int
+linvfs_show_options(
+	struct seq_file		*m,
+	struct vfsmount		*mnt)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
+	int			error;
+
+	VFS_SHOWARGS(vfsp, m, error);
+	return error;
+}
+
+STATIC int
+linvfs_getxstate(
+	struct super_block	*sb,
+	struct fs_quota_stat	*fqs)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	int			error;
+
+	VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
+	return -error;
+}
+
+STATIC int
+linvfs_setxstate(
+	struct super_block	*sb,
+	unsigned int		flags,
+	int			op)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	int			error;
+
+	VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
+	return -error;
+}
+
+STATIC int
+linvfs_getxquota(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	int			error, getmode;
+
+	getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
+	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
+	return -error;
+}
+
+STATIC int
+linvfs_setxquota(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	int			error, setmode;
+
+	setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
+	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
+	return -error;
+}
+
+STATIC int
+linvfs_fill_super(
+	struct super_block	*sb,
+	void			*data,
+	int			silent)
+{
+	vnode_t			*rootvp;
+	struct vfs		*vfsp = vfs_allocate();
+	struct xfs_mount_args	*args = args_allocate(sb);
+	struct statfs		statvfs;
+	int			error, error2;
+
+	vfsp->vfs_super = sb;
+	LINVFS_SET_VFS(sb, vfsp);
+	if (sb->s_flags & MS_RDONLY)
+		vfsp->vfs_flag |= VFS_RDONLY;
+	bhv_insert_all_vfsops(vfsp);
+
+	VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
+	if (error) {
+		bhv_remove_all_vfsops(vfsp, 1);
+		goto fail_vfsop;
+	}
+
+	sb_min_blocksize(sb, BBSIZE);
+	sb->s_maxbytes = XFS_MAX_FILE_OFFSET;
+	sb->s_export_op = &linvfs_export_ops;
+	sb->s_qcop = &linvfs_qops;
+	sb->s_op = &linvfs_sops;
+
+	VFS_MOUNT(vfsp, args, NULL, error);
+	if (error) {
+		bhv_remove_all_vfsops(vfsp, 1);
+		goto fail_vfsop;
+	}
+
+	VFS_STATVFS(vfsp, &statvfs, NULL, error);
+	if (error)
+		goto fail_unmount;
+
+	sb->s_dirt = 1;
+	sb->s_magic = XFS_SB_MAGIC;
+	sb->s_blocksize = statvfs.f_bsize;
+	sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
+	set_posix_acl_flag(sb);
+
+	VFS_ROOT(vfsp, &rootvp, error);
+	if (error)
+		goto fail_unmount;
+
+	sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
+	if (!sb->s_root || is_bad_inode(sb->s_root->d_inode)) {
+		error = sb->s_root ? EINVAL : ENOMEM;	/* was left at 0 */
+		goto fail_vnrele;
+	}
+
+	vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
+
+	kmem_free(args, sizeof(*args));
+	return 0;
+
+fail_vnrele:
+	if (sb->s_root) {
+		dput(sb->s_root);
+		sb->s_root = NULL;
+	} else {
+		VN_RELE(rootvp);
+	}
+
+fail_unmount:
+	VFS_UNMOUNT(vfsp, 0, NULL, error2);	/* keep original error */
+
+fail_vfsop:
+	vfs_deallocate(vfsp);
+	kmem_free(args, sizeof(*args));
+	return -error;
+}
+
 STATIC struct super_block *
 linvfs_get_sb(
 	struct file_system_type	*fs_type,
@@ -960,15 +900,6 @@
 	return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
 }
 
-STATIC int
-linvfs_show_options(
-	struct seq_file		*m,
-	struct vfsmount		*mnt)
-{
-	vfs_t			*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
-
-	return xfs_showargs(vfsp, m);
-}
 
 STATIC struct export_operations linvfs_export_ops = {
 	.get_parent		= linvfs_get_parent,
@@ -989,6 +920,13 @@
 	.show_options		= linvfs_show_options,
 };
 
+STATIC struct quotactl_ops linvfs_qops = {
+	.get_xstate		= linvfs_getxstate,
+	.set_xstate		= linvfs_setxstate,
+	.get_xquota		= linvfs_getxquota,
+	.set_xquota		= linvfs_setxquota,
+};
+
 STATIC struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
@@ -997,6 +935,7 @@
 	.fs_flags		= FS_REQUIRES_DEV,
 };
 
+
 STATIC int __init
 init_xfs_fs( void )
 {
@@ -1008,37 +947,44 @@
 
 	printk(message);
 
-	error = init_inodecache();
-	if (error < 0)
-		return error;
-
 	si_meminfo(&si);
 	xfs_physmem = si.totalram;
 
+	error = init_inodecache();
+	if (error < 0)
+		goto undo_inodecache;
+
 	error = pagebuf_init();
 	if (error < 0)
-		goto out;
+		goto undo_pagebuf;
 
 	vn_init();
 	xfs_init();
-	dmapi_init();
+	vfs_initdmapi();
+	vfs_initquota();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
-		goto out;
+		goto undo_register;
 	return 0;
 
-out:
+undo_register:
+	pagebuf_terminate();
+
+undo_pagebuf:
 	destroy_inodecache();
+
+undo_inodecache:
 	return error;
 }
 
 STATIC void __exit
 exit_xfs_fs( void )
 {
-	dmapi_uninit();
-	xfs_cleanup();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_cleanup();
+	vfs_exitquota();
+	vfs_exitdmapi();
 	pagebuf_terminate();
 	destroy_inodecache();
 }
@@ -1047,5 +993,6 @@
 module_exit(exit_xfs_fs);
 
 MODULE_AUTHOR("SGI <sgi.com>");
-MODULE_DESCRIPTION("SGI XFS " XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_DESCRIPTION(
+	"SGI XFS " XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
 MODULE_LICENSE("GPL");
diff -Nru a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h
--- a/fs/xfs/linux/xfs_super.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/linux/xfs_super.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -32,28 +32,38 @@
 #ifndef __XFS_SUPER_H__
 #define __XFS_SUPER_H__
 
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING		"ACLs, "
-#else
-# define XFS_ACL_STRING
-#endif
-
 #ifdef CONFIG_XFS_DMAPI
-# define XFS_DMAPI_STRING	"DMAPI, "
-#else
-# define XFS_DMAPI_STRING
+# define vfs_insertdmapi(vfs)	vfs_insertops(vfs, &xfs_dmops)
+# define vfs_initdmapi()	dmapi_init()
+# define vfs_exitdmapi()	dmapi_uninit()
+#else
+# define vfs_insertdmapi(vfs)	do { } while (0)
+# define vfs_initdmapi()	do { } while (0)
+# define vfs_exitdmapi()	do { } while (0)
 #endif
 
 #ifdef CONFIG_XFS_QUOTA
-# define XFS_QUOTA_STRING	"quota, "
+# define vfs_insertquota(vfs)	vfs_insertops(vfs, &xfs_qmops)
+# define vfs_initquota()	xfs_qm_init()
+# define vfs_exitquota()	xfs_qm_exit()
+#else
+# define vfs_insertquota(vfs)	do { } while (0)
+# define vfs_initquota()	do { } while (0)
+# define vfs_exitquota()	do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING		"ACLs, "
+# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
 #else
-# define XFS_QUOTA_STRING
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)	do { } while (0)
 #endif
 
 #ifdef CONFIG_XFS_RT
-# define XFS_RT_STRING		"realtime, "
+# define XFS_REALTIME_STRING	"realtime, "
 #else
-# define XFS_RT_STRING
+# define XFS_REALTIME_STRING
 #endif
 
 #ifdef CONFIG_XFS_VNODE_TRACING
@@ -68,9 +78,9 @@
 # define XFS_DBG_STRING		"no debug"
 #endif
 
-#define XFS_BUILD_OPTIONS	XFS_ACL_STRING XFS_DMAPI_STRING \
-				XFS_RT_STRING \
-				XFS_QUOTA_STRING XFS_VNTRACE_STRING \
+#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
+				XFS_REALTIME_STRING \
+				XFS_VNTRACE_STRING \
 				XFS_DBG_STRING /* DBG must be last */
 
 #define LINVFS_GET_VFS(s) \
@@ -82,6 +92,8 @@
 struct pb_target;
 struct block_device;
 
+extern int  xfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
+extern int  xfs_showargs(bhv_desc_t *, struct seq_file *);
 extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);
 
 extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
diff -Nru a/fs/xfs/linux/xfs_vfs.c b/fs/xfs/linux/xfs_vfs.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/linux/xfs_vfs.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+int
+vfs_mount(
+	struct bhv_desc		*bdp,
+	struct xfs_mount_args	*args,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_mount)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
+}
+
+int
+vfs_parseargs(
+	struct bhv_desc		*bdp,
+	char			*s,
+	struct xfs_mount_args	*args,
+	int			f)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_parseargs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
+}
+
+int
+vfs_showargs(
+	struct bhv_desc		*bdp,
+	struct seq_file		*m)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_showargs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
+}
+
+int
+vfs_unmount(
+	struct bhv_desc		*bdp,
+	int			fl,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_unmount)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
+}
+
+int
+vfs_root(
+	struct bhv_desc		*bdp,
+	struct vnode		**vpp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_root)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
+}
+
+int
+vfs_statvfs(
+	struct bhv_desc		*bdp,
+	struct statfs		*sp,
+	struct vnode		*vp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_statvfs)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
+}
+
+int
+vfs_sync(
+	struct bhv_desc		*bdp,
+	int			fl,
+	struct cred		*cr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_sync)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
+}
+
+int
+vfs_vget(
+	struct bhv_desc		*bdp,
+	struct vnode		**vpp,
+	struct fid		*fidp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_vget)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
+}
+
+int
+vfs_dmapiops(
+	struct bhv_desc		*bdp,
+	caddr_t			addr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_dmapiops)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
+}
+
+int
+vfs_quotactl(
+	struct bhv_desc		*bdp,
+	int			cmd,
+	int			id,
+	caddr_t			addr)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_quotactl)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
+}
+
+void
+vfs_init_vnode(
+	struct bhv_desc		*bdp,
+	struct vnode		*vp,
+	struct bhv_desc		*bp,
+	int			unlock)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_init_vnode)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
+}
+
+void
+vfs_force_shutdown(
+	struct bhv_desc		*bdp,
+	int			fl,
+	char			*file,
+	int			line)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_force_shutdown)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
+}
+
+vfs_t *
+vfs_allocate( void )
+{
+	struct vfs		*vfsp;
+
+	vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
+	bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
+	return vfsp;
+}
+
+void
+vfs_deallocate(
+	struct vfs		*vfsp)
+{
+	bhv_head_destroy(VFS_BHVHEAD(vfsp));
+	kmem_free(vfsp, sizeof(vfs_t));
+}
+
+void
+vfs_insertops(
+	struct vfs		*vfsp,
+	struct bhv_vfsops	*vfsops)
+{
+	struct bhv_desc		*bdp;
+
+	bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
+	bhv_desc_init(bdp, NULL, vfsp, vfsops);
+	bhv_insert(&vfsp->vfs_bh, bdp);
+}
+
+void
+vfs_insertbhv(
+	struct vfs		*vfsp,
+	struct bhv_desc		*bdp,
+	struct vfsops		*vfsops,
+	void			*mount)
+{
+	bhv_desc_init(bdp, mount, vfsp, vfsops);
+	bhv_insert_initial(&vfsp->vfs_bh, bdp);
+}
+
+void
+bhv_remove_vfsops(
+	struct vfs		*vfsp,
+	int			pos)
+{
+	struct bhv_desc		*bhv;
+
+	bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
+	if (!bhv)
+		return;
+	bhv_remove(&vfsp->vfs_bh, bhv);
+	kmem_free(bhv, sizeof(*bhv));
+}
+
+void
+bhv_remove_all_vfsops(
+	struct vfs		*vfsp,
+	int			freebase)
+{
+	struct xfs_mount	*mp;
+
+	bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
+	bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
+	if (!freebase)
+		return;
+	mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
+	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
+	xfs_mount_free(mp, 0);
+}
+
+void
+bhv_insert_all_vfsops(
+	struct vfs		*vfsp)
+{
+	struct xfs_mount	*mp;
+
+	mp = xfs_mount_init();
+	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+	vfs_insertdmapi(vfsp);
+	vfs_insertquota(vfsp);
+}
diff -Nru a/fs/xfs/linux/xfs_vfs.h b/fs/xfs/linux/xfs_vfs.h
--- a/fs/xfs/linux/xfs_vfs.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/linux/xfs_vfs.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -34,162 +34,154 @@
 
 #include <linux/vfs.h>
 
-struct statfs;
-struct vnode;
+struct fid;
 struct cred;
+struct vnode;
+struct statfs;
+struct seq_file;
 struct super_block;
-struct fid;
-struct dm_fcntl_vector;
 struct xfs_mount_args;
 
 typedef struct vfs {
-	u_int		vfs_flag;	/* flags */
-	fsid_t		vfs_fsid;	/* file system id */
-	fsid_t		*vfs_altfsid;	/* An ID fixed for life of FS */
-	bhv_head_t	vfs_bh;		/* head of vfs behavior chain */
-	struct super_block *vfs_super;	/* pointer to super block structure */
+	u_int			vfs_flag;	/* flags */
+	fsid_t			vfs_fsid;	/* file system ID */
+	fsid_t			*vfs_altfsid;	/* An ID fixed for life of FS */
+	bhv_head_t		vfs_bh;		/* head of vfs behavior chain */
+	struct super_block	*vfs_super;	/* Linux superblock structure */
 } vfs_t;
 
-#define vfs_fbhv	vfs_bh.bh_first		/* 1st on vfs behavior chain */
-#define VFS_FOPS(vfsp)	\
-	((vfsops_t *)((vfsp)->vfs_fbhv->bd_ops))/* ops for 1st behavior */
-
-
-#define bhvtovfs(bdp)	((struct vfs *)BHV_VOBJ(bdp))
-#define VFS_BHVHEAD(vfsp) (&(vfsp)->vfs_bh)
-
-
-#define VFS_RDONLY	0x0001		/* read-only vfs */
-#define VFS_GRPID	0x0002		/* group-ID assigned from directory */
-#define VFS_DMI		0x0004		/* filesystem has the DMI enabled */
-
-#define SYNC_ATTR	0x0001		/* sync attributes */
-#define SYNC_CLOSE	0x0002		/* close file system down */
-#define SYNC_DELWRI	0x0004		/* look at delayed writes */
-#define SYNC_WAIT	0x0008		/* wait for i/o to complete */
-#define SYNC_FSDATA	0x0020		/* flush fs data (e.g. superblocks) */
-#define SYNC_BDFLUSH	0x0010		/* BDFLUSH is calling -- don't block */
+#define vfs_fbhv		vfs_bh.bh_first	/* 1st on vfs behavior chain */
 
+#define bhvtovfs(bdp)		( (struct vfs *)BHV_VOBJ(bdp) )
+#define bhvtovfsops(bdp)	( (struct vfsops *)BHV_OPS(bdp) )
+#define VFS_BHVHEAD(vfs)	( &(vfs)->vfs_bh )
+#define VFS_REMOVEBHV(vfs, bdp)	( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
+
+#define VFS_POSITION_BASE	BHV_POSITION_BASE	/* chain bottom */
+#define VFS_POSITION_TOP	BHV_POSITION_TOP	/* chain top */
+#define VFS_POSITION_INVALID	BHV_POSITION_INVALID	/* invalid pos. num */
+
+typedef enum {
+	VFS_BHV_UNKNOWN,	/* not specified */
+	VFS_BHV_XFS,		/* xfs */
+	VFS_BHV_DM,		/* data migration */
+	VFS_BHV_QM,		/* quota manager */
+	VFS_BHV_IO,		/* IO path */
+	VFS_BHV_END		/* housekeeping end-of-range */
+} vfs_bhv_t;
+
+#define VFS_POSITION_XFS	(BHV_POSITION_BASE)
+#define VFS_POSITION_DM		(VFS_POSITION_BASE+10)
+#define VFS_POSITION_QM		(VFS_POSITION_BASE+20)
+#define VFS_POSITION_IO		(VFS_POSITION_BASE+30)
+
+#define VFS_RDONLY		0x0001	/* read-only vfs */
+#define VFS_GRPID		0x0002	/* group-ID assigned from directory */
+#define VFS_DMI			0x0004	/* filesystem has the DMI enabled */
+#define VFS_END			0x0004	/* max flag */
+
+#define SYNC_ATTR		0x0001	/* sync attributes */
+#define SYNC_CLOSE		0x0002	/* close file system down */
+#define SYNC_DELWRI		0x0004	/* look at delayed writes */
+#define SYNC_WAIT		0x0008	/* wait for i/o to complete */
+#define SYNC_FSDATA		0x0020	/* flush fs data (e.g. superblocks) */
+#define SYNC_BDFLUSH		0x0010	/* BDFLUSH is calling -- don't block */
+
+typedef int	(*vfs_mount_t)(bhv_desc_t *,
+				struct xfs_mount_args *, struct cred *);
+typedef int	(*vfs_parseargs_t)(bhv_desc_t *, char *,
+				struct xfs_mount_args *, int);
+typedef	int	(*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
+typedef int	(*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
+typedef int	(*vfs_root_t)(bhv_desc_t *, struct vnode **);
+typedef int	(*vfs_statvfs_t)(bhv_desc_t *, struct statfs *, struct vnode *);
+typedef int	(*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
+typedef int	(*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
+typedef int	(*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
+typedef int	(*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
+typedef void	(*vfs_init_vnode_t)(bhv_desc_t *,
+				struct vnode *, bhv_desc_t *, int);
+typedef void	(*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
 
 typedef struct vfsops {
-	int	(*vfs_mount)(struct vfs *, struct xfs_mount_args *,
-					struct cred *);
-					/* mount file system */
-	int	(*vfs_unmount)(bhv_desc_t *, int, struct cred *);
-					/* unmount file system */
-	int	(*vfs_root)(bhv_desc_t *, struct vnode **);
-					/* get root vnode */
-	int	(*vfs_statvfs)(bhv_desc_t *, struct statfs *, struct vnode *);
-					/* get file system statistics */
-	int	(*vfs_sync)(bhv_desc_t *, int, struct cred *);
-					/* flush files */
-	int	(*vfs_vget)(bhv_desc_t *, struct vnode **, struct fid *);
-					/* get vnode from fid */
-	int	(*vfs_dmapi_mount)(struct vfs *, char *, char *);
-					/* send dmapi mount event */
-	int	(*vfs_dmapi_fsys_vector)(bhv_desc_t *,
-					 struct dm_fcntl_vector *);
-	void	(*vfs_init_vnode)(bhv_desc_t *, struct vnode *,
-					bhv_desc_t *, int);
-	void	(*vfs_force_shutdown)(bhv_desc_t *,
-					int, char *, int);
+	bhv_position_t		vf_position;	/* behavior chain position */
+	vfs_mount_t		vfs_mount;	/* mount file system */
+	vfs_parseargs_t		vfs_parseargs;	/* parse mount options */
+	vfs_showargs_t		vfs_showargs;	/* unparse mount options */
+	vfs_unmount_t		vfs_unmount;	/* unmount file system */
+	vfs_root_t		vfs_root;	/* get root vnode */
+	vfs_statvfs_t		vfs_statvfs;	/* file system statistics */
+	vfs_sync_t		vfs_sync;	/* flush files */
+	vfs_vget_t		vfs_vget;	/* get vnode from fid */
+	vfs_dmapiops_t		vfs_dmapiops;	/* data migration */
+	vfs_quotactl_t		vfs_quotactl;	/* disk quota */
+	vfs_init_vnode_t	vfs_init_vnode;	/* initialize a new vnode */
+	vfs_force_shutdown_t	vfs_force_shutdown;	/* crash and burn */
 } vfsops_t;
 
-#define VFS_UNMOUNT(vfsp,f,cr, rv)	\
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_unmount))((vfsp)->vfs_fbhv, f, cr);	\
-}
-#define VFS_ROOT(vfsp, vpp, rv)		\
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_root))((vfsp)->vfs_fbhv, vpp);	\
-}
-#define VFS_STATVFS(vfsp, sp, vp, rv)	\
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_statvfs))((vfsp)->vfs_fbhv, sp, vp);\
-}
-#define VFS_SYNC(vfsp, flag, cr, rv) \
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_sync))((vfsp)->vfs_fbhv, flag, cr); \
-}
-#define VFS_VGET(vfsp, vpp, fidp, rv) \
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_vget))((vfsp)->vfs_fbhv, vpp, fidp);  \
-}
-
-#define VFS_INIT_VNODE(vfsp, vp, bhv, unlock) \
-{	\
-	(*(VFS_FOPS(vfsp)->vfs_init_vnode))((vfsp)->vfs_fbhv, vp, bhv, unlock);\
-}
-
-/* No behavior lock here */
-#define VFS_FORCE_SHUTDOWN(vfsp, flags) \
-	(*(VFS_FOPS(vfsp)->vfs_force_shutdown))((vfsp)->vfs_fbhv, flags, __FILE__, __LINE__);
-
-#define VFS_DMAPI_FSYS_VECTOR(vfsp, df, rv) \
-{	\
-	rv = (*(VFS_FOPS(vfsp)->vfs_dmapi_fsys_vector))((vfsp)->vfs_fbhv, df);	      \
-}
-
-
-#define VFSOPS_DMAPI_MOUNT(vfs_op, vfsp, dir_name, fsname, rv) \
-	rv = (*(vfs_op)->vfs_dmapi_mount)(vfsp, dir_name, fsname)
-#define VFSOPS_MOUNT(vfs_op, vfsp, args, cr, rv) \
-	rv = (*(vfs_op)->vfs_mount)(vfsp, args, cr)
-
-#define VFS_REMOVEBHV(vfsp, bdp)\
-{	\
-	bhv_remove(VFS_BHVHEAD(vfsp), bdp); \
-}
-
-#define PVFS_UNMOUNT(bdp,f,cr, rv)	\
-{	\
-	rv = (*((vfsops_t *)(bdp)->bd_ops)->vfs_unmount)(bdp, f, cr);	\
-}
-
-#define PVFS_SYNC(bdp, flag, cr, rv) \
-{	\
-	rv = (*((vfsops_t *)(bdp)->bd_ops)->vfs_sync)(bdp, flag, cr);	\
-}
-
-
-static __inline vfs_t *
-vfs_allocate(void)
-{
-	vfs_t	*vfsp;
-
-	vfsp = kmalloc(sizeof(vfs_t), GFP_KERNEL);
-	if (vfsp) {
-		memset(vfsp, 0, sizeof(vfs_t));
-		bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
-	}
-	return (vfsp);
-}
-
-static __inline void
-vfs_deallocate(
-	vfs_t		*vfsp)
-{
-	bhv_head_destroy(VFS_BHVHEAD(vfsp));
-	kfree(vfsp);
-}
+/*
+ * VFS's.  Operates on vfs structure pointers (starts at bhv head).
+ */
+#define VHEAD(v)			((v)->vfs_fbhv)
+#define VFS_MOUNT(v, ma,cr, rv)		((rv) = vfs_mount(VHEAD(v), ma,cr))
+#define VFS_PARSEARGS(v, o,ma,f, rv)	((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
+#define VFS_SHOWARGS(v, m, rv)		((rv) = vfs_showargs(VHEAD(v), m))
+#define VFS_UNMOUNT(v, f,cr, rv)	((rv) = vfs_unmount(VHEAD(v), f,cr))
+#define VFS_ROOT(v, vpp, rv)		((rv) = vfs_root(VHEAD(v), vpp))
+#define VFS_STATVFS(v, sp,vp, rv)	((rv) = vfs_statvfs(VHEAD(v), sp,vp))
+#define VFS_SYNC(v, flag,cr, rv)	((rv) = vfs_sync(VHEAD(v), flag,cr))
+#define VFS_VGET(v, vpp,fidp, rv)	((rv) = vfs_vget(VHEAD(v), vpp,fidp))
+#define VFS_DMAPIOPS(v, p, rv)		((rv) = vfs_dmapiops(VHEAD(v), p))
+#define VFS_QUOTACTL(v, c,id,p, rv)	((rv) = vfs_quotactl(VHEAD(v), c,id,p))
+#define VFS_INIT_VNODE(v, vp,b,ul)	( vfs_init_vnode(VHEAD(v), vp,b,ul) )
+#define VFS_FORCE_SHUTDOWN(v, fl,f,l)	( vfs_force_shutdown(VHEAD(v), fl,f,l) )
 
 /*
- * Called by fs dependent VFS_MOUNT code to link the VFS base file system
- * dependent behavior with the VFS virtual object.
+ * PVFS's.  Operates on behavior descriptor pointers.
  */
-static __inline void
-vfs_insertbhv(
-	vfs_t		*vfsp,
-	bhv_desc_t	*bdp,
-	vfsops_t	*vfsops,
-	void		*mount)
-{
-	/*
-	 * Initialize behavior desc with ops and data and then
-	 * attach it to the vfs.
-	 */
-	bhv_desc_init(bdp, mount, vfsp, vfsops);
-	bhv_insert_initial(&vfsp->vfs_bh, bdp);
-}
+#define PVFS_MOUNT(b, ma,cr, rv)	((rv) = vfs_mount(b, ma,cr))
+#define PVFS_PARSEARGS(b, o,ma,f, rv)	((rv) = vfs_parseargs(b, o,ma,f))
+#define PVFS_SHOWARGS(b, m, rv)		((rv) = vfs_showargs(b, m))
+#define PVFS_UNMOUNT(b, f,cr, rv)	((rv) = vfs_unmount(b, f,cr))
+#define PVFS_ROOT(b, vpp, rv)		((rv) = vfs_root(b, vpp))
+#define PVFS_STATVFS(b, sp,vp, rv)	((rv) = vfs_statvfs(b, sp,vp))
+#define PVFS_SYNC(b, flag,cr, rv)	((rv) = vfs_sync(b, flag,cr))
+#define PVFS_VGET(b, vpp,fidp, rv)	((rv) = vfs_vget(b, vpp,fidp))
+#define PVFS_DMAPIOPS(b, p, rv)		((rv) = vfs_dmapiops(b, p))
+#define PVFS_QUOTACTL(b, c,id,p, rv)	((rv) = vfs_quotactl(b, c,id,p))
+#define PVFS_INIT_VNODE(b, vp,b2,ul)	( vfs_init_vnode(b, vp,b2,ul) )
+#define PVFS_FORCE_SHUTDOWN(b, fl,f,l)	( vfs_force_shutdown(b, fl,f,l) )
+
+extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
+extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
+extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
+extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
+extern int vfs_root(bhv_desc_t *, struct vnode **);
+extern int vfs_statvfs(bhv_desc_t *, struct statfs *, struct vnode *);
+extern int vfs_sync(bhv_desc_t *, int, struct cred *);
+extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
+extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
+extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
+extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
+extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+
+typedef struct bhv_vfsops {
+	struct vfsops		bhv_common;	/* must stay first */
+	void *			bhv_custom;	/* see macros below */
+} bhv_vfsops_t;
+/* BHV_OPS(b) is cast to both struct vfsops * and bhv_vfsops_t * here. */
+#define vfs_bhv_lookup(v, id)	( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
+#define vfs_bhv_custom(b)	( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
+#define vfs_bhv_set_custom(b,o)	( (b)->bhv_custom = (void *)(o))
+#define vfs_bhv_clr_custom(b)	( (b)->bhv_custom = NULL )
+
+extern vfs_t *vfs_allocate(void);
+extern void vfs_deallocate(vfs_t *);
+extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
+extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
+
+extern void bhv_insert_all_vfsops(struct vfs *);
+extern void bhv_remove_all_vfsops(struct vfs *, int);
+extern void bhv_remove_vfsops(struct vfs *, int);
 
 #endif	/* __XFS_VFS_H__ */
diff -Nru a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
--- a/fs/xfs/linux/xfs_vnode.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/linux/xfs_vnode.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,6 @@
  */
 
 #include <xfs.h>
-#include <linux/pagemap.h>
 
 
 uint64_t vn_generation;		/* vnode generation number */
@@ -73,19 +72,19 @@
  * Clean a vnode of filesystem-specific data and prepare it for reuse.
  */
 STATIC int
-vn_reclaim(struct vnode *vp)
+vn_reclaim(
+	struct vnode	*vp)
 {
-	int	error;
+	int		error;
 
 	XFS_STATS_INC(xfsstats.vn_reclaim);
-
 	vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
 
 	/*
 	 * Only make the VOP_RECLAIM call if there are behaviors
 	 * to call.
 	 */
-	if (vp->v_fbhv != NULL) {
+	if (vp->v_fbhv) {
 		VOP_RECLAIM(vp, error);
 		if (error)
 			return -error;
@@ -108,18 +107,19 @@
 }
 
 STATIC void
-vn_wakeup(struct vnode *vp)
+vn_wakeup(
+	struct vnode	*vp)
 {
 	VN_LOCK(vp);
-	if (vp->v_flag & VWAIT) {
+	if (vp->v_flag & VWAIT)
 		sv_broadcast(vptosync(vp));
-	}
 	vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
 	VN_UNLOCK(vp, 0);
 }
 
 int
-vn_wait(struct vnode *vp)
+vn_wait(
+	struct vnode	*vp)
 {
 	VN_LOCK(vp);
 	if (vp->v_flag & (VINACT | VRECLM)) {
@@ -132,7 +132,8 @@
 }
 
 struct vnode *
-vn_initialize(struct inode *inode)
+vn_initialize(
+	struct inode	*inode)
 {
 	struct vnode	*vp = LINVFS_GET_VP(inode);
 
@@ -165,7 +166,9 @@
  * Get a reference on a vnode.
  */
 vnode_t *
-vn_get(struct vnode *vp, vmap_t *vmap)
+vn_get(
+	struct vnode	*vp,
+	vmap_t		*vmap)
 {
 	struct inode	*inode;
 
@@ -175,7 +178,7 @@
 		return NULL;
 
 	inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
-	if (inode == NULL)		/* Inode not present */
+	if (!inode)	/* Inode not present */
 		return NULL;
 
 	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
@@ -187,16 +190,17 @@
  * Revalidate the Linux inode from the vnode.
  */
 int
-vn_revalidate(struct vnode *vp)
+vn_revalidate(
+	struct vnode	*vp)
 {
-	int		error;
 	struct inode	*inode;
 	vattr_t		va;
+	int		error;
 
 	vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
 	ASSERT(vp->v_fbhv != NULL);
 
-	va.va_mask = XFS_AT_STAT|XFS_AT_GENCOUNT;
+	va.va_mask = XFS_AT_STAT;
 	VOP_GETATTR(vp, &va, 0, NULL, error);
 	if (!error) {
 		inode = LINVFS_GET_IP(vp);
@@ -206,12 +210,9 @@
 		inode->i_gid	    = va.va_gid;
 		inode->i_size	    = va.va_size;
 		inode->i_blocks	    = va.va_nblocks;
-		inode->i_mtime.tv_sec	    = va.va_mtime.tv_sec;
-		inode->i_mtime.tv_nsec	    = va.va_mtime.tv_nsec;
-		inode->i_ctime.tv_sec	    = va.va_ctime.tv_sec;
-		inode->i_ctime.tv_nsec	    = va.va_ctime.tv_nsec;
-		inode->i_atime.tv_sec	    = va.va_atime.tv_sec;
-		inode->i_atime.tv_nsec	    = va.va_atime.tv_nsec;
+		inode->i_mtime	    = va.va_mtime;
+		inode->i_ctime	    = va.va_ctime;
+		inode->i_atime	    = va.va_atime;
 		VUNMODIFY(vp);
 	}
 	return -error;
@@ -224,7 +225,9 @@
  * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
  */
 void
-vn_purge(struct vnode *vp, vmap_t *vmap)
+vn_purge(
+	struct vnode	*vp,
+	vmap_t		*vmap)
 {
 	vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
 
@@ -284,9 +287,10 @@
  * Add a reference to a referenced vnode.
  */
 struct vnode *
-vn_hold(struct vnode *vp)
+vn_hold(
+	struct vnode	*vp)
 {
-	struct inode *inode;
+	struct inode	*inode;
 
 	XFS_STATS_INC(xfsstats.vn_hold);
 
@@ -302,10 +306,11 @@
  *  Call VOP_INACTIVE on last reference.
  */
 void
-vn_rele(struct vnode *vp)
+vn_rele(
+	struct vnode	*vp)
 {
-	int	vcnt;
-	int	cache;
+	int		vcnt;
+	int		cache;
 
 	XFS_STATS_INC(xfsstats.vn_rele);
 
@@ -319,7 +324,7 @@
 	 * that i_count won't be decremented after we
 	 * return.
 	 */
-	if (vcnt == 0) {
+	if (!vcnt) {
 		/*
 		 * As soon as we turn this on, noone can find us in vn_get
 		 * until we turn off VINACT or VRECLM
@@ -331,19 +336,14 @@
 		 * Do not make the VOP_INACTIVE call if there
 		 * are no behaviors attached to the vnode to call.
 		 */
-		if (vp->v_fbhv != NULL) {
+		if (vp->v_fbhv)
 			VOP_INACTIVE(vp, NULL, cache);
-		}
 
 		VN_LOCK(vp);
-		if (vp->v_flag & VWAIT) {
-			if (vp->v_flag & VWAIT) {
-				sv_broadcast(vptosync(vp));
-			}
-		}
+		if (vp->v_flag & VWAIT)
+			sv_broadcast(vptosync(vp));
 
 		vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
-
 	}
 
 	VN_UNLOCK(vp, 0);
@@ -355,17 +355,16 @@
  * Finish the removal of a vnode.
  */
 void
-vn_remove(struct vnode *vp)
+vn_remove(
+	struct vnode	*vp)
 {
-	/* REFERENCED */
-	vmap_t	vmap;
+	vmap_t		vmap;
 
 	/* Make sure we don't do this to the same vnode twice */
 	if (!(vp->v_fbhv))
 		return;
 
 	XFS_STATS_INC(xfsstats.vn_remove);
-
 	vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
 
 	/*
diff -Nru a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
--- a/fs/xfs/linux/xfs_vnode.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/linux/xfs_vnode.h	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -85,10 +85,16 @@
 typedef enum {
 	VN_BHV_UNKNOWN,		/* not specified */
 	VN_BHV_XFS,		/* xfs */
+	VN_BHV_DM,		/* data migration */
+	VN_BHV_QM,		/* quota manager */
+	VN_BHV_IO,		/* IO path */
 	VN_BHV_END		/* housekeeping end-of-range */
 } vn_bhv_t;
 
 #define VNODE_POSITION_XFS	(VNODE_POSITION_BASE)
+#define VNODE_POSITION_DM	(VNODE_POSITION_BASE+10)
+#define VNODE_POSITION_QM	(VNODE_POSITION_BASE+20)
+#define VNODE_POSITION_IO	(VNODE_POSITION_BASE+30)
 
 /*
  * Macros for dealing with the behavior descriptor inside of the vnode.
@@ -96,7 +102,6 @@
 #define BHV_TO_VNODE(bdp)	((vnode_t *)BHV_VOBJ(bdp))
 #define BHV_TO_VNODE_NULL(bdp)	((vnode_t *)BHV_VOBJNULL(bdp))
 
-#define VNODE_TO_FIRST_BHV(vp)		(BHV_HEAD_FIRST(&(vp)->v_bh))
 #define VN_BHV_HEAD(vp)			((bhv_head_t *)(&((vp)->v_bh)))
 #define vn_bhv_head_init(bhp,name)	bhv_head_init(bhp,name)
 #define vn_bhv_remove(bhp,bdp)		bhv_remove(bhp,bdp)
@@ -127,16 +132,6 @@
 #define VWAIT		       0x4	/* waiting for VINACT/VRECLM to end */
 #define VMODIFIED	       0x8	/* XFS inode state possibly differs */
 					/* to the Linux inode state.	*/
-#define VROOT		  0x100000	/* root of its file system	*/
-#define VNOSWAP		  0x200000	/* cannot be used as virt swap device */
-#define VISSWAP		  0x400000	/* vnode is part of virt swap device */
-#define VREPLICABLE	  0x800000	/* Vnode can have replicated pages */
-#define VNONREPLICABLE	 0x1000000	/* Vnode has writers. Don't replicate */
-#define VDOCMP		 0x2000000	/* Vnode has special VOP_CMP impl. */
-#define VSHARE		 0x4000000	/* vnode part of global cache	*/
-#define VFRLOCKS	 0x8000000	/* vnode has FR locks applied	*/
-#define VENF_LOCKING	0x10000000	/* enf. mode FR locking in effect */
-#define VOPLOCK		0x20000000	/* oplock set on the vnode	*/
 
 typedef enum vrwlock	{ VRWLOCK_NONE, VRWLOCK_READ,
 			  VRWLOCK_WRITE, VRWLOCK_WRITE_DIRECT,
@@ -267,163 +262,92 @@
  */
 #define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)
 
-#define VOP_READ(vp,file,iov,segs,offset,cr,rv)				\
-{									\
-	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,cr); \
-}
-#define VOP_WRITE(vp,file,iov,segs,offset,cr,rv)			\
-{									\
-	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,cr);\
-}
-#define VOP_SENDFILE(vp,f,of,cnt,act,targ,cr,rv)			\
-{									\
-	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,of,cnt,act,targ,cr);\
-}
+#define VOP_READ(vp,file,iov,segs,offset,cr,rv)			\
+	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,cr)
+#define VOP_WRITE(vp,file,iov,segs,offset,cr,rv)		\
+	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,cr)
+#define VOP_SENDFILE(vp,f,off,cnt,act,targ,cr,rv)		\
+	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,cnt,act,targ,cr)
 #define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
-{									\
-	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n);		\
-}
+	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
 #define VOP_OPEN(vp, cr, rv)						\
-{									\
-	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr);			\
-}
+	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
 #define VOP_GETATTR(vp, vap, f, cr, rv)					\
-{									\
-	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr);		\
-}
+	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
 #define VOP_SETATTR(vp, vap, f, cr, rv)					\
-{									\
-	rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr);		\
-}
+	rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
 #define VOP_ACCESS(vp, mode, cr, rv)					\
-{									\
-	rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr);		\
-}
+	rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
 #define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)				\
-{									\
-	rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr);	\
-}
+	rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
 #define VOP_CREATE(dvp,d,vap,vpp,cr,rv)					\
-{									\
-	rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr);	\
-}
+	rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
 #define VOP_REMOVE(dvp,d,cr,rv)						\
-{									\
-	rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr);		\
-}
+	rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
 #define VOP_LINK(tdvp,fvp,d,cr,rv)					\
-{									\
-	rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr);		\
-}
+	rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
 #define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)				\
-{									\
-	rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr);	\
-}
+	rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
 #define VOP_MKDIR(dp,d,vap,vpp,cr,rv)					\
-{									\
-	rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr);		\
-}
+	rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
 #define	VOP_RMDIR(dp,d,cr,rv)	 					\
-{									\
-	rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr);			\
-}
+	rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
 #define VOP_READDIR(vp,uiop,cr,eofp,rv)					\
-{									\
-	rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp);		\
-}
+	rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
 #define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)				\
-{									\
-	rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr);	\
-}
+	rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
 #define VOP_READLINK(vp,uiop,cr,rv)					\
-{									\
-	rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,cr);		\
-}
+	rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,cr)
 #define VOP_FSYNC(vp,f,cr,b,e,rv)					\
-{									\
-	rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e);		\
-}
+	rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
 #define VOP_INACTIVE(vp, cr, rv)					\
-{									\
-	rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr);			\
-}
+	rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
 #define VOP_RELEASE(vp, rv)						\
-{									\
-	rv = _VOP_(vop_release, vp)((vp)->v_fbhv);			\
-}
+	rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
 #define VOP_FID2(vp, fidp, rv)						\
-{									\
-	rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp);			\
-}
+	rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
 #define VOP_RWLOCK(vp,i)						\
-{									\
-	(void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i);			\
-}
+	(void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
 #define VOP_RWLOCK_TRY(vp,i)						\
 	_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
-
 #define VOP_RWUNLOCK(vp,i)						\
-{									\
-	(void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i);			\
-}
+	(void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
+#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)				\
+	rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
 #define VOP_RECLAIM(vp, rv)						\
-{									\
-	rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv);			\
-}
+	rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
 #define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)		\
-{									\
-	rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred); \
-}
+	rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
 #define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)		\
-{									\
-	rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred); \
-}
+	rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
 #define VOP_ATTR_REMOVE(vp, name, flags, cred, rv)			\
-{									\
-	rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred);	\
-}
+	rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
 #define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)		\
-{									\
-	rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred);\
-}
+	rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
 #define VOP_LINK_REMOVED(vp, dvp, linkzero)				\
-{									\
-	(void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero); \
-}
+	(void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
 #define VOP_VNODE_CHANGE(vp, cmd, val)					\
-{									\
-	(void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val);	\
-}
+	(void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
 /*
  * These are page cache functions that now go thru VOPs.
  * 'last' parameter is unused and left in for IRIX compatibility
  */
 #define VOP_TOSS_PAGES(vp, first, last, fiopt)				\
-{									\
-	_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt);	\
-}
+	_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
 /*
  * 'last' parameter is unused and left in for IRIX compatibility
  */
 #define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)			\
-{									\
-	_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt); \
-}
+	_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
 /*
  * 'last' parameter is unused and left in for IRIX compatibility
  */
 #define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)		\
-{									\
-	rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt);\
-}
+	rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
 #define VOP_IOCTL(vp, inode, filp, cmd, arg, rv)			\
-{									\
-	rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,cmd,arg);	\
-}
+	rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,cmd,arg)
 #define VOP_IFLUSH(vp, flags, rv)					\
-{									\
-	rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags);		\
-}
+	rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
 
 /*
  * Flags for VOP_IFLUSH call
diff -Nru a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c
--- a/fs/xfs/pagebuf/page_buf.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/pagebuf/page_buf.c	Mon Mar 31 13:41:06 2003
@@ -1290,7 +1290,7 @@
 
 	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
 		pb->pb_locked = 0;
-		pagebuf_iodone(pb, 0, 1);
+		pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 1);
 	}
 
 	bio_put(bio);
@@ -1434,7 +1434,7 @@
 
 	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
 		pb->pb_locked = 0;
-		pagebuf_iodone(pb, 0, 0);
+		pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0);
 	}
 
 	return 0;
@@ -1612,15 +1612,13 @@
 			refrigerator(PF_IOTHREAD);
 
 		if (pbd_active == 1) {
-			del_timer(&pb_daemon_timer);
-			pb_daemon_timer.expires = jiffies +
-					pb_params.p_un.flush_interval;
-			add_timer(&pb_daemon_timer);
+			mod_timer(&pb_daemon_timer,
+				  jiffies + pb_params.p_un.flush_interval);
 			interruptible_sleep_on(&pbd_waitq);
 		}
 
 		if (pbd_active == 0) {
-			del_timer(&pb_daemon_timer);
+			del_timer_sync(&pb_daemon_timer);
 		}
 
 		spin_lock(&pbd_delwrite_lock);
diff -Nru a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h
--- a/fs/xfs/pagebuf/page_buf.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/pagebuf/page_buf.h	Mon Mar 31 13:41:07 2003
@@ -96,6 +96,7 @@
 	PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages	   */
 	PBF_STALE = (1 << 10),	/* buffer has been staled, do not find it  */
 	PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory  */
+	PBF_FS_DATAIOD = (1 << 12), /* schedule IO completion on fs datad  */
 
 	/* flags used only as arguments to access routines */
 	PBF_LOCK = (1 << 13),	/* lock requested			   */
diff -Nru a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_dquot.c	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include "xfs_qm.h"
+
+
+/*
+   LOCK ORDER
+
+   inode lock		    (ilock)
+   dquot hash-chain lock    (hashlock)
+   xqm dquot freelist lock  (freelistlock)
+   mount's dquot list lock  (mplistlock)
+   user dquot lock - lock ordering among dquots is based on the uid or gid
+   group dquot lock - similar to udquots. Between the two dquots, the udquot
+		      has to be locked first.
+   pin lock - the dquot lock must be held to take this lock.
+   flush lock - ditto.
+*/
+
+STATIC void		xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
+
+#ifdef DEBUG
+dev_t xfs_dqerror_dev = 0;
+int xfs_do_dqerror = 0;
+int xfs_dqreq_num = 0;
+int xfs_dqerror_mod = 33;
+#endif
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots is above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+xfs_dquot_t *
+xfs_qm_dqinit(
+	xfs_mount_t  *mp,
+	xfs_dqid_t   id,
+	uint	     type)
+{
+	xfs_dquot_t	*dqp;
+	boolean_t	brandnewdquot;
+
+	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+	dqp->dq_flags = type;
+	INT_SET(dqp->q_core.d_id, ARCH_CONVERT, id);
+	dqp->q_mount = mp;
+
+	/*
+	 * Freelist links, locks and trace buffer are set up only when brand new.
+	 */
+	if (brandnewdquot) {
+		dqp->dq_flnext = dqp->dq_flprev = dqp;
+		mutex_init(&dqp->q_qlock,  MUTEX_DEFAULT, "xdq");
+		initnsema(&dqp->q_flock, 1, "fdq");
+		sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+
+#ifdef DQUOT_TRACING
+		dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
+		xfs_dqtrace_entry(dqp, "DQINIT");
+#endif
+	} else {
+		/*
+		 * Only the q_core portion was zeroed in dqreclaim_one().
+		 * So, we need to reset the other fields by hand.
+		 */
+		 dqp->q_nrefs = 0;
+		 dqp->q_blkno = 0;
+		 dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
+		 dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
+		 dqp->q_bufoffset = 0;
+		 dqp->q_fileoffset = 0;
+		 dqp->q_transp = NULL;
+		 dqp->q_gdquot = NULL;
+		 dqp->q_res_bcount = 0;
+		 dqp->q_res_icount = 0;
+		 dqp->q_res_rtbcount = 0;
+		 dqp->q_pincount = 0;
+		 dqp->q_hash = 0;
+		 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
+
+#ifdef DQUOT_TRACING
+		 ASSERT(dqp->q_trace);
+		 xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
+#endif
+	 }
+
+	/*
+	 * The log item is not touched here; it gets initialized later.
+	 */
+	return (dqp);
+}
+
+/*
+ * Free all the memory associated with a dquot that is not on the freelist.
+ */
+void
+xfs_qm_dqdestroy(
+	xfs_dquot_t	*dqp)
+{
+	ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
+
+	mutex_destroy(&dqp->q_qlock);
+	freesema(&dqp->q_flock);
+	sv_destroy(&dqp->q_pinwait);
+
+#ifdef DQUOT_TRACING
+	if (dqp->q_trace)
+	     ktrace_free(dqp->q_trace);
+	dqp->q_trace = NULL;
+#endif
+	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+	atomic_dec(&xfs_Gqm->qm_totaldquots);	/* one fewer dquot accounted in the XQM */
+}
+
+/*
+ * Set up a 'fresh' on-disk dquot in a dquot chunk: magic, version, id, type.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+	xfs_dqid_t	 id,
+	uint		 type,
+	xfs_dqblk_t	 *d)
+{
+	/*
+	 * Caller has zeroed the entire dquot 'chunk' already.
+	 */
+	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
+	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
+	INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
+	INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
+}
+
+
+#ifdef DQUOT_TRACING
+/*
+ * Dquot tracing for debugging: log one ktrace entry for dqp (ip may be NULL).
+ */
+/* ARGSUSED */
+void
+xfs_dqtrace_entry__(
+	xfs_dquot_t *dqp,
+	char *func,
+	void *retaddr,
+	xfs_inode_t *ip)
+{
+	xfs_dquot_t *udqp = NULL;
+	int ino = 0;	/* traced as 0 when no inode is supplied */
+
+	ASSERT(dqp->q_trace);
+	if (ip) {
+		ino = ip->i_ino;
+		udqp = ip->i_udquot;
+	}
+	ktrace_enter(dqp->q_trace,
+		     (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
+		     (void *)func,
+		     (void *)(__psint_t)dqp->q_nrefs,
+		     (void *)(__psint_t)dqp->dq_flags,
+		     (void *)(__psint_t)dqp->q_res_bcount,
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
+		     (void *)(__psint_t)INT_GET(dqp->q_core.d_id, ARCH_CONVERT), /* 11 */
+		     (void *)(__psint_t)current_pid(),
+		     (void *)(__psint_t)ino,
+		     (void *)(__psint_t)retaddr,
+		     (void *)(__psint_t)udqp);
+	return;
+}
+#endif
+
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case, when enforcement is off).
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.
+ */
+void
+xfs_qm_adjust_dqtimers(
+	xfs_mount_t		*mp,
+	xfs_disk_dquot_t	*d)
+{
+	/*
+	 * The dquot had better be locked. We are modifying it here.
+	 */
+
+	/*
+	 * root's limits are not real limits, so id 0 never gets a timer.
+	 */
+	if (INT_ISZERO(d->d_id, ARCH_CONVERT))
+		return;
+
+#ifdef QUOTADEBUG
+	if (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT))
+		ASSERT(INT_GET(d->d_blk_softlimit, ARCH_CONVERT) <= INT_GET(d->d_blk_hardlimit, ARCH_CONVERT));
+	if (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT))
+		ASSERT(INT_GET(d->d_ino_softlimit, ARCH_CONVERT) <= INT_GET(d->d_ino_hardlimit, ARCH_CONVERT));
+#endif
+	if (INT_ISZERO(d->d_btimer, ARCH_CONVERT)) {	/* unset: start it once a set block limit is reached */
+		if ((INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
+		    (INT_GET(d->d_bcount, ARCH_CONVERT) >= INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) ||
+		    (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT) &&
+		    (INT_GET(d->d_bcount, ARCH_CONVERT) >= INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
+			INT_SET(d->d_btimer, ARCH_CONVERT, get_seconds() + XFS_QI_BTIMELIMIT(mp));
+		}
+	} else {	/* set: clear it once usage is below every block limit that is set */
+		if ((INT_ISZERO(d->d_blk_softlimit, ARCH_CONVERT) ||
+		    (INT_GET(d->d_bcount, ARCH_CONVERT) < INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) &&
+		    (INT_ISZERO(d->d_blk_hardlimit, ARCH_CONVERT) ||
+		    (INT_GET(d->d_bcount, ARCH_CONVERT) < INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
+			INT_ZERO(d->d_btimer, ARCH_CONVERT);
+		}
+	}
+
+	if (INT_ISZERO(d->d_itimer, ARCH_CONVERT)) {	/* unset: start it once a set inode limit is reached */
+		if ((INT_GET(d->d_ino_softlimit, ARCH_CONVERT) &&
+		    (INT_GET(d->d_icount, ARCH_CONVERT) >= INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) ||
+		    (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT) &&
+		    (INT_GET(d->d_icount, ARCH_CONVERT) >= INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
+			INT_SET(d->d_itimer, ARCH_CONVERT, get_seconds() + XFS_QI_ITIMELIMIT(mp));
+		}
+	} else {	/* set: clear it once usage is below every inode limit that is set */
+		if ((INT_ISZERO(d->d_ino_softlimit, ARCH_CONVERT) ||
+		    (INT_GET(d->d_icount, ARCH_CONVERT) < INT_GET(d->d_ino_softlimit, ARCH_CONVERT)))  &&
+		    (INT_ISZERO(d->d_ino_hardlimit, ARCH_CONVERT) ||
+		    (INT_GET(d->d_icount, ARCH_CONVERT) < INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
+			INT_ZERO(d->d_itimer, ARCH_CONVERT);
+		}
+	}
+}
+
+/*
+ * Increment or reset warnings of a given dquot; returns how many were bumped.
+ */
+int
+xfs_qm_dqwarn(
+	xfs_disk_dquot_t	*d,
+	uint			flags)
+{
+	int	warned;
+
+	/*
+	 * root's limits are not real limits.
+	 */
+	if (INT_ISZERO(d->d_id, ARCH_CONVERT))
+		return (0);
+
+	warned = 0;
+	if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
+	    (INT_GET(d->d_bcount, ARCH_CONVERT) >=
+	     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
+		if (flags & XFS_QMOPT_DOWARN) {	/* count a warning only when asked to */
+			INT_MOD(d->d_bwarns, ARCH_CONVERT, +1);
+			warned++;
+		}
+	} else {
+		if (INT_ISZERO(d->d_blk_softlimit, ARCH_CONVERT) ||
+		    (INT_GET(d->d_bcount, ARCH_CONVERT) <
+		     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
+			INT_ZERO(d->d_bwarns, ARCH_CONVERT);
+		}
+	}
+
+	if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 &&
+	    (INT_GET(d->d_icount, ARCH_CONVERT) >=
+	     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
+		if (flags & XFS_QMOPT_DOWARN) {	/* count a warning only when asked to */
+			INT_MOD(d->d_iwarns, ARCH_CONVERT, +1);
+			warned++;
+		}
+	} else {
+		if ((INT_ISZERO(d->d_ino_softlimit, ARCH_CONVERT)) ||
+		    (INT_GET(d->d_icount, ARCH_CONVERT) <
+		     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
+			INT_ZERO(d->d_iwarns, ARCH_CONVERT);
+		}
+	}
+#ifdef QUOTADEBUG
+	if (INT_GET(d->d_iwarns, ARCH_CONVERT))
+		cmn_err(CE_DEBUG,
+			"--------@@Inode warnings running : %Lu >= %Lu",
+			INT_GET(d->d_icount, ARCH_CONVERT),
+			INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
+	if (INT_GET(d->d_bwarns, ARCH_CONVERT))
+		cmn_err(CE_DEBUG,
+			"--------@@Blks warnings running : %Lu >= %Lu",
+			INT_GET(d->d_bcount, ARCH_CONVERT),
+			INT_GET(d->d_blk_softlimit, ARCH_CONVERT));
+#endif
+	return (warned);
+}
+
+
+/*
+ * Initialize a buffer full of dquots and log the whole thing.
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	xfs_buf_t	*bp)
+{
+	xfs_dqblk_t	*d;
+	int		curid, i;
+
+	ASSERT(tp);
+	ASSERT(XFS_BUF_ISBUSY(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+
+	d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
+
+	/*
+	 * ID of the first dquot in the block - ids are zero based.
+	 */
+	curid = id - (id % XFS_QM_DQPERBLK(mp));
+	ASSERT(curid >= 0);
+	memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
+	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)	/* consecutive ids from curid */
+		xfs_qm_dqinit_core(curid, type, d);
+	xfs_trans_dquot_buf(tp, bp,
+			    type & XFS_DQ_USER ?
+			    XFS_BLI_UDQUOT_BUF :
+			    XFS_BLI_GDQUOT_BUF);
+	xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
+}
+
+
+
+/*
+ * Allocate a block and fill it with dquots; on success *O_bpp is its buffer.
+ * This is called when the bmapi finds a hole. Returns 0 or an error code.
+ */
+STATIC int
+xfs_qm_dqalloc(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dquot_t	*dqp,
+	xfs_inode_t	*quotip,
+	xfs_fileoff_t	offset_fsb,
+	xfs_buf_t	**O_bpp)
+{
+	xfs_fsblock_t	firstblock;
+	xfs_bmap_free_t flist;
+	xfs_bmbt_irec_t map;
+	int		nmaps, error, committed;
+	xfs_buf_t	*bp;
+
+	ASSERT(tp != NULL);
+	xfs_dqtrace_entry(dqp, "DQALLOC");
+
+	/*
+	 * Initialize the bmap freelist prior to calling bmapi code.
+	 */
+	XFS_BMAP_INIT(&flist, &firstblock);
+	xfs_ilock(quotip, XFS_ILOCK_EXCL);
+	/*
+	 * Return if this type of quotas is turned off while we didn't
+	 * have an inode lock
+	 */
+	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+		return (ESRCH);
+	}
+
+	/*
+	 * xfs_trans_commit normally decrements the vnode ref count
+	 * when it unlocks the inode. Since we want to keep the quota
+	 * inode around, we bump the vnode ref count now.
+	 */
+	VN_HOLD(XFS_ITOV(quotip));
+
+	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
+	nmaps = 1;
+	if ((error = xfs_bmapi(tp, quotip,
+			      offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+			      &firstblock,
+			      XFS_QM_DQALLOC_SPACE_RES(mp),
+			      &map, &nmaps, &flist))) {
+		goto error0;
+	}
+	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+	ASSERT(nmaps == 1);
+	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+	       (map.br_startblock != HOLESTARTBLOCK));
+
+	/*
+	 * Keep track of the blkno to save a lookup later
+	 */
+	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+	/* now we can just get the buffer (there's nothing to read yet) */
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+			       dqp->q_blkno,
+			       XFS_QI_DQCHUNKLEN(mp),
+			       0);
+	if ((error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM))	/* a NULL buffer must not return 0 */
+		goto error1;
+	/*
+	 * Make a chunk of dquots out of this buffer and log
+	 * the entire thing.
+	 */
+	xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
+			      dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+			      bp);
+
+	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
+		goto error1;
+	}
+
+	*O_bpp = bp;
+	return 0;
+
+      error1:
+	xfs_bmap_cancel(&flist);
+      error0:
+	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+	return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot in O_bpp,
+ * and a ptr to the on-disk dquot within that buffer in O_ddpp.
+ */
+STATIC int
+xfs_qm_dqtobp(
+	xfs_trans_t		*tp,
+	xfs_dquot_t		*dqp,
+	xfs_disk_dquot_t	**O_ddpp,
+	xfs_buf_t		**O_bpp,
+	uint			flags)
+{
+	xfs_bmbt_irec_t map;
+	int		nmaps, error;
+	xfs_buf_t	*bp;
+	xfs_inode_t	*quotip;
+	xfs_mount_t	*mp;
+	xfs_disk_dquot_t *ddq;
+	xfs_dqid_t	id;
+	boolean_t	newdquot;
+
+	mp = dqp->q_mount;
+	id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+	nmaps = 1;
+	newdquot = B_FALSE;
+
+	/*
+	 * If we don't know where the dquot lives, find out.
+	 */
+	if (dqp->q_blkno == (xfs_daddr_t) 0) {
+		/* We use the id as an index */
+		dqp->q_fileoffset = (xfs_fileoff_t) ((uint)id /
+						     XFS_QM_DQPERBLK(mp));
+		nmaps = 1;
+		quotip = XFS_DQ_TO_QIP(dqp);
+		xfs_ilock(quotip, XFS_ILOCK_SHARED);
+		/*
+		 * Return if this type of quotas is turned off while we didn't
+		 * have an inode lock
+		 */
+		if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+			xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+			return (ESRCH);
+		}
+		/*
+		 * Find the block map; no allocations yet
+		 */
+		error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+				  XFS_DQUOT_CLUSTER_SIZE_FSB,
+				  XFS_BMAPI_METADATA,
+				  NULL, 0, &map, &nmaps, NULL);
+
+		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+		if (error)
+			return (error);
+		ASSERT(nmaps == 1);
+		ASSERT(map.br_blockcount == 1);
+
+		/*
+		 * offset of dquot in the (fixed sized) dquot chunk.
+		 */
+		dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
+			sizeof(xfs_dqblk_t);
+		if (map.br_startblock == HOLESTARTBLOCK) {
+			/*
+			 * We don't allocate unless we're asked to
+			 */
+			if (!(flags & XFS_QMOPT_DQALLOC))
+				return (ENOENT);
+
+			ASSERT(tp);
+			if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip,
+						dqp->q_fileoffset, &bp)))
+				return (error);
+			newdquot = B_TRUE;
+		} else {
+			/*
+			 * store the blkno etc so that we don't have to do the
+			 * mapping all the time
+			 */
+			dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+		}
+	}
+	ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
+	ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
+
+	/*
+	 * Read in the buffer, unless we've just done the allocation
+	 * (in which case we already have the buf).
+	 */
+	if (! newdquot) {
+		xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
+		if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+					       dqp->q_blkno,
+					       XFS_QI_DQCHUNKLEN(mp),
+					       0, &bp))) {
+			return (error);
+		}
+		if (error || !bp)	/* NOTE(review): error is 0 here; a NULL bp looks like it returns success - confirm */
+			return XFS_ERROR(error);
+	}
+	ASSERT(XFS_BUF_ISBUSY(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+
+	/*
+	 * calculate the location of the dquot inside the buffer.
+	 */
+	ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+
+	/*
+	 * A simple sanity check in case we got a corrupted dquot...
+	 */
+	if (xfs_qm_dqcheck(ddq, id,
+			   dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+			   "dqtobp")) {
+		if (!(flags & XFS_QMOPT_DQREPAIR)) {
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EIO);
+		}
+		XFS_BUF_BUSY(bp); /* We dirtied this */
+	}
+
+	*O_bpp = bp;
+	*O_ddpp = ddq;
+
+	return (0);
+}
+
+
+/*
+ * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
+ * and release the buffer immediately.
+ *
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+	xfs_trans_t	*tp,
+	xfs_dqid_t	id,
+	xfs_dquot_t	*dqp,	/* dquot to get filled in */
+	uint		flags)
+{
+	xfs_disk_dquot_t *ddqp;
+	xfs_buf_t	 *bp;
+	int		 error;
+
+	/*
+	 * get a pointer to the on-disk dquot and the buffer containing it
+	 * dqp already knows its own type (GROUP/USER).
+	 */
+	xfs_dqtrace_entry(dqp, "DQREAD");
+	if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) {
+		return (error);
+	}
+
+	/* copy everything from disk dquot to the incore dquot */
+	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+	ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+	xfs_qm_dquot_logitem_init(dqp);
+
+	/*
+	 * Reservation counters are defined as reservation plus current usage
+	 * to avoid having to add every time.
+	 */
+	dqp->q_res_bcount = INT_GET(ddqp->d_bcount, ARCH_CONVERT);
+	dqp->q_res_icount = INT_GET(ddqp->d_icount, ARCH_CONVERT);
+	dqp->q_res_rtbcount = INT_GET(ddqp->d_rtbcount, ARCH_CONVERT);
+
+	/* Mark the buf so that this will stay incore a little longer */
+	XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+	/*
+	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+	 * So we need to release with xfs_trans_brelse().
+	 * The strategy here is identical to that of inodes; we lock
+	 * the dquot in xfs_qm_dqget() before making it accessible to
+	 * others. This is because dquots, like inodes, need a good level of
+	 * concurrency, and we don't want to take locks on the entire buffers
+	 * for dquot accesses.
+	 * Note also that the dquot buffer may even be dirty at this point, if
+	 * this particular dquot was repaired. We still aren't afraid to
+	 * brelse it because we have the changes incore.
+	 */
+	ASSERT(XFS_BUF_ISBUSY(bp));
+	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+	xfs_trans_brelse(tp, bp);
+
+	return (error);	/* error is 0 here: the failure case returned above */
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,	 /* gid or uid, depending on type */
+	uint		type,	 /* UDQUOT or GDQUOT */
+	uint		flags,	 /* DQALLOC, DQREPAIR */
+	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+	xfs_trans_t	*tp;
+	int		cancelflags=0;
+
+	dqp = xfs_qm_dqinit(mp, id, type);
+	tp = NULL;
+	if (flags & XFS_QMOPT_DQALLOC) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+		if ((error = xfs_trans_reserve(tp,
+				       XFS_QM_DQALLOC_SPACE_RES(mp),
+				       XFS_WRITE_LOG_RES(mp) +
+					      BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
+					      128,
+				       0,
+				       XFS_TRANS_PERM_LOG_RES,
+				       XFS_WRITE_LOG_COUNT))) {
+			cancelflags = 0;
+			goto error0;
+		}
+		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+	}
+
+	/*
+	 * Read it from disk; xfs_qm_dqread() fills in the incore core,
+	 * the log item and the reservation counters from the on-disk dquot.
+	 */
+	if ((error = xfs_qm_dqread(tp, id, dqp, flags))) {
+		/*
+		 * This can happen if quotas got turned off (ESRCH),
+		 * or if the dquot didn't exist on disk and we didn't ask to
+		 * allocate (ENOENT).
+		 */
+		xfs_dqtrace_entry(dqp, "DQREAD FAIL");
+		cancelflags |= XFS_TRANS_ABORT;
+		goto error0;
+	}
+	if (tp) {
+		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
+					     NULL)))
+			goto error1;	/* skips xfs_trans_cancel() */
+	}
+
+	*O_dqpp = dqp;
+	ASSERT(! XFS_DQ_IS_LOCKED(dqp));
+	return (0);
+
+ error0:
+	ASSERT(error);
+	if (tp)
+		xfs_trans_cancel(tp, cancelflags);
+ error1:
+	xfs_qm_dqdestroy(dqp);
+	*O_dqpp = NULL;
+	return (error);
+}
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; and, these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returns 0 with the dquot locked and held, or 1 if not found.
+ */
+STATIC int
+xfs_qm_dqlookup(
+	xfs_mount_t		*mp,
+	xfs_dqid_t		id,
+	xfs_dqhash_t		*qh,
+	xfs_dquot_t		**O_dqpp)
+{
+	xfs_dquot_t		*dqp;
+	uint			flist_locked;
+	xfs_dquot_t		*d;
+
+	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+
+	flist_locked = B_FALSE;
+
+	/*
+	 * Traverse the hashchain looking for a match
+	 */
+	for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
+		/*
+		 * We already have the hashlock. We don't need the
+		 * dqlock to look at the id field of the dquot, since the
+		 * id can't be modified without the hashlock anyway.
+		 */
+		if (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id && dqp->q_mount == mp) {
+			xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
+			/*
+			 * All in core dquots must be on the dqlist of mp
+			 */
+			ASSERT(dqp->MPL_PREVP != NULL);
+
+			xfs_dqlock(dqp);
+			if (dqp->q_nrefs == 0) {
+				ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
+				if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+					xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
+
+					/*
+					 * We may have raced with dqreclaim_one()
+					 * (and lost). So, flag that we don't
+					 * want the dquot to be reclaimed.
+					 */
+					dqp->dq_flags |= XFS_DQ_WANT;
+					xfs_dqunlock(dqp);
+					xfs_qm_freelist_lock(xfs_Gqm);
+					xfs_dqlock(dqp);
+					dqp->dq_flags &= ~(XFS_DQ_WANT);
+				}
+				flist_locked = B_TRUE;
+			}
+
+			/*
+			 * id couldn't have changed; we had the hashlock all
+			 * along
+			 */
+			ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+
+			if (flist_locked) {
+				if (dqp->q_nrefs != 0) {	/* got referenced while the dqlock was dropped */
+					xfs_qm_freelist_unlock(xfs_Gqm);
+					flist_locked = B_FALSE;
+				} else {
+					/*
+					 * take it off the freelist
+					 */
+					xfs_dqtrace_entry(dqp,
+							"DQLOOKUP: TAKEOFF FL");
+					XQM_FREELIST_REMOVE(dqp);
+					/* xfs_qm_freelist_print(&(xfs_Gqm->
+							qm_dqfreelist),
+							"after removal"); */
+				}
+			}
+
+			/*
+			 * grab a reference
+			 */
+			XFS_DQHOLD(dqp);
+
+			if (flist_locked)
+				xfs_qm_freelist_unlock(xfs_Gqm);
+			/*
+			 * move the dquot to the front of the hashchain
+			 */
+			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+			if (dqp->HL_PREVP != &qh->qh_next) {
+				xfs_dqtrace_entry(dqp,
+						  "DQLOOKUP: HASH MOVETOFRONT");
+				if ((d = dqp->HL_NEXT))
+					d->HL_PREVP = dqp->HL_PREVP;
+				*(dqp->HL_PREVP) = d;
+				d = qh->qh_next;
+				d->HL_PREVP = &dqp->HL_NEXT;
+				dqp->HL_NEXT = d;
+				dqp->HL_PREVP = &qh->qh_next;
+				qh->qh_next = dqp;
+			}
+			xfs_dqtrace_entry(dqp, "LOOKUP END");
+			*O_dqpp = dqp;
+			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+			return (0);
+		}
+	}
+
+	*O_dqpp = NULL;
+	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+	return (1);
+}
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
+ * a locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,	  /* locked inode (optional) */
+	xfs_dqid_t	id,	  /* gid or uid, depending on type */
+	uint		type,	  /* UDQUOT or GDQUOT */
+	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
+{
+	xfs_dquot_t	*dqp;
+	xfs_dqhash_t	*h;
+	uint		version;
+	int		error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+		return (ESRCH);
+	}
+	h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+	if (xfs_do_dqerror) {
+		if ((xfs_dqerror_dev == mp->m_dev) &&
+		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+			cmn_err(CE_DEBUG, "Returning error in dqget");
+			return (EIO);
+		}
+	}
+#endif
+
+ again:
+
+#ifdef DEBUG
+	ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP);
+	if (ip) {
+		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+		if (type == XFS_DQ_USER)
+			ASSERT(ip->i_udquot == NULL);
+		else
+			ASSERT(ip->i_gdquot == NULL);
+	}
+#endif
+	XFS_DQ_HASH_LOCK(h);
+
+	/*
+	 * Look in the cache (hashtable).
+	 * The chain is kept locked during lookup.
+	 */
+	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+		/*
+		 * The dquot was found, moved to the front of the chain,
+		 * taken off the freelist if it was on it, and locked
+		 * at this point. Just unlock the hashchain and return.
+		 */
+		ASSERT(*O_dqpp);
+		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+		XFS_DQ_HASH_UNLOCK(h);
+		xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
+		return (0);	/* success */
+	}
+	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+	/*
+	 * Dquot cache miss. We don't want to keep the inode lock across
+	 * a (potential) disk read. Also we don't want to deal with the lock
+	 * ordering between quotainode and this inode. OTOH, dropping the inode
+	 * lock here means dealing with a chown that can happen before
+	 * we re-acquire the lock.
+	 */
+	if (ip)
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	/*
+	 * Save the hashchain version stamp, and unlock the chain, so that
+	 * we don't keep the lock across a disk read
+	 */
+	version = h->qh_version;
+	XFS_DQ_HASH_UNLOCK(h);
+
+	/*
+	 * Allocate the dquot on the kernel heap, and read the ondisk
+	 * portion off the disk. Also, do all the necessary initialization
+	 * This can return ENOENT if dquot didn't exist on disk and we didn't
+	 * ask it to allocate; ESRCH if quotas got turned off suddenly.
+	 */
+	if ((error = xfs_qm_idtodq(mp, id, type,
+				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+					   XFS_QMOPT_DOWARN),
+				  &dqp))) {
+		if (ip)
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+		return (error);	/* a passed-in inode was relocked just above */
+	}
+
+	/*
+	 * See if this is mount code calling to look at the overall quota limits
+	 * which are stored in the id == 0 user or group's dquot.
+	 * Since we may not have done a quotacheck by this point, just return
+	 * the dquot without attaching it to any hashtables, lists, etc, or even
+	 * taking a reference.
+	 * The caller must dqdestroy this once done.
+	 */
+	if (flags & XFS_QMOPT_DQSUSER) {
+		ASSERT(id == 0);
+		ASSERT(! ip);
+		goto dqret;
+	}
+
+	/*
+	 * Dquot lock comes after hashlock in the lock ordering
+	 */
+	ASSERT(! XFS_DQ_IS_LOCKED(dqp));
+	if (ip) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		if (! XFS_IS_DQTYPE_ON(mp, type)) {
+			/* inode stays locked on return */
+			xfs_qm_dqdestroy(dqp);
+			return XFS_ERROR(ESRCH);
+		}
+		/*
+		 * A dquot could be attached to this inode by now, since
+		 * we had dropped the ilock.
+		 */
+		if (type == XFS_DQ_USER) {
+			if (ip->i_udquot) {
+				xfs_qm_dqdestroy(dqp);
+				dqp = ip->i_udquot;
+				xfs_dqlock(dqp);
+				goto dqret;
+			}
+		} else {
+			if (ip->i_gdquot) {
+				xfs_qm_dqdestroy(dqp);
+				dqp = ip->i_gdquot;
+				xfs_dqlock(dqp);
+				goto dqret;
+			}
+		}
+	}
+
+	/*
+	 * Hashlock comes after ilock in lock order
+	 */
+	XFS_DQ_HASH_LOCK(h);
+	if (version != h->qh_version) {
+		xfs_dquot_t *tmpdqp;
+		/*
+		 * Now, see if somebody else put the dquot in the
+		 * hashtable before us. This can happen because we didn't
+		 * keep the hashchain lock. We don't have to worry about
+		 * lock order between the two dquots here since dqp isn't
+		 * on any findable lists yet.
+		 */
+		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+			/*
+			 * Duplicate found. Just throw away the new dquot
+			 * and start over.
+			 */
+			xfs_qm_dqput(tmpdqp);
+			XFS_DQ_HASH_UNLOCK(h);
+			xfs_qm_dqdestroy(dqp);
+			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+			goto again;
+		}
+	}
+
+	/*
+	 * Put the dquot at the beginning of the hash-chain and mp's list
+	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+	 */
+	ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
+	dqp->q_hash = h;
+	XQM_HASHLIST_INSERT(h, dqp);
+
+	/*
+	 * Attach this dquot to this filesystem's list of all dquots,
+	 * kept inside the mount structure in m_quotainfo field
+	 */
+	xfs_qm_mplist_lock(mp);
+
+	/*
+	 * We return a locked dquot to the caller, with a reference taken
+	 */
+	xfs_dqlock(dqp);
+	dqp->q_nrefs = 1;
+
+	XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
+
+	xfs_qm_mplist_unlock(mp);
+	XFS_DQ_HASH_UNLOCK(h);
+ dqret:
+	ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
+	xfs_dqtrace_entry(dqp, "DQGET DONE");
+	*O_dqpp = dqp;
+	return (0);
+}
+
+
+/*
+ * Release a reference to the locked dquot (decrement ref-count)
+ * and unlock it. If the last reference goes away and a group quota
+ * is attached to this dquot, carefully release that too without
+ * tripping over deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+	xfs_dquot_t	*dqp)
+{
+	xfs_dquot_t	*gdqp;
+
+	ASSERT(dqp->q_nrefs > 0);
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	xfs_dqtrace_entry(dqp, "DQPUT");
+
+	if (dqp->q_nrefs != 1) {	/* not the last reference: no freelist work needed */
+		dqp->q_nrefs--;
+		xfs_dqunlock(dqp);
+		return;
+	}
+
+	/*
+	 * drop the dqlock and acquire the freelist and dqlock
+	 * in the right order; but try to get it out-of-order first
+	 */
+	if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+		xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
+		xfs_dqunlock(dqp);
+		xfs_qm_freelist_lock(xfs_Gqm);
+		xfs_dqlock(dqp);
+	}
+
+	while (1) {
+		gdqp = NULL;
+
+		/* We can't depend on nrefs being == 1 here */
+		if (--dqp->q_nrefs == 0) {
+			xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
+			/*
+			 * insert at end of the freelist.
+			 */
+			XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
+
+			/*
+			 * If we just added a udquot to the freelist, then
+			 * we want to release the gdquot reference that
+			 * it (probably) has. Otherwise it'll keep the
+			 * gdquot from getting reclaimed.
+			 */
+			if ((gdqp = dqp->q_gdquot)) {
+				/*
+				 * Avoid a recursive dqput call
+				 */
+				xfs_dqlock(gdqp);
+				dqp->q_gdquot = NULL;
+			}
+
+			/* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
+			   "@@@@@++ Free list (after append) @@@@@+");
+			   */
+		}
+		xfs_dqunlock(dqp);
+
+		/*
+		 * If we had a group quota inside the user quota as a hint,
+		 * release it now.
+		 */
+		if (! gdqp)
+			break;
+		dqp = gdqp;	/* loop once more to drop the group dquot's reference */
+	}
+	xfs_qm_freelist_unlock(xfs_Gqm);
+}
+
+/*
+ * Release a dquot: lock it, then dqput() it. No flush is done here.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+	xfs_dquot_t	*dqp)
+{
+	ASSERT(dqp);
+	xfs_dqtrace_entry(dqp, "DQRELE");
+
+	xfs_dqlock(dqp);
+	/*
+	 * We don't care to flush it if the dquot is dirty here.
+	 * That will create stutters that we want to avoid.
+	 * Instead we do a delayed write when we try to reclaim
+	 * a dirty dquot. Also xfs_sync will take part of the burden...
+	 */
+	xfs_qm_dqput(dqp);
+}
+
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ *
+ * flags selects the write: XFS_QMOPT_DELWRI (delayed), XFS_QMOPT_ASYNC,
+ * otherwise synchronous. Returns 0 or an error; every path that returns
+ * without issuing the write releases the flush lock itself.
+ */
+int
+xfs_qm_dqflush(
+	xfs_dquot_t		*dqp,
+	uint			flags)
+{
+	xfs_mount_t		*mp;
+	xfs_buf_t		*bp;
+	xfs_disk_dquot_t	*ddqp;
+	int			error;
+	SPLDECL(s);
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+	xfs_dqtrace_entry(dqp, "DQFLUSH");
+
+	/* Nothing to write if the dquot is not dirty. */
+	if (!XFS_DQ_IS_DIRTY(dqp)) {
+		xfs_dqfunlock(dqp);
+		return (0);
+	}
+
+	/* Can't flush a pinned dquot. Wait for it. */
+	xfs_qm_dqunpin_wait(dqp);
+
+	/*
+	 * This may have been unpinned because the filesystem is shutting
+	 * down forcibly. If that's the case we must not write this dquot
+	 * to disk, because the log record didn't make it to disk!
+	 */
+	if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
+		dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+		xfs_dqfunlock(dqp);
+		return XFS_ERROR(EIO);
+	}
+
+	/*
+	 * Get the buffer containing the on-disk dquot.
+	 * We don't need a transaction envelope because we know that
+	 * the on-disk dquot has already been allocated.
+	 */
+	if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
+		xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
+		ASSERT(error != ENOENT);
+		/* Quotas could have gotten turned off (ESRCH) */
+		xfs_dqfunlock(dqp);
+		return (error);
+	}
+
+	/*
+	 * The incore copy failed its sanity check: shut the filesystem down.
+	 * No I/O completion will run for this dquot, so give back the buffer
+	 * and the flush lock here, as the other early returns do.
+	 */
+	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN,
+			   "dqflush (incore copy)")) {
+		xfs_buf_relse(bp);
+		xfs_dqfunlock(dqp);
+		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
+		return XFS_ERROR(EIO);
+	}
+
+	/* This is the only portion of data that needs to persist */
+	memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
+
+	/* Clear the dirty field and remember the flush lsn for later use. */
+	dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+	mp = dqp->q_mount;
+
+	/* lsn is 64 bits */
+	AIL_LOCK(mp, s);
+	dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
+	AIL_UNLOCK(mp, s);
+
+	/*
+	 * Attach an iodone routine so that we can remove this dquot from the
+	 * AIL and release the flush lock once the dquot is synced to disk.
+	 */
+	xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
+			      xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
+	/*
+	 * If the buffer is pinned then push on the log so we won't
+	 * get stuck waiting in the write for too long.
+	 */
+	if (XFS_BUF_ISPINNED(bp)) {
+		xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
+		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+	}
+
+	if (flags & XFS_QMOPT_DELWRI) {
+		xfs_bdwrite(mp, bp);
+	} else if (flags & XFS_QMOPT_ASYNC) {
+		xfs_bawrite(mp, bp);
+	} else {
+		error = xfs_bwrite(mp, bp);
+	}
+	xfs_dqtrace_entry(dqp, "DQFLUSH END");
+	/* dqp is still locked, but caller is free to unlock it now. */
+	return (error);
+}
+
+/*
+ * I/O completion routine for a dquot flush; xfs_qm_dqflush() attaches
+ * it to the buffer that holds the on-disk dquot.  It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk.  It pulls the dquot logitem off the AIL unless the
+ * item was re-logged after the flush started, and then drops the
+ * dquot's flush lock.  Inodes are handled in much the same way.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_dqflush_done(
+	xfs_buf_t		*bp,
+	xfs_dq_logitem_t	*qip)
+{
+	xfs_dquot_t		*dqp = qip->qli_dquot;
+	xfs_log_item_t		*lip = &qip->qli_item;
+	SPLDECL(s);
+
+	/*
+	 * Only pull the item from the AIL if its position in the log
+	 * is unchanged since the flush was started, i.e. its lsn still
+	 * equals the flush lsn recorded back then.  The lsn is compared
+	 * once without the AIL lock because that is cheap, and once
+	 * more with the lock held before the item is actually taken
+	 * off the AIL.
+	 */
+	if ((lip->li_flags & XFS_LI_IN_AIL) &&
+	    lip->li_lsn == qip->qli_flush_lsn) {
+
+		AIL_LOCK(dqp->q_mount, s);
+		if (lip->li_lsn != qip->qli_flush_lsn) {
+			/* re-logged in the meantime: it stays on the AIL */
+			AIL_UNLOCK(dqp->q_mount, s);
+		} else {
+			/* xfs_trans_delete_ail() drops the AIL lock. */
+			xfs_trans_delete_ail(dqp->q_mount,
+					     (xfs_log_item_t*)qip, s);
+		}
+	}
+
+	/*
+	 * The flush is complete, so let go of the dq's flush lock.
+	 */
+	xfs_dqfunlock(dqp);
+}
+
+
+int
+xfs_qm_dqflock_nowait(
+	xfs_dquot_t *dqp)
+{
+	/* Nonzero from cpsema() means we now own the flush lock. */
+	int got_flock = cpsema(&dqp->q_flock);
+
+	if (!got_flock)
+		return (got_flock);
+	/* XXX ifdef these out */
+	dqp->dq_flags |= XFS_DQ_FLOCKED;
+	return (got_flock);
+}
+
+/* Attempt the dquot lock with mutex_trylock() and return its result. */
+int
+xfs_qm_dqlock_nowait(
+	xfs_dquot_t *dqp)
+{
+	return (mutex_trylock(&((dqp)->q_qlock)));
+}
+
+void
+xfs_dqlock(
+	xfs_dquot_t *dqp)
+{
+	mutex_lock(&(dqp->q_qlock), PINOD);	/* take the dquot's q_qlock */
+}
+
+void
+xfs_dqunlock(
+	xfs_dquot_t *dqp)
+{
+	mutex_unlock(&(dqp->q_qlock));
+	if (dqp->q_logitem.qli_dquot == dqp) {	/* log item points back at us */
+		/* Use the log item's mount: dqp->q_mount may have been cleared */
+		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
+					(xfs_log_item_t*)&(dqp->q_logitem));
+	}
+}
+
+/* Drop the dquot lock without calling xfs_trans_unlocked_item(). */
+void
+xfs_dqunlock_nonotify(
+	xfs_dquot_t *dqp)
+{
+	mutex_unlock(&(dqp->q_qlock));
+}
+
+void
+xfs_dqlock2(
+	xfs_dquot_t	*d1,
+	xfs_dquot_t	*d2)
+{
+	xfs_dquot_t	*first = d1;
+	xfs_dquot_t	*second = d2;
+
+	if (d1 && d2) {
+		ASSERT(d1 != d2);
+		/* lock the dquot with the smaller id first */
+		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
+			first = d2;
+			second = d1;
+		}
+	}
+	/* either pointer may be NULL, in which case it is skipped */
+	if (first)
+		xfs_dqlock(first);
+	if (second)
+		xfs_dqlock(second);
+}
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding. The hash lock held on entry is dropped before returning.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+	xfs_dquot_t	*dqp,
+	uint		flags)
+{
+	xfs_dqhash_t	*thishash;
+	xfs_mount_t	*mp;
+
+	mp = dqp->q_mount;
+
+	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+	ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
+
+	xfs_dqlock(dqp);
+	/*
+	 * We really can't afford to purge a dquot that is
+	 * referenced, because these are hard refs.
+	 * It shouldn't happen in general because we went thru _all_ inodes in
+	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
+	 * However it is possible that we have dquots with temporary
+	 * references that are not attached to an inode. e.g. see xfs_setattr().
+	 */
+	if (dqp->q_nrefs != 0) {
+		xfs_dqunlock(dqp);
+		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+		return (1);
+	}
+
+	ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+
+	/*
+	 * If we're turning off quotas, we have to make sure that, for
+	 * example, we don't delete quota disk blocks while dquots are
+	 * in the process of getting written to those disk blocks.
+	 * This dquot might well be on AIL, and we can't leave it there
+	 * if we're turning off quotas. Basically, we need this flush
+	 * lock, and are willing to block on it.
+	 */
+	if (! xfs_qm_dqflock_nowait(dqp)) {
+		/*
+		 * Block on the flush lock after nudging dquot buffer,
+		 * if it is incore.
+		 */
+		xfs_qm_dqflock_pushbuf_wait(dqp);
+	}
+
+	/*
+	 * XXX If we're turning this type of quotas off, we don't care
+	 * about the dirty metadata sitting in this dquot. OTOH, if
+	 * we're unmounting, we do care, so we flush it and wait.
+	 */
+	if (XFS_DQ_IS_DIRTY(dqp)) {
+		xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
+		/* dqflush unlocks dqflock */
+		/*
+		 * Given that dqpurge is a very rare occurrence, it is OK
+		 * that we're holding the hashlist and mplist locks
+		 * across the disk write. But, ... XXXsup
+		 *
+		 * We don't care about getting disk errors here. We need
+		 * to purge this dquot anyway, so we go ahead regardless.
+		 */
+		(void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
+		xfs_dqflock(dqp);
+	}
+	ASSERT(dqp->q_pincount == 0);
+	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+	thishash = dqp->q_hash;
+	XQM_HASHLIST_REMOVE(thishash, dqp);
+	XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
+	/*
+	 * XXX Move this to the front of the freelist, if we can get the
+	 * freelist lock.
+	 */
+	ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+
+	dqp->q_mount = NULL;
+	dqp->q_hash = NULL;
+	dqp->dq_flags = XFS_DQ_INACTIVE;
+	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+	xfs_dqfunlock(dqp);
+	xfs_dqunlock(dqp);
+	XFS_DQ_HASH_UNLOCK(thishash);
+	return (0);
+}
+
+/* QUOTADEBUG only: dump the fields of an incore dquot via cmn_err(). */
+#ifdef QUOTADEBUG
+void
+xfs_qm_dqprint(xfs_dquot_t *dqp)
+{
+	cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
+	cmn_err(CE_DEBUG, "---- dquotID =  %d",
+		(int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- type    =  %s",
+		XFS_QM_ISUDQ(dqp) ? "USR" : "GRP");
+	cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
+	cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
+	cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
+	cmn_err(CE_DEBUG, "---- blkhlimit =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
+		(int) INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- blkslimit =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
+		(int)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- inohlimit =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
+		(int)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- inoslimit =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
+		(int)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- bcount  =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+		(int)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- icount  =  %Lu (0x%x)",
+		INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+		(int)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- btimer  =  %d",
+		(int)INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---- itimer  =  %d",
+		(int)INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, "---------------------------");
+}
+#endif /* QUOTADEBUG */
+
+/*
+ * Nudge the dquot's buffer towards disk if it is incore, then
+ * sleep on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+	xfs_dquot_t	*dqp)
+{
+	xfs_mount_t	*mp = dqp->q_mount;
+	xfs_buf_t	*bp;
+
+	/*
+	 * The dquot may have been flushed delayed write, in which
+	 * case its buffer can still be sitting incore.  If we can
+	 * get hold of that buffer, send it out immediately; the
+	 * flush lock can be acquired once the I/O completes.
+	 */
+	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+			XFS_QI_DQCHUNKLEN(mp), XFS_INCORE_TRYLOCK);
+	if (bp != NULL && !XFS_BUF_ISDELAYWRITE(bp)) {
+		xfs_buf_relse(bp);
+	} else if (bp != NULL) {
+		/*
+		 * Delayed write buffer: force the log first if it is pinned.
+		 */
+		if (XFS_BUF_ISPINNED(bp)) {
+			xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+		}
+		xfs_bawrite(mp, bp);
+	}
+
+	xfs_dqflock(dqp);
+}
diff -Nru a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_dquot.h	Mon Mar 31 13:41:08 2003
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both those in its algorithms. They make every attempt
+ * to not be a bottleneck when quotas are on and have minimal impact, if any,
+ * when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+	struct xfs_dquot *qh_next;
+	mutex_t		  qh_lock;
+	uint		  qh_version;	/* ever increasing version */
+	uint		  qh_nelems;	/* number of dquots on the list */
+} xfs_dqhash_t;
+
+typedef struct xfs_dqlink {
+	struct xfs_dquot  *ql_next;	/* forward link */
+	struct xfs_dquot **ql_prevp;	/* pointer to prev ql_next */
+} xfs_dqlink_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * This is the marker which is designed to occupy the first few
+ * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
+ * must come first.
+ * This serves as the marker ("sentinel") when we have to restart list
+ * iterations because of locking considerations.
+ */
+typedef struct xfs_dqmarker {
+	struct xfs_dquot*dqm_flnext;	/* link to freelist: must be first */
+	struct xfs_dquot*dqm_flprev;
+	xfs_dqlink_t	 dqm_mplist;	/* link to mount's list of dquots */
+	xfs_dqlink_t	 dqm_hashlist;	/* link to the hash chain */
+	uint		 dqm_flags;	/* various flags (XFS_DQ_*) */
+} xfs_dqmarker_t;
+
+/*
+ * The incore dquot structure; the xfs_dqmarker_t must stay first.
+ */
+typedef struct xfs_dquot {
+	xfs_dqmarker_t	 q_lists;	/* list ptrs, dq_flags (marker) */
+	xfs_dqhash_t	*q_hash;	/* the hashchain header */
+	struct xfs_mount*q_mount;	/* filesystem this relates to */
+	struct xfs_trans*q_transp;	/* trans this belongs to currently */
+	uint		 q_nrefs;	/* # active refs from inodes */
+	xfs_daddr_t	 q_blkno;	/* blkno of dquot buffer */
+	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
+	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
+
+	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */
+	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
+	xfs_dq_logitem_t q_logitem;	/* dquot log item */
+	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
+	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
+	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
+	mutex_t		 q_qlock;	/* quota lock */
+	sema_t		 q_flock;	/* flush lock */
+	uint		 q_pincount;	/* pin count for this dquot */
+	sv_t		 q_pinwait;	/* sync var for pinning */
+#ifdef DQUOT_TRACING
+	struct ktrace	*q_trace;	/* trace header structure */
+#endif
+} xfs_dquot_t;
+
+
+#define dq_flnext	q_lists.dqm_flnext
+#define dq_flprev	q_lists.dqm_flprev
+#define dq_mplist	q_lists.dqm_mplist
+#define dq_hashlist	q_lists.dqm_hashlist
+#define dq_flags	q_lists.dqm_flags
+
+#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
+
+/*
+ * Quota Accounting flags
+ */
+#define XFS_ALL_QUOTA_ACCT	(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD)
+#define XFS_ALL_QUOTA_ACTV	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
+#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD)
+
+#define XFS_IS_QUOTA_RUNNING(mp)  ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
+
+/*
+ * Quota Limit Enforcement flags
+ */
+#define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
+#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
+#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
+
+#ifdef DEBUG
+static inline int
+XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
+{
+	/* a trylock that fails means somebody holds q_qlock */
+	if (!mutex_trylock(&dqp->q_qlock))
+		return 1;
+	mutex_unlock(&dqp->q_qlock);
+	return 0;
+}
+#endif
+
+
+/*
+ * The following two macros simply manage the q_flock
+ * semaphore embedded in the dquot.  This semaphore synchronizes
+ * processes attempting to flush the in-core dquot back to disk.
+ */
+#define xfs_dqflock(dqp)	 { psema(&((dqp)->q_flock), PINOD | PRECALC);\
+				   (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
+#define xfs_dqfunlock(dqp)	 { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \
+				   vsema(&((dqp)->q_flock)); \
+				   (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
+
+#define XFS_DQ_PINLOCK(dqp)	   mutex_spinlock( \
+				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock))
+#define XFS_DQ_PINUNLOCK(dqp, s)   mutex_spinunlock( \
+				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
+
+#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0)
+#define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
+#define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
+				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+				 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+				     (XFS_IS_GQUOTA_ON((d)->q_mount))))
+#ifdef DQUOT_TRACING
+/*
+ * Dquot Tracing stuff.
+ */
+#define DQUOT_TRACE_SIZE	64
+#define DQUOT_KTRACE_ENTRY	1
+
+#define xfs_dqtrace_entry_ino(a,b,ip) \
+xfs_dqtrace_entry__((a), (b), (void*)__return_address, (ip))
+#define xfs_dqtrace_entry(a,b) \
+xfs_dqtrace_entry__((a), (b), (void*)__return_address, NULL)
+extern void		xfs_dqtrace_entry__(xfs_dquot_t *dqp, char *func,
+					    void *, xfs_inode_t *);
+#else
+#define xfs_dqtrace_entry(a,b)
+#define xfs_dqtrace_entry_ino(a,b,ip)
+#endif
+#ifdef QUOTADEBUG
+extern void		xfs_qm_dqprint(xfs_dquot_t *);
+#else
+#define xfs_qm_dqprint(a)
+#endif
+
+extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int		xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern int		xfs_qm_dqflock_nowait(xfs_dquot_t *);
+extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
+					xfs_disk_dquot_t *);
+extern int		xfs_qm_dqwarn(xfs_disk_dquot_t *, uint);
+extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+					xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void		xfs_qm_dqput(xfs_dquot_t *);
+extern void		xfs_qm_dqrele(xfs_dquot_t *);
+extern void		xfs_dqlock(xfs_dquot_t *);
+extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void		xfs_dqunlock(xfs_dquot_t *);
+extern void		xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff -Nru a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_dquot_item.c	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,680 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include "xfs_qm.h"
+
+
+/*
+ * Report how many log iovecs a dquot log item takes up.
+ */
+/* ARGSUSED */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+	xfs_dq_logitem_t	*logitem)
+{
+	/*
+	 * Always two: the log format header plus the dquot itself.
+	 */
+	return 2;
+}
+
+/*
+ * Fill in the log iovecs describing the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+	xfs_dq_logitem_t	*logitem,
+	xfs_log_iovec_t		*logvec)
+{
+	ASSERT(logitem);
+	ASSERT(logitem->qli_dquot);
+
+	/* first iovec: the dquot log format structure */
+	logvec[0].i_addr = (xfs_caddr_t)&logitem->qli_format;
+	logvec[0].i_len  = sizeof(xfs_dq_logformat_t);
+	/* second iovec: the dquot's q_core */
+	logvec[1].i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
+	logvec[1].i_len  = sizeof(xfs_disk_dquot_t);
+
+	ASSERT(2 == logitem->qli_item.li_desc->lid_size);
+	logitem->qli_format.qlf_size = 2;
+}
+
+/*
+ * Bump the pin count of the dquot behind this log item.
+ * The count is guarded by the pinlock taken via XFS_DQ_PINLOCK().
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+	xfs_dq_logitem_t *logitem)
+{
+	xfs_dquot_t	*dqp = logitem->qli_dquot;
+	unsigned long	s;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	s = XFS_DQ_PINLOCK(dqp);
+	dqp->q_pincount++;
+	XFS_DQ_PINUNLOCK(dqp, s);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_qm_dqunpin_wait() if the count goes to 0.  The dquot
+ * must have been pinned before via xfs_qm_dquot_logitem_pin().
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+	xfs_dq_logitem_t *logitem,
+	int		  stale)
+{
+	unsigned long	s;
+	xfs_dquot_t *dqp;
+
+	dqp = logitem->qli_dquot;
+	ASSERT(dqp->q_pincount > 0);
+	s = XFS_DQ_PINLOCK(dqp);
+	dqp->q_pincount--;
+	if (dqp->q_pincount == 0) {
+		sv_broadcast(&dqp->q_pinwait);
+	}
+	XFS_DQ_PINUNLOCK(dqp, s);
+}
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_unpin_remove(
+	xfs_dq_logitem_t *logitem,
+	xfs_trans_t	 *tp)
+{
+	xfs_qm_dquot_logitem_unpin(logitem, 0);
+}
+
+/*
+ * Given the logitem, this queues the corresponding dquot entry for a
+ * delayed write to disk. It is called with the dquot locked and its
+ * flush lock held; we simply get xfs_qm_dqflush() to do the work, and
+ * unlock the dquot at the end. Any error from the flush is ignored.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+	xfs_dq_logitem_t	*logitem)
+{
+	xfs_dquot_t	*dqp;
+
+	dqp = logitem->qli_dquot;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+
+	/*
+	 * Since we were able to lock the dquot's flush lock and
+	 * we found it on the AIL, the dquot must be dirty.  This
+	 * is because the dquot is removed from the AIL while still
+	 * holding the flush lock in xfs_qm_dqflush_done().  Thus,
+	 * nobody else can be in the middle of flushing the dquot.
+	 * xfs_qm_dqflush() decodes its flags argument as XFS_QMOPT_*
+	 * values, so the delayed write is requested with that flag.
+	 */
+	xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+	xfs_dqunlock(dqp);
+}
+
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+	xfs_dq_logitem_t	*l,
+	xfs_lsn_t		lsn)
+{
+	/*
+	 * The whole dquot is re-logged every time it becomes dirty,
+	 * so only the most recent copy matters: hand back lsn as is.
+	 */
+	return lsn;
+}
+
+
+/*
+ * Wait for the given dquot to be unpinned; the dquot lock must be held.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+	xfs_dquot_t	*dqp)
+{
+	SPLDECL(s);
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	if (dqp->q_pincount == 0) {	/* peek without the pinlock */
+		return;
+	}
+
+	/*
+	 * Give the log a push so we don't wait here too long.
+	 */
+	xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
+	s = XFS_DQ_PINLOCK(dqp);
+	if (dqp->q_pincount == 0) {	/* recheck with the pinlock held */
+		XFS_DQ_PINUNLOCK(dqp, s);
+		return;
+	}
+	sv_wait(&(dqp->q_pinwait), PINOD,	/* woken by logitem_unpin */
+		&(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.  Every path clears qli_pushbuf_flag and unlocks the dquot.
+ *
+ * We must not be holding the AIL_LOCK at this point. Calling incore() to
+ * search the buffercache can be a time consuming thing, and AIL_LOCK is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+	xfs_dq_logitem_t    *qip)
+{
+	xfs_dquot_t	*dqp;
+	xfs_mount_t	*mp;
+	xfs_buf_t	*bp;
+	uint		dopush;
+
+	dqp = qip->qli_dquot;
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	/*
+	 * The qli_pushbuf_flag keeps others from
+	 * trying to duplicate our effort.
+	 */
+	ASSERT(qip->qli_pushbuf_flag != 0);
+	ASSERT(qip->qli_push_owner == get_thread_id());
+
+	/*
+	 * If flushlock isn't locked anymore, chances are that the
+	 * dquot flush completed and the dquot was taken off the AIL.
+	 * So, just get out.
+	 */
+	if ((valusema(&(dqp->q_flock)) > 0)  ||
+	    ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
+		qip->qli_pushbuf_flag = 0;
+		xfs_dqunlock(dqp);
+		return;
+	}
+	mp = dqp->q_mount;
+	bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
+		    XFS_QI_DQCHUNKLEN(mp),
+		    XFS_INCORE_TRYLOCK);
+	if (bp != NULL) {
+		if (XFS_BUF_ISDELAYWRITE(bp)) {
+			dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
+				  (valusema(&(dqp->q_flock)) <= 0));
+			qip->qli_pushbuf_flag = 0;
+			xfs_dqunlock(dqp);
+
+			if (XFS_BUF_ISPINNED(bp)) {
+				xfs_log_force(mp, (xfs_lsn_t)0,
+					      XFS_LOG_FORCE);
+			}
+			if (dopush) {
+#ifdef XFSRACEDEBUG
+				delay_for_intr();
+				delay(300);
+#endif
+				xfs_bawrite(mp, bp);
+			} else {
+				xfs_buf_relse(bp);
+			}
+		} else {
+			qip->qli_pushbuf_flag = 0;
+			xfs_dqunlock(dqp);
+			xfs_buf_relse(bp);
+		}
+		return;
+	}
+
+	qip->qli_pushbuf_flag = 0;
+	xfs_dqunlock(dqp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item.  Don't sleep on the dquot lock or the flush lock.
+ * Returns XFS_ITEM_PINNED, XFS_ITEM_LOCKED, XFS_ITEM_SUCCESS,
+ * XFS_ITEM_PUSHBUF or XFS_ITEM_FLUSHING.  The dquot is left locked
+ * for SUCCESS and PUSHBUF only.  PUSHBUF means the flush lock is
+ * already held and this caller set qli_pushbuf_flag, so the
+ * pushbuf routine should look for the dquot's delayed write buffer;
+ * that is not done here, to avoid sleeping in a strategy routine.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+	xfs_dq_logitem_t	*qip)
+{
+	xfs_dquot_t		*dqp;
+	uint			retval;
+
+	dqp = qip->qli_dquot;
+	if (dqp->q_pincount > 0)
+		return (XFS_ITEM_PINNED);
+
+	if (! xfs_qm_dqlock_nowait(dqp))
+		return (XFS_ITEM_LOCKED);
+
+	retval = XFS_ITEM_SUCCESS;
+	if (! xfs_qm_dqflock_nowait(dqp)) {
+		/*
+		 * The dquot is already being flushed.	It may have been
+		 * flushed delayed write, however, and we don't want to
+		 * get stuck waiting for that to complete.  So, we want to check
+		 * to see if we can lock the dquot's buffer without sleeping.
+		 * If we can and it is marked for delayed write, then we
+		 * hold it and send it out from the push routine.  We don't
+		 * want to do that now since we might sleep in the device
+		 * strategy routine.  We also don't want to grab the buffer lock
+		 * here because we'd like not to call into the buffer cache
+		 * while holding the AIL_LOCK.
+		 * Make sure to only return PUSHBUF if we set pushbuf_flag
+		 * ourselves.  If someone else is doing it then we don't
+		 * want to go to the push routine and duplicate their efforts.
+		 */
+		if (qip->qli_pushbuf_flag == 0) {
+			qip->qli_pushbuf_flag = 1;
+			ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
+#ifdef DEBUG
+			qip->qli_push_owner = get_thread_id();
+#endif
+			/*
+			 * The dquot is left locked.
+			 */
+			retval = XFS_ITEM_PUSHBUF;
+		} else {
+			retval = XFS_ITEM_FLUSHING;
+			xfs_dqunlock_nonotify(dqp);
+		}
+	}
+
+	ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
+	return (retval);
+}
+
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the dquot's transaction pointer, which is specific
+ * to the current transaction.  The dquot is always unlocked:
+ * there is no hold flag for dquot log items.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+	xfs_dq_logitem_t    *ql)
+{
+	xfs_dquot_t	*dqp;
+
+	ASSERT(ql != NULL);
+	dqp = ql->qli_dquot;
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	/*
+	 * Clear the transaction pointer in the dquot
+	 */
+	dqp->q_transp = NULL;
+
+	/*
+	 * dquots are never 'held' from getting unlocked at the end of
+	 * a transaction.  Their locking and unlocking is hidden inside the
+	 * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+	 * for the logitem.
+	 */
+	xfs_dqunlock(dqp);
+}
+
+
+/*
+ * The transaction with the dquot locked has aborted.  The dquot
+ * must not be dirty within the transaction.  We simply unlock just
+ * as if the transaction had been cancelled.
+ */
+STATIC void
+xfs_qm_dquot_logitem_abort(
+	xfs_dq_logitem_t    *ql)
+{
+	xfs_qm_dquot_logitem_unlock(ql);
+}
+
+/*
+ * this needs to stamp an lsn into the dquot, I think.
+ * rpc's that look at user dquot's would then have to
+ * push on the dependency recorded in the dquot
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+	xfs_dq_logitem_t	*l,
+	xfs_lsn_t		lsn)
+{
+	return;
+}
+
+
+/*
+ * This is the ops vector for dquots
+ */
+struct xfs_item_ops xfs_dquot_item_ops = {
+	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
+	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+					xfs_qm_dquot_logitem_format,
+	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
+	.iop_unpin	= (void(*)(xfs_log_item_t*, int))
+					xfs_qm_dquot_logitem_unpin,
+	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
+					xfs_qm_dquot_logitem_unpin_remove,
+	.iop_trylock	= (uint(*)(xfs_log_item_t*))
+					xfs_qm_dquot_logitem_trylock,
+	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock,
+	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_dquot_logitem_committed,
+	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
+	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
+	.iop_pushbuf	= (void(*)(xfs_log_item_t*))
+					xfs_qm_dquot_logitem_pushbuf,
+	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+	struct xfs_dquot *dqp)
+{
+	xfs_dq_logitem_t  *lp;
+	lp = &dqp->q_logitem;
+
+	lp->qli_item.li_type = XFS_LI_DQUOT;
+	lp->qli_item.li_ops = &xfs_dquot_item_ops;
+	lp->qli_item.li_mountp = dqp->q_mount;
+	lp->qli_dquot = dqp;
+	lp->qli_format.qlf_type = XFS_LI_DQUOT;
+	lp->qli_format.qlf_id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+	lp->qli_format.qlf_blkno = dqp->q_blkno;
+	lp->qli_format.qlf_len = 1;
+	/*
+	 * This is just the offset of this dquot within its buffer
+	 * (which is currently 1 FSB and probably won't change).
+	 * Hence 32 bits for this offset should be just fine.
+	 * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+	 * here, and recompute it at recovery time.
+	 */
+	lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
+
+/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
+
+/*
+ * Report how many iovecs are needed to log a quotaoff item.
+ * A quotaoff item is logged as a single region -- its embedded
+ * quotaoff_log_format structure -- so the answer is always one.
+ */
+/*ARGSUSED*/
+STATIC uint
+xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
+{
+	return 1;
+}
+
+/*
+ * Fill in the log iovec for the given quotaoff log item.  Only one iovec
+ * is used; it points at the quotaoff_log_format structure embedded in
+ * the quotaoff item.  The ASSERT checks that the format type was set up.
+ * NOTE(review): i_len is the size of the whole log item, not of the
+ * embedded format structure that i_addr points at -- confirm intended.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t	*qf,
+			   xfs_log_iovec_t	*log_vector)
+{
+	ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+	log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
+	log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+	qf->qql_format.qf_size = 1;
+}
+
+
+/*
+ * Quotaoff items are never pinned, so there is nothing to do here.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
+{
+	/* Intentionally empty. */
+}
+
+
+/*
+ * Quotaoff items are never pinned, hence unpinning one is
+ * a no-op as well.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale)
+{
+	/* Intentionally empty. */
+}
+
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
+{
+	/* Quotaoff items are never pinned; nothing to unpin or remove. */
+}
+
+/*
+ * There is no lock on a quotaoff item; always report it as locked.
+ */
+/*ARGSUSED*/
+STATIC uint
+xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
+{
+	return (XFS_ITEM_LOCKED);
+}
+
+/*
+ * Quotaoff items carry no lock, so there is nothing to release here.
+ * This routine reports no status back to its caller.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf)
+{
+	/* Intentionally empty. */
+}
+
+/*
+ * A quotaoff-start item is logged exactly once and cannot move in the
+ * log, so the lsn it was logged at is handed straight back.
+ */
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
+{
+	return lsn;
+}
+
+/*
+ * The transaction carrying this QUOTAOFF item was aborted, so release
+ * the memory of the item itself.
+ * XXX Shouldn't this never happen in the case of qoffend logitems?
+ */
+STATIC void
+xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
+{
+	kmem_free(qf, sizeof(*qf));
+}
+
+/*
+ * A quotaoff item cannot be pushed; it simply sits there until the log
+ * has been flushed to disk.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf)
+{
+	/* Intentionally empty. */
+}
+
+
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+	xfs_qoff_logitem_t *qfe,
+	xfs_lsn_t lsn)
+{
+	xfs_qoff_logitem_t	*qfs = qfe->qql_start_lip;
+	struct xfs_mount	*mp = qfs->qql_item.li_mountp;
+	SPLDECL(s);
+
+	/*
+	 * Pull the matching quotaoff-start item out of the AIL;
+	 * xfs_trans_delete_ail() drops the AIL lock for us.
+	 */
+	AIL_LOCK(mp, s);
+	xfs_trans_delete_ail(mp, (xfs_log_item_t *)qfs, s);
+	kmem_free(qfs, sizeof(*qfs));
+	kmem_free(qfe, sizeof(*qfe));
+	return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this.  I think we can
+ * just ignore it.  The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen?  Also, do we need different routines for
+ * quotaoff start and quotaoff end?  I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable).  If we do something else,
+ * then maybe we don't need two.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
+{
+	/* The commit lsn is ignored for quotaoff-start items. */
+}
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
+{
+	/* The commit lsn is ignored for quotaoff-end items. */
+}
+
+struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { /* quotaoff-end items */
+	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
+	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+					xfs_qm_qoff_logitem_format,
+	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
+	.iop_unpin	= (void(*)(xfs_log_item_t*, int))
+					xfs_qm_qoff_logitem_unpin,
+	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
+					xfs_qm_qoff_logitem_unpin_remove,
+	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
+	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
+	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_qoffend_logitem_committed, /* end-specific */
+	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
+	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
+	.iop_pushbuf	= NULL,
+	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_qoffend_logitem_committing /* end-specific */
+};
+
+/*
+ * Ops vector used by every quotaoff-start log item.
+ */
+struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
+	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+					xfs_qm_qoff_logitem_format,
+	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
+	.iop_unpin	= (void(*)(xfs_log_item_t*, int))
+					xfs_qm_qoff_logitem_unpin,
+	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
+					xfs_qm_qoff_logitem_unpin_remove,
+	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
+	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
+	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_qoff_logitem_committed,
+	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
+	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
+	.iop_pushbuf	= NULL,	/* no pushbuf handler for quotaoff items */
+	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+					xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate a zeroed quotaoff log item and initialize it.  A non-NULL
+ * 'start' makes this a quotaoff-end item paired with that start item.
+ */
+xfs_qoff_logitem_t *
+xfs_qm_qoff_logitem_init(
+	struct xfs_mount *mp,
+	xfs_qoff_logitem_t *start,
+	uint flags)
+{
+	xfs_qoff_logitem_t	*qflip;
+
+	qflip = kmem_zalloc(sizeof(*qflip), KM_SLEEP);
+	qflip->qql_start_lip = start;
+
+	qflip->qql_item.li_type = XFS_LI_QUOTAOFF;
+	qflip->qql_item.li_mountp = mp;
+	qflip->qql_item.li_ops = start ? &xfs_qm_qoffend_logitem_ops :
+					 &xfs_qm_qoff_logitem_ops;
+
+	qflip->qql_format.qf_type = XFS_LI_QUOTAOFF;
+	qflip->qql_format.qf_flags = flags;
+	return qflip;
+}
diff -Nru a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_dquot_item.h	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {	/* log item embedded in a dquot (q_logitem) */
+	xfs_log_item_t		 qli_item;	   /* common portion */
+	struct xfs_dquot	*qli_dquot;	   /* back ptr to owning dquot */
+	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */
+	unsigned short		 qli_pushbuf_flag; /* 1 bit used in push_ail */
+#ifdef DEBUG
+	uint64_t		 qli_push_owner;   /* DEBUG builds only */
+#endif
+	xfs_dq_logformat_t	 qli_format;	   /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {	/* quotaoff start/end log item */
+	xfs_log_item_t		 qql_item;	/* common portion */
+	struct xfs_qoff_logitem *qql_start_lip; /* paired qoff-start item, or NULL */
+	xfs_qoff_logformat_t	 qql_format;	/* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void		   xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+					struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+					struct xfs_qoff_logitem *, uint);
+extern void		   xfs_trans_log_quotaoff_item(struct xfs_trans *,
+					struct xfs_qoff_logitem *);
+
+#endif	/* __XFS_DQUOT_ITEM_H__ */
diff -Nru a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm.c	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,2794 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include "xfs_qm.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+mutex_t xfs_Gqm_lock;
+struct xfs_qm	*xfs_Gqm;
+EXPORT_SYMBOL(xfs_Gqm);	/* used by xfsidbg */
+
+kmem_zone_t	*qm_dqzone;
+kmem_zone_t	*qm_dqtrxzone;
+
+STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
+STATIC int	xfs_qm_quotacheck(xfs_mount_t *);
+
+STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC void	xfs_qm_shake(void);
+
+#ifdef DEBUG
+extern mutex_t	qcheck_lock;
+#endif
+
+#ifdef QUOTADEBUG	/* print each dquot on list 'l', following link field NXT */
+#define XQM_LIST_PRINT(l, NXT, title) \
+{ \
+	xfs_dquot_t	*dqp; int i = 0; \
+	cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
+	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
+		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
+				  "bcnt = %d, icnt = %d, refs = %d", \
+			++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
+			DQFLAGTO_TYPESTR(dqp),	     \
+			(int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
+			(int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
+			(int) dqp->q_nrefs);  } \
+}
+#else	/* !QUOTADEBUG: expands to an empty statement */
+#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
+#endif	/* QUOTADEBUG */
+
+/*
+ * Allocate and initialize the single, system-wide XQM structure (there
+ * is not one quota manager per file system).  Returned with zero refs.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+	xfs_qm_t		*xqm;
+	int			hsize, i;
+
+	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+	ASSERT(xqm);
+
+	/*
+	 * Initialize the user and group dquot hash tables (hsize buckets each).
+	 */
+	hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
+		XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
+	xqm->qm_dqhashmask = hsize - 1;
+
+	xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
+						      sizeof(xfs_dqhash_t),
+						      KM_SLEEP);
+	xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
+						      sizeof(xfs_dqhash_t),
+						      KM_SLEEP);
+	ASSERT(xqm->qm_usr_dqhtable != NULL);
+	ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+	for (i = 0; i < hsize; i++) {
+		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+	}
+
+	/*
+	 * Freelist of all dquots of all file systems
+	 */
+	xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
+
+	/*
+	 * dquot zone: created once, then reused through the qm_dqzone global.
+	 */
+	if (!qm_dqzone) {
+		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+						"xfs_dquots");
+		qm_dqzone = xqm->qm_dqzone;
+	} else
+		xqm->qm_dqzone = qm_dqzone;
+
+	kmem_shake_register(xfs_qm_shake);	/* our own low-memory callback */
+
+	/*
+	 * The t_dqinfo portion of transactions (zone likewise created once).
+	 */
+	if (!qm_dqtrxzone) {
+		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+						   "xfs_dqtrx");
+		qm_dqtrxzone = xqm->qm_dqtrxzone;
+	} else
+		xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+	atomic_set(&xqm->qm_totaldquots, 0);
+	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+	xqm->qm_nrefs = 0;
+#ifdef DEBUG
+	mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
+#endif
+	return xqm;
+}
+
+/*
+ * Tear down the global quota manager; its reference count must be zero.
+ */
+void
+xfs_qm_destroy(
+	struct xfs_qm	*xqm)
+{
+	int		nbuckets, b;
+
+	ASSERT(xqm != NULL);
+	ASSERT(xqm->qm_nrefs == 0);
+	kmem_shake_deregister(xfs_qm_shake);
+	nbuckets = xqm->qm_dqhashmask + 1;
+	for (b = 0; b < nbuckets; b++) {
+		xfs_qm_list_destroy(&xqm->qm_usr_dqhtable[b]);
+		xfs_qm_list_destroy(&xqm->qm_grp_dqhtable[b]);
+	}
+	kmem_free(xqm->qm_usr_dqhtable, nbuckets * sizeof(xfs_dqhash_t));
+	xqm->qm_usr_dqhtable = NULL;
+	kmem_free(xqm->qm_grp_dqhtable, nbuckets * sizeof(xfs_dqhash_t));
+	xqm->qm_grp_dqhtable = NULL;
+	xqm->qm_dqhashmask = 0;
+	xfs_qm_freelist_destroy(&xqm->qm_dqfreelist);
+#ifdef DEBUG
+	mutex_destroy(&qcheck_lock);
+#endif
+	kmem_free(xqm, sizeof(*xqm));
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas.  The global quota manager is created on first use and
+ * a reference is taken on it for this mount.  Returns 0 on success, or
+ * EINVAL (with the XQM lock released) if the manager cannot be set up.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+	struct xfs_mount *mp)
+{
+	/*
+	 * Need to lock the xfs_Gqm structure for things like this. For example,
+	 * the structure could disappear between the entry to this routine and
+	 * a HOLD operation if not locked.
+	 */
+	XFS_QM_LOCK(xfs_Gqm);
+
+	if (xfs_Gqm == NULL && (xfs_Gqm = xfs_Gqm_init()) == NULL) {
+		/* Every exit must drop the XQM lock, the failure one too. */
+		XFS_QM_UNLOCK(xfs_Gqm);
+		return (XFS_ERROR(EINVAL));
+	}
+	/*
+	 * We can keep a list of all filesystems with quotas mounted for
+	 * debugging and statistical purposes, but ...
+	 * Just take a reference and get out.
+	 */
+	XFS_QM_HOLD(xfs_Gqm);
+	XFS_QM_UNLOCK(xfs_Gqm);
+
+	return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time, and
+ * destroy the entire quota manager once the last reference is gone.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+	struct xfs_mount *mp)
+{
+	xfs_dquot_t	*dqp, *nextdqp;
+
+	ASSERT(xfs_Gqm);
+	ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+	/*
+	 * Go thru the freelist and destroy all inactive dquots.
+	 */
+	xfs_qm_freelist_lock(xfs_Gqm);
+
+	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
+	     dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
+		xfs_dqlock(dqp);
+		nextdqp = dqp->dq_flnext;	/* fetch before dqp may be destroyed */
+		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+			ASSERT(dqp->q_mount == NULL);
+			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+			ASSERT(dqp->HL_PREVP == NULL);
+			ASSERT(dqp->MPL_PREVP == NULL);
+			XQM_FREELIST_REMOVE(dqp);
+			xfs_dqunlock(dqp);
+			xfs_qm_dqdestroy(dqp);
+		} else {
+			xfs_dqunlock(dqp);
+		}
+		dqp = nextdqp;
+	}
+	xfs_qm_freelist_unlock(xfs_Gqm);
+
+	/*
+	 * Drop our reference; destroy the entire XQM if it was the last one.
+	 * If somebody mounts with quotaon, this'll be restarted.
+	 */
+	XFS_QM_LOCK(xfs_Gqm);
+	XFS_QM_RELE(xfs_Gqm);
+	if (xfs_Gqm->qm_nrefs == 0) {
+		xfs_qm_destroy(xfs_Gqm);
+		xfs_Gqm = NULL;
+	}
+	XFS_QM_UNLOCK(xfs_Gqm);	/* note: xfs_Gqm may be NULL by now */
+}
+
+/*
+ * Called at mount time from xfs_mountfs to translate the quota mount
+ * options into the m_qflags accounting/enforcement bits of the mount
+ * structure.  Note that the superblock has not been read in yet.
+ */
+void
+xfs_qm_mount_quotainit(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	uint		qflags = 0;
+
+	/* At least one of user or group quotas must have been requested. */
+	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA));
+
+	/*
+	 * Work out the quota flags for the mount structure.  From here on
+	 * m_qflags is what says whether quotas are ON/OFF, etc.
+	 * Nothing is enforced if accounting is off, i.e. XFSMNT_*QUOTA
+	 * must be set for XFSMNT_*QUOTAENF to have any effect.
+	 * The quotaoff lock isn't needed; this is called from mount.
+	 */
+	if (flags & XFSMNT_UQUOTA) {
+		qflags |= XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE;
+		if (flags & XFSMNT_UQUOTAENF)
+			qflags |= XFS_UQUOTA_ENFD;
+	}
+	if (flags & XFSMNT_GQUOTA) {
+		qflags |= XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE;
+		if (flags & XFSMNT_GQUOTAENF)
+			qflags |= XFS_GQUOTA_ENFD;
+	}
+	mp->m_qflags |= qflags;
+}
+
+/*
+ * Free the quotainfo structure of this mount, if it has one.
+ */
+void
+xfs_qm_unmount_quotadestroy(
+	xfs_mount_t	*mp)
+{
+	if (mp->m_quotainfo != NULL)
+		xfs_qm_destroy_quotainfo(mp);
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo, running a quotacheck if one
+ * is needed.  The superblock is guaranteed to be consistently read in.
+ * On any failure quotas are switched off (m_qflags cleared) for the mount.
+ */
+int
+xfs_qm_mount_quotas(
+	xfs_mount_t	*mp)
+{
+	unsigned long	s;
+	int		error = 0;
+	uint		sbf;
+
+	/*
+	 * If a file system had quotas running earlier, but decided to
+	 * mount without -o quota/uquota/gquota options, revoke the
+	 * quotachecked license, and bail out.
+	 */
+	if (! XFS_IS_QUOTA_ON(mp) &&
+	    (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) {
+		mp->m_qflags = 0;
+		goto write_changes;
+	}
+
+	/*
+	 * Quotas cannot be turned on for a filesystem with realtime
+	 * extents, so we disable quotas immediately in that case.
+	 */
+	if (mp->m_sb.sb_rextents) {
+		cmn_err(CE_NOTE,
+			"Cannot turn on quotas for realtime filesystem %s",
+			mp->m_fsname);
+		mp->m_qflags = 0;
+		goto write_changes;
+	}
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+	cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
+#endif
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	/*
+	 * Allocate the quotainfo structure inside the mount struct, and
+	 * create quotainode(s), and change/rev superblock if necessary.
+	 */
+	if ((error = xfs_qm_init_quotainfo(mp))) {
+		/*
+		 * We must turn off quotas.
+		 */
+		ASSERT(mp->m_quotainfo == NULL);
+		mp->m_qflags = 0;
+		goto write_changes;
+	}
+	/*
+	 * If any of the quotas are not consistent, do a quotacheck.
+	 */
+	if (XFS_QM_NEED_QUOTACHECK(mp)) {
+#ifdef DEBUG
+		cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
+#endif
+		if ((error = xfs_qm_quotacheck(mp))) {
+			cmn_err(CE_WARN, "Quotacheck unsuccessful (Error %d): "
+				"Disabling quotas.",
+				error);
+			/*
+			 * We must turn off quotas.
+			 */
+			ASSERT(mp->m_quotainfo != NULL);
+			ASSERT(xfs_Gqm != NULL);
+			xfs_qm_destroy_quotainfo(mp);
+			mp->m_qflags = 0;
+			goto write_changes;
+		}
+#ifdef DEBUG
+		cmn_err(CE_NOTE, "Done quotacheck.");
+#endif
+	}
+ write_changes:
+	/*
+	 * We actually don't have to acquire the SB_LOCK at all.
+	 * This can only be called from mount, and that's single threaded. XXX
+	 */
+	s = XFS_SB_LOCK(mp);
+	sbf = mp->m_sb.sb_qflags;	/* old on-disk flags, for comparison */
+	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+	XFS_SB_UNLOCK(mp, s);
+
+	/* Only write the superblock out if the quota flags really changed. */
+	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+			/*
+			 * We could only have been turning quotas off.
+			 * We aren't in very good shape actually because
+			 * the incore structures are convinced that quotas are
+			 * off, but the on disk superblock doesn't know that !
+			 */
+			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+			xfs_fs_cmn_err(CE_ALERT, mp,
+				"XFS mount_quotas: Superblock update failed!");
+		}
+	}
+
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp,
+			"Failed to initialize disk quotas.");
+	}
+	return XFS_ERROR(error);
+}
+
+/*
+ * Called from the vfsops layer to drop dquot refs and flush/purge quota inodes.
+ */
+int
+xfs_qm_unmount_quotas(
+	xfs_mount_t	*mp)
+{
+	xfs_inode_t	*uqp, *gqp;
+	int		error = 0;
+
+	/*
+	 * Release the dquots that root inode, et al might be holding,
+	 * before we flush quotas and blow away the quotainfo structure.
+	 */
+	ASSERT(mp->m_rootip);
+	xfs_qm_dqdetach(mp->m_rootip);
+	if (mp->m_rbmip)
+		xfs_qm_dqdetach(mp->m_rbmip);
+	if (mp->m_rsumip)
+		xfs_qm_dqdetach(mp->m_rsumip);
+
+	/*
+	 * Flush out the quota inodes; EFSCORRUPTED bails out before the purge.
+	 */
+	uqp = gqp = NULL;
+	if (mp->m_quotainfo) {
+		if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
+			xfs_ilock(uqp, XFS_ILOCK_EXCL);
+			xfs_iflock(uqp);
+			error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
+			xfs_iunlock(uqp, XFS_ILOCK_EXCL);
+			if (unlikely(error == EFSCORRUPTED)) {
+				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
+						 XFS_ERRLEVEL_LOW, mp);
+				goto out;
+			}
+		}
+		if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
+			xfs_ilock(gqp, XFS_ILOCK_EXCL);
+			xfs_iflock(gqp);
+			error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
+			xfs_iunlock(gqp, XFS_ILOCK_EXCL);
+			if (unlikely(error == EFSCORRUPTED)) {
+				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
+						 XFS_ERRLEVEL_LOW, mp);
+				goto out;
+			}
+		}
+	}
+	if (uqp) {
+		 XFS_PURGE_INODE(uqp);
+		 mp->m_quotainfo->qi_uquotaip = NULL;
+	}
+	if (gqp) {
+		XFS_PURGE_INODE(gqp);
+		mp->m_quotainfo->qi_gquotaip = NULL;
+	}
+out:
+	return XFS_ERROR(error);	/* result of the last flush attempted */
+}
+
+/*
+ * Flush all dirty dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+int
+xfs_qm_dqflush_all(
+	xfs_mount_t	*mp,
+	int		flags)
+{
+	int		recl;
+	xfs_dquot_t	*dqp;
+	int		niters;
+	int		error;
+
+	if (mp->m_quotainfo == NULL)
+		return (0);
+	niters = 0;
+again:
+	xfs_qm_mplist_lock(mp);
+	FOREACH_DQUOT_IN_MP(dqp, mp) {
+		xfs_dqlock(dqp);
+		if (! XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqunlock(dqp);
+			continue;
+		}
+		xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
+		/* remember the reclaim count; XXX a sentinel would be better */
+		recl = XFS_QI_MPLRECLAIMS(mp);
+		if (! xfs_qm_dqflock_nowait(dqp)) {
+			/*
+			 * If we can't grab the flush lock then check
+			 * to see if the dquot has been flushed delayed
+			 * write.  If so, grab its buffer and send it
+			 * out immediately.  We'll be able to acquire
+			 * the flush lock when the I/O completes.
+			 */
+			xfs_qm_dqflock_pushbuf_wait(dqp);
+		}
+		/*
+		 * Let go of the mplist lock. We don't want to hold it
+		 * across a disk write.
+		 */
+		xfs_qm_mplist_unlock(mp);
+		error = xfs_qm_dqflush(dqp, flags);
+		xfs_dqunlock(dqp);
+		if (error)
+			return (error);
+
+		xfs_qm_mplist_lock(mp);
+		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
+			xfs_qm_mplist_unlock(mp);
+			/* reclaim count changed while unlocked; XXX restart limit */
+			goto again;
+		}
+	}
+
+	xfs_qm_mplist_unlock(mp);
+	/* return ! busy */
+	return (0);
+}
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+	xfs_mount_t	*mp)
+{
+	xfs_dquot_t	*dqp, *gdqp;
+	int		nrecl;
+
+ again:
+	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+	dqp = XFS_QI_MPLNEXT(mp);
+	while (dqp) {
+		xfs_dqlock(dqp);
+		if ((gdqp = dqp->q_gdquot)) {
+			xfs_dqlock(gdqp);
+			dqp->q_gdquot = NULL;
+		}
+		xfs_dqunlock(dqp);
+
+		if (gdqp) {
+			/*
+			 * Can't hold the mplist lock across a dqput.
+			 * XXX must convert to marker based iterations here.
+			 */
+			nrecl = XFS_QI_MPLRECLAIMS(mp);
+			xfs_qm_mplist_unlock(mp);
+			xfs_qm_dqput(gdqp);
+
+			xfs_qm_mplist_lock(mp);
+			if (nrecl != XFS_QI_MPLRECLAIMS(mp))
+				goto again;	/* reclaim count changed; rescan */
+		}
+		dqp = dqp->MPL_NEXT;
+	}
+}
+
+/*
+ * Go through all the incore dquots of this file system and take those
+ * whose type matches the flags off the mplist and hashlist.  Used when
+ * turning off quota accounting for users and/or groups, and at unmount.
+ * Returns the sum of the xfs_qm_dqpurge() results (nmisses).
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+	xfs_mount_t	*mp,
+	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */
+{
+	xfs_dquot_t	*dqp;
+	uint		dqtype;
+	int		nrecl;
+	xfs_dquot_t	*nextdqp;
+	int		nmisses;
+
+	if (mp->m_quotainfo == NULL)
+		return (0);
+
+	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+	xfs_qm_mplist_lock(mp);
+
+	/*
+	 * In the first pass through all incore dquots of this filesystem,
+	 * we release the group dquot pointers the user dquots may be
+	 * carrying around as a hint. We need to do this irrespective of
+	 * what's being turned off.
+	 */
+	xfs_qm_detach_gdquots(mp);
+
+      again:
+	nmisses = 0;
+	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+	/*
+	 * Try to get rid of all of the unwanted dquots. The idea is to
+	 * get them off mplist and hashlist, but leave them on freelist.
+	 */
+	dqp = XFS_QI_MPLNEXT(mp);
+	while (dqp) {
+		/*
+		 * It's OK to look at the type without taking dqlock here.
+		 * We're holding the mplist lock here, and that's needed for
+		 * a dqreclaim.
+		 */
+		if ((dqp->dq_flags & dqtype) == 0) {
+			dqp = dqp->MPL_NEXT;
+			continue;
+		}
+
+		if (! xfs_qm_dqhashlock_nowait(dqp)) {
+			nrecl = XFS_QI_MPLRECLAIMS(mp);
+			xfs_qm_mplist_unlock(mp);
+			XFS_DQ_HASH_LOCK(dqp->q_hash);
+			xfs_qm_mplist_lock(mp);
+
+			/*
+			 * XXX Theoretically, we can get into a very long
+			 * ping pong game here.
+			 * No one can be adding dquots to the mplist at
+			 * this point, but somebody might be taking things off.
+			 */
+			if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
+				XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+				goto again;
+			}
+		}
+
+		/*
+		 * Take the dquot off the mplist and hashlist. It may remain on
+		 * freelist in INACTIVE state.
+		 */
+		nextdqp = dqp->MPL_NEXT;
+		nmisses += xfs_qm_dqpurge(dqp, flags);
+		dqp = nextdqp;
+	}
+	xfs_qm_mplist_unlock(mp);
+	return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		nmisses;
+
+	/*
+	 * Purge the dquot cache, backing off and retrying for as long as
+	 * a pass reports dquots it could not get rid of.  None of the
+	 * dquots should really be busy at this point.  Always returns 0.
+	 */
+	if (mp->m_quotainfo == NULL)
+		return 0;
+	while ((nmisses = xfs_qm_dqpurge_int(mp, flags)) != 0)
+		delay(nmisses * 10);
+	return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+	xfs_inode_t	*ip,
+	xfs_dqid_t	id,
+	uint		type,
+	uint		doalloc,	/* passed on to xfs_qm_dqget() */
+	uint		dolock,		/* nonzero: leave dquot(s) locked */
+	xfs_dquot_t	*udqhint, /* hint */
+	xfs_dquot_t	**IO_idqpp)	/* in/out: &i_udquot or &i_gdquot */
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	error = 0;
+	/*
+	 * See if we already have it in the inode itself. IO_idqpp is
+	 * &i_udquot or &i_gdquot. This made the code look weird, but
+	 * made the logic a lot simpler.
+	 */
+	if ((dqp = *IO_idqpp)) {
+		if (dolock)
+			xfs_dqlock(dqp);
+		xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
+		goto done;
+	}
+
+	/*
+	 * udqhint is the i_udquot field in inode, and is non-NULL only
+	 * when the type arg is XFS_DQ_GROUP. Its purpose is to save a
+	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+	 * the user dquot.
+	 */
+	ASSERT(!udqhint || type == XFS_DQ_GROUP);
+	if (udqhint && !dolock)
+		xfs_dqlock(udqhint);
+
+	/*
+	 * No need to take dqlock to look at the id.
+	 * The ID can't change until it gets reclaimed, and it won't
+	 * be reclaimed as long as we have a ref from inode and we hold
+	 * the ilock.
+	 */
+	if (udqhint &&
+	    (dqp = udqhint->q_gdquot) &&
+	    (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
+		ASSERT(XFS_DQ_IS_LOCKED(udqhint));
+		xfs_dqlock(dqp);
+		XFS_DQHOLD(dqp);
+		ASSERT(*IO_idqpp == NULL);
+		*IO_idqpp = dqp;
+		if (!dolock) {
+			xfs_dqunlock(dqp);
+			xfs_dqunlock(udqhint);
+		}
+		goto done;
+	}
+	/*
+	 * We can't hold a dquot lock when we call the dqget code.
+	 * We'll deadlock in no time, because of (not conforming to)
+	 * lock ordering - the inodelock comes before any dquot lock,
+	 * and we may drop and reacquire the ilock in xfs_qm_dqget().
+	 */
+	if (udqhint)
+		xfs_dqunlock(udqhint);
+	/*
+	 * Find the dquot from somewhere. This bumps the
+	 * reference count of dquot and returns it locked.
+	 * This can return ENOENT if dquot didn't exist on
+	 * disk and we didn't ask it to allocate;
+	 * ESRCH if quotas got turned off suddenly.
+	 */
+	if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+				 doalloc|XFS_QMOPT_DOWARN, &dqp))) {
+		if (udqhint && dolock)
+			xfs_dqlock(udqhint);
+		goto done;
+	}
+
+	xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
+	/*
+	 * dqget may have dropped and re-acquired the ilock, but it guarantees
+	 * that the dquot returned is the one that should go in the inode.
+	 */
+	*IO_idqpp = dqp;
+	ASSERT(dqp);
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	if (! dolock) {
+		xfs_dqunlock(dqp);
+		ASSERT(!udqhint || !XFS_DQ_IS_LOCKED(udqhint));
+		goto done;
+	}
+	if (! udqhint)
+		goto done;
+
+	ASSERT(udqhint);
+	ASSERT(dolock);
+	ASSERT(! XFS_DQ_IS_LOCKED(udqhint));
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	if (! xfs_qm_dqlock_nowait(udqhint)) {
+		xfs_dqunlock(dqp);	/* back off, then lock udqhint first */
+		xfs_dqlock(udqhint);
+		xfs_dqlock(dqp);
+	}
+      done:
+#ifdef QUOTADEBUG
+	if (udqhint) {
+		if (dolock)
+			ASSERT(XFS_DQ_IS_LOCKED(udqhint));
+		else
+			ASSERT(! XFS_DQ_IS_LOCKED(udqhint));
+	}
+	if (! error) {
+		if (dolock)
+			ASSERT(XFS_DQ_IS_LOCKED(dqp));
+		else
+			ASSERT(! XFS_DQ_IS_LOCKED(dqp));
+	}
+#endif
+	return (error);
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering constraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry ('locked'); they are left in that same state on return.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+	xfs_dquot_t	*udq,
+	xfs_dquot_t	*gdq,
+	uint		locked)
+{
+	xfs_dquot_t	*tmp;
+
+#ifdef QUOTADEBUG
+	if (locked) {
+		ASSERT(XFS_DQ_IS_LOCKED(udq));
+		ASSERT(XFS_DQ_IS_LOCKED(gdq));
+	} else {
+		ASSERT(! XFS_DQ_IS_LOCKED(udq));
+		ASSERT(! XFS_DQ_IS_LOCKED(gdq));
+	}
+#endif
+	if (! locked)
+		xfs_dqlock(udq);
+
+	if ((tmp = udq->q_gdquot)) {
+		if (tmp == gdq) {
+			if (! locked)
+				xfs_dqunlock(udq);
+			return;
+		}
+
+		udq->q_gdquot = NULL;
+		/*
+		 * We can't keep any dqlocks when calling dqrele,
+		 * because the freelist lock comes before dqlocks.
+		 */
+		xfs_dqunlock(udq);
+		if (locked)
+			xfs_dqunlock(gdq);
+		/*
+		 * we took a hard reference once upon a time in dqget,
+		 * so give it back when the udquot no longer points at it
+		 * dqput() does the unlocking of the dquot.
+		 */
+		xfs_qm_dqrele(tmp);
+
+		ASSERT(! XFS_DQ_IS_LOCKED(udq));
+		ASSERT(! XFS_DQ_IS_LOCKED(gdq));
+		xfs_dqlock(udq);
+		xfs_dqlock(gdq);
+
+	} else {
+		ASSERT(XFS_DQ_IS_LOCKED(udq));
+		if (! locked) {
+			ASSERT(! XFS_DQ_IS_LOCKED(gdq));
+			xfs_dqlock(gdq);
+		}
+	}
+
+	ASSERT(XFS_DQ_IS_LOCKED(udq));
+	ASSERT(XFS_DQ_IS_LOCKED(gdq));
+	/*
+	 * Somebody could have attached a gdquot here,
+	 * when we dropped the uqlock. If so, just do nothing.
+	 */
+	if (udq->q_gdquot == NULL) {
+		XFS_DQHOLD(gdq);
+		udq->q_gdquot = gdq;
+	}
+	if (! locked) {
+		xfs_dqunlock(gdq);
+		xfs_dqunlock(udq);
+	}
+}
+
+
+/*
+ * Given an inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON
+ * into account. Nothing is done for the quota inodes themselves.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
+ * much made this code a complete mess, but it has been pretty useful.
+ * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL; if not, it
+ * is locked EXCL here and unlocked before returning. Inode may get unlocked
+ * and relocked in here, and the caller must deal with the consequences.
+ */
+int
+xfs_qm_dqattach(
+	xfs_inode_t	*ip,
+	uint		flags)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	uint		nquotas = 0;
+	int		error = 0;
+
+	if ((! XFS_IS_QUOTA_ON(mp)) ||
+	    (! XFS_NOT_DQATTACHED(mp, ip)) ||
+	    (ip->i_ino == mp->m_sb.sb_uquotino) ||
+	    (ip->i_ino == mp->m_sb.sb_gquotino))
+		return (0);
+
+	ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
+	       XFS_ISLOCKED_INODE_EXCL(ip));
+
+	if (! (flags & XFS_QMOPT_ILOCKED))
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+						flags & XFS_QMOPT_DQALLOC,
+						flags & XFS_QMOPT_DQLOCK,
+						NULL, &ip->i_udquot);
+		if (error)
+			goto done;
+		nquotas++;
+	}
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	if (XFS_IS_GQUOTA_ON(mp)) {
+		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+						flags & XFS_QMOPT_DQALLOC,
+						flags & XFS_QMOPT_DQLOCK,
+						ip->i_udquot, &ip->i_gdquot);
+		/*
+		 * Don't worry about the udquot that we may have
+		 * attached above. It'll get detached, if not already.
+		 */
+		if (error)
+			goto done;
+		nquotas++;
+	}
+
+	/*
+	 * Attach this group quota to the user quota as a hint.
+	 * This WON'T, in general, result in a thrash.
+	 */
+	if (nquotas == 2) {
+		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+		ASSERT(ip->i_udquot);
+		ASSERT(ip->i_gdquot);
+
+		/*
+		 * We may or may not have the i_udquot locked at this point,
+		 * but this check is OK since we don't depend on the q_gdquot
+		 * hint being accurate 100% of the time. It is just a hint,
+		 * and this will succeed in general.
+		 */
+		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+			goto done;
+		/*
+		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
+		 */
+		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
+					 flags & XFS_QMOPT_DQLOCK);
+	}
+
+      done:
+
+#ifdef QUOTADEBUG
+	if (! error) {
+		if (ip->i_udquot) {
+			if (flags & XFS_QMOPT_DQLOCK)
+				ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
+			else
+				ASSERT(! XFS_DQ_IS_LOCKED(ip->i_udquot));
+		}
+		if (ip->i_gdquot) {
+			if (flags & XFS_QMOPT_DQLOCK)
+				ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
+			else
+				ASSERT(! XFS_DQ_IS_LOCKED(ip->i_gdquot));
+		}
+		if (XFS_IS_UQUOTA_ON(mp))
+			ASSERT(ip->i_udquot);
+		if (XFS_IS_GQUOTA_ON(mp))
+			ASSERT(ip->i_gdquot);
+	}
+#endif
+
+	if (! (flags & XFS_QMOPT_ILOCKED))
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+#ifdef QUOTADEBUG
+	else
+		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+#endif
+	return (error);
+}
+
+/*
+ * Drop the inode's references on its user and group dquots, if any,
+ * and clear the inode's dquot pointers. The inode should be locked
+ * EXCL except when this is called by xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+	xfs_inode_t	*ip)
+{
+	/* Nothing attached, nothing to do. */
+	if (ip->i_udquot == NULL && ip->i_gdquot == NULL)
+		return;
+
+	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+	if (ip->i_udquot != NULL) {
+		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
+		xfs_qm_dqrele(ip->i_udquot);
+		ip->i_udquot = NULL;
+	}
+	if (ip->i_gdquot != NULL) {
+		xfs_qm_dqrele(ip->i_gdquot);
+		ip->i_gdquot = NULL;
+	}
+}
+
+/*
+ * Flush the dirty dquots on this mount's dquot list. Called by VFS_SYNC;
+ * the flags arg determines the caller and its motives, as in xfs_sync.
+ * With SYNC_BDFLUSH, or without SYNC_WAIT, dquot locks are only tried.
+ * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
+ * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
+ * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
+ */
+
+int
+xfs_qm_sync(
+	xfs_mount_t	*mp,
+	short		flags)
+{
+	int		recl, restarts;
+	xfs_dquot_t	*dqp;
+	uint		flush_flags;
+	boolean_t	nowait;
+	int		error;
+
+	restarts = 0;
+	/*
+	 * We won't block unless we are asked to.
+	 */
+	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
+
+  again:
+	xfs_qm_mplist_lock(mp);
+	/*
+	 * dqpurge_all() also takes the mplist lock and iterates thru all dquots
+	 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+	 * when we have the mplist lock, we know that dquots will be consistent
+	 * as long as we have it locked.
+	 */
+	if (! XFS_IS_QUOTA_ON(mp)) {
+		xfs_qm_mplist_unlock(mp);
+		return (0);
+	}
+	FOREACH_DQUOT_IN_MP(dqp, mp) {
+		/*
+		 * If this is vfs_sync calling, then skip the dquots that
+		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
+		 * This is very similar to what xfs_sync does with inodes.
+		 */
+		if (flags & SYNC_BDFLUSH) {
+			if (! XFS_DQ_IS_DIRTY(dqp))
+				continue;
+		}
+
+		if (nowait) {
+			/*
+			 * Try to acquire the dquot lock. We are NOT out of
+			 * lock order, but we just don't want to wait for this
+			 * lock, unless somebody wanted us to.
+			 */
+			if (! xfs_qm_dqlock_nowait(dqp))
+				continue;
+		} else {
+			xfs_dqlock(dqp);
+		}
+
+		/*
+		 * Now, find out for sure if this dquot is dirty or not.
+		 */
+		if (! XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqunlock(dqp);
+			continue;
+		}
+
+		/* remember the reclaim count; XXX a sentinel would be better */
+		recl = XFS_QI_MPLRECLAIMS(mp);
+		if (! xfs_qm_dqflock_nowait(dqp)) {
+			if (nowait) {
+				xfs_dqunlock(dqp);
+				continue;
+			}
+			/*
+			 * If we can't grab the flush lock then if the caller
+			 * really wanted us to give this our best shot,
+			 * see if we can give a push to the buffer before we wait
+			 * on the flush lock. At this point, we know that
+			 * even though the dquot is being flushed,
+			 * it has (new) dirty data.
+			 */
+			xfs_qm_dqflock_pushbuf_wait(dqp);
+		}
+		/*
+		 * Let go of the mplist lock. We don't want to hold it
+		 * across a disk write.
+		 */
+		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
+		xfs_qm_mplist_unlock(mp);
+		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
+		error = xfs_qm_dqflush(dqp, flush_flags);
+		xfs_dqunlock(dqp);
+		if (error && XFS_FORCED_SHUTDOWN(mp))
+			return(0);	/* Need to prevent umount failure */
+		else if (error)
+			return (error);
+
+		xfs_qm_mplist_lock(mp);
+		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
+			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+				break;
+
+			xfs_qm_mplist_unlock(mp);
+			goto again;
+		}
+	}
+
+	xfs_qm_mplist_unlock(mp);
+	return (0);
+}
+
+
+/*
+ * Allocate and initialize mp->m_quotainfo: quota inodes, locks, dquot
+ * chunk constants and default time/warning limits. Returns 0 or an error.
+ */
+int
+xfs_qm_init_quotainfo(
+	xfs_mount_t	*mp)
+{
+	xfs_quotainfo_t *qinf;
+	int		error;
+	xfs_dquot_t	*dqp;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * Tell XQM that we exist as soon as possible.
+	 */
+	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+		return (error);
+	}
+
+	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+	/*
+	 * Set up (allocating if needed) the quota inodes. NOTE(review): on
+	 * failure the quotafs ref taken above is not dropped here - confirm.
+	 */
+	if ((error = xfs_qm_init_quotainos(mp))) {
+		kmem_free(qinf, sizeof(xfs_quotainfo_t));
+		mp->m_quotainfo = NULL;
+		return (error);
+	}
+
+	spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
+	xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
+	qinf->qi_dqreclaims = 0;
+
+	/* mutex used to serialize quotaoffs */
+	mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
+
+	/* Precalc some constants */
+	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+	ASSERT(qinf->qi_dqchunklen);
+	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
+
+	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+	/*
+	 * We try to get the limits from the superuser's limits fields.
+	 * This is quite hacky, but it is standard quota practice.
+	 * We look at the USR dquot with id == 0 first, but if user quotas
+	 * are not enabled we go to the GRP dquot with id == 0.
+	 * We don't really care to keep separate default limits for user
+	 * and group quotas, at least not at this point.
+	 */
+	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+			     (XFS_IS_UQUOTA_RUNNING(mp)) ?
+			     XFS_DQ_USER : XFS_DQ_GROUP,
+			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+			     &dqp);
+	if (! error) {
+		/*
+		 * The warnings and timers set the grace period given to
+		 * a user or group before he or she can not perform any
+		 * more writing. If it is zero, a default is used.
+		 */
+		qinf->qi_btimelimit = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT) ?
+			INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT) : XFS_QM_BTIMELIMIT;
+		qinf->qi_itimelimit = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT) ?
+			INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT) : XFS_QM_ITIMELIMIT;
+		qinf->qi_rtbtimelimit = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT) ?
+			INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT) : XFS_QM_RTBTIMELIMIT;
+		qinf->qi_bwarnlimit = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) ?
+			INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) : XFS_QM_BWARNLIMIT;
+		qinf->qi_iwarnlimit = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) ?
+			INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) : XFS_QM_IWARNLIMIT;
+
+		/*
+		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+		 * we don't want this dquot cached. We haven't done a
+		 * quotacheck yet, and quotacheck doesn't like incore dquots.
+		 */
+		xfs_qm_dqdestroy(dqp);
+	} else {
+		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off. Drops the XQM reference, destroys the quotainfo locks,
+ * purges the quota inodes, then frees mp->m_quotainfo and NULLs it.
+ */
+void
+xfs_qm_destroy_quotainfo(
+	xfs_mount_t	*mp)
+{
+	xfs_quotainfo_t *qi;
+
+	qi = mp->m_quotainfo;
+	ASSERT(qi != NULL);
+	ASSERT(xfs_Gqm != NULL);
+
+	/*
+	 * Release the reference that XQM kept, so that we know
+	 * when the XQM structure should be freed. We cannot assume
+	 * that xfs_Gqm is non-null after this point.
+	 */
+	xfs_qm_rele_quotafs_ref(mp);
+
+	spinlock_destroy(&qi->qi_pinlock);
+	xfs_qm_list_destroy(&qi->qi_dqlist);
+
+	if (qi->qi_uquotaip) {
+		XFS_PURGE_INODE(qi->qi_uquotaip);
+		qi->qi_uquotaip = NULL; /* paranoia */
+	}
+	if (qi->qi_gquotaip) {
+		XFS_PURGE_INODE(qi->qi_gquotaip);
+		qi->qi_gquotaip = NULL;
+	}
+	mutex_destroy(&qi->qi_quotaofflock);
+	kmem_free(qi, sizeof(xfs_quotainfo_t));
+	mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+	xfs_dqlist_t	*list,
+	char		*str,
+	int		n)
+{
+	list->qh_nelems = 0;
+	list->qh_version = 0;
+	list->qh_next = NULL;
+	mutex_init(&(list->qh_lock), MUTEX_DEFAULT, str);
+}
+
+/* Tear down the mutex that xfs_qm_list_init set up for this list. */
+STATIC void
+xfs_qm_list_destroy(
+	xfs_dqlist_t	*list)
+{
+	mutex_destroy(&list->qh_lock);
+}
+
+/*
+ * Stripped down version of dqattach for quotacheck: gets (allocating if
+ * needed) the user and/or group dquot for the inode without attaching them,
+ * since none are attached at that time. Both are returned locked.
+ */
+STATIC int
+xfs_qm_dqget_noattach(
+	xfs_inode_t	*ip,
+	xfs_dquot_t	**O_udqpp,
+	xfs_dquot_t	**O_gdqpp)
+{
+	int		error;
+	xfs_mount_t	*mp;
+	xfs_dquot_t	*udqp, *gdqp;
+
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	mp = ip->i_mount;
+	udqp = NULL;
+	gdqp = NULL;
+
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		ASSERT(ip->i_udquot == NULL);
+		/*
+		 * We want the dquot allocated if it doesn't exist.
+		 */
+		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
+					 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
+					 &udqp))) {
+			/*
+			 * Shouldn't be able to turn off quotas here.
+			 */
+			ASSERT(error != ESRCH);
+			ASSERT(error != ENOENT);
+			return (error);
+		}
+		ASSERT(udqp);
+	}
+
+	if (XFS_IS_GQUOTA_ON(mp)) {
+		ASSERT(ip->i_gdquot == NULL);
+		if (udqp)
+			xfs_dqunlock(udqp);
+		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+					 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+					 &gdqp))) {
+			if (udqp)
+				xfs_qm_dqrele(udqp);
+			ASSERT(error != ESRCH);
+			ASSERT(error != ENOENT);
+			return (error);
+		}
+		ASSERT(gdqp);
+
+		/* Relock udqp; on contention drop gdqp to lock udqp first */
+		if (udqp) {
+			if (! xfs_qm_dqlock_nowait(udqp)) {
+				xfs_dqunlock(gdqp);
+				xfs_dqlock(udqp);
+				xfs_dqlock(gdqp);
+			}
+		}
+	}
+
+	*O_udqpp = udqp;
+	*O_gdqpp = gdqp;
+
+#ifdef QUOTADEBUG
+	if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
+	if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
+#endif
+	return (0);
+}
+
+/*
+ * Create a quota inode, record it in the superblock (sb_uquotino or
+ * sb_gquotino, per flags), and return it referenced but unlocked.
+ */
+STATIC int
+xfs_qm_qino_alloc(
+	xfs_mount_t	*mp,
+	xfs_inode_t	**ip,
+	__int64_t	sbfields,
+	uint		flags)
+{
+	xfs_trans_t	*tp;
+	int		error;
+	unsigned long s;
+	cred_t		zerocr;
+	int		committed;
+
+	tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+	if ((error = xfs_trans_reserve(tp,
+				      XFS_QM_QINOCREATE_SPACE_RES(mp),
+				      XFS_CREATE_LOG_RES(mp), 0,
+				      XFS_TRANS_PERM_LOG_RES,
+				      XFS_CREATE_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return (error);
+	}
+	memset(&zerocr, 0, sizeof(zerocr));
+
+	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, 0,
+				   &zerocr, 0, 1, ip, &committed))) {
+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+				 XFS_TRANS_ABORT);
+		return (error);
+	}
+
+	/*
+	 * Keep an extra reference to this quota inode. This inode is
+	 * locked exclusively and joined to the transaction already.
+	 */
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
+	VN_HOLD(XFS_ITOV((*ip)));
+
+	/*
+	 * Make the changes in the superblock, and log those too.
+	 * sbfields arg may contain fields other than *QUOTINO;
+	 * VERSIONNUM for example.
+	 */
+	s = XFS_SB_LOCK(mp);
+	if (flags & XFS_QMOPT_SBVERSION) {
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+		unsigned oldv = mp->m_sb.sb_versionnum;
+#endif
+		ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
+		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+		XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
+		mp->m_sb.sb_uquotino = NULLFSINO;
+		mp->m_sb.sb_gquotino = NULLFSINO;
+
+		/* qflags will get updated _after_ quotacheck */
+		mp->m_sb.sb_qflags = 0;
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+		cmn_err(CE_NOTE,
+			"Old superblock version %x, converting to %x.",
+			oldv, mp->m_sb.sb_versionnum);
+#endif
+	}
+	if (flags & XFS_QMOPT_UQUOTA)
+		mp->m_sb.sb_uquotino = (*ip)->i_ino;
+	else
+		mp->m_sb.sb_gquotino = (*ip)->i_ino;
+	XFS_SB_UNLOCK(mp, s);
+	xfs_mod_sb(tp, sbfields);
+
+	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
+				     NULL))) {
+		xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
+		return (error);
+	}
+	return (0);
+}
+
+/*
+ * Reset the usage counters, timers and warning counts of every dquot in
+ * buffer bp (ids start at 'id'); quotacheck recomputes them. Returns 0.
+ */
+STATIC int
+xfs_qm_reset_dqcounts(
+	xfs_mount_t	*mp,
+	xfs_buf_t	*bp,
+	xfs_dqid_t	id,
+	uint		type)
+{
+	xfs_disk_dquot_t	*ddq;
+	int			j;
+
+	xfs_buftrace("RESET DQUOTS", bp);
+#ifdef DEBUG
+	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+	do_div(j, sizeof(xfs_dqblk_t));
+	ASSERT(XFS_QM_DQPERBLK(mp) == j);
+#endif
+	ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
+	for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
+		/*
+		 * Do a sanity check, and if needed, repair the dqblk. Don't
+		 * output any warnings because it's perfectly possible to
+		 * find uninitialized dquot blks. See comment in xfs_qm_dqcheck.
+		 */
+		(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+				      "xfs_quotacheck");
+		INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
+		INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
+		INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
+		INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
+		INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
+		/* the realtime timer must not outlive its zeroed count */
+		INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
+		INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
+		INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
+		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+	}
+	return (0);
+}
+
+/*
+ * Walk the 'blkcnt' filesystem blocks starting at 'bno', resetting the
+ * counters of the dquots in each block via xfs_qm_reset_dqcounts().
+ * 'firstid' is the id of the first dquot in the first block. Returns 0,
+ * or the xfs_trans_read_buf() error that stopped the walk.
+ */
+STATIC int
+xfs_qm_dqiter_bufs(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	firstid,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	blkcnt,
+	uint		flags)
+{
+	xfs_buf_t	*bp;
+	int		error;
+
+	ASSERT(blkcnt > 0);
+	error = 0;
+
+	/*
+	 * Blkcnt arg can be a very big number, and might even be
+	 * larger than the log itself, so nothing is logged here: each
+	 * buffer is read with a NULL transaction and handed to
+	 * xfs_bdwrite(). We don't really care to log it either, because
+	 * we just discard everything if we were to crash in the middle
+	 * of this loop.
+	 */
+	while (blkcnt--) {
+		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+			      XFS_FSB_TO_DADDR(mp, bno),
+			      (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
+		if (error)
+			break;
+
+		(void) xfs_qm_reset_dqcounts(mp, bp, firstid,
+					     flags & XFS_QMOPT_UQUOTA ?
+					     XFS_DQ_USER : XFS_DQ_GROUP);
+		xfs_bdwrite(mp, bp);
+		/*
+		 * goto the next block.
+		 */
+		bno++;
+		firstid += XFS_QM_DQPERBLK(mp);
+	}
+	return (error);
+}
+
+/*
+ * Iterate over all allocated blocks of the quota inode 'qip' and reset
+ * the dquots in every extent found, via xfs_qm_dqiter_bufs().
+ */
+STATIC int
+xfs_qm_dqiterate(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*qip,
+	uint		flags)
+{
+	xfs_bmbt_irec_t		*map;
+	int			i, nmaps;	/* number of map entries */
+	int			error;		/* return value */
+	xfs_fileoff_t		lblkno;
+	xfs_filblks_t		maxlblkcnt;
+	xfs_dqid_t		firstid;
+	xfs_fsblock_t		rablkno;
+	xfs_filblks_t		rablkcnt;
+
+	error = 0;
+	/*
+	 * This looks racy, but we can't keep an inode lock across a
+	 * trans_reserve. But, this gets called during quotacheck, and that
+	 * happens only at mount time which is single threaded.
+	 */
+	if (qip->i_d.di_nblocks == 0)
+		return (0);
+
+	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+	lblkno = 0;
+	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAX_FILE_OFFSET);
+	do {
+		nmaps = XFS_DQITER_MAP_SIZE;
+		/*
+		 * We aren't changing the inode itself. Just changing
+		 * some of its data. No new blocks are added here, and
+		 * the inode is never added to the transaction.
+		 */
+		xfs_ilock(qip, XFS_ILOCK_SHARED);
+		error = xfs_bmapi(NULL, qip, lblkno,
+				  maxlblkcnt - lblkno,
+				  XFS_BMAPI_METADATA,
+				  NULL,
+				  0, map, &nmaps, NULL);
+		xfs_iunlock(qip, XFS_ILOCK_SHARED);
+		if (error)
+			break;
+
+		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+		for (i = 0; i < nmaps; i++) {
+			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+			ASSERT(map[i].br_blockcount);
+
+
+			lblkno += map[i].br_blockcount;
+
+			if (map[i].br_startblock == HOLESTARTBLOCK)
+				continue;
+
+			firstid = (xfs_dqid_t) map[i].br_startoff *
+				XFS_QM_DQPERBLK(mp);
+			/*
+			 * Do a read-ahead on the next extent.
+			 */
+			if ((i+1 < nmaps) &&
+			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+				rablkcnt =  map[i+1].br_blockcount;
+				rablkno = map[i+1].br_startblock;
+				while (rablkcnt--) {
+					xfs_baread(mp->m_ddev_targp,
+					       XFS_FSB_TO_DADDR(mp, rablkno),
+					       (int)XFS_QI_DQCHUNKLEN(mp));
+					rablkno++;
+				}
+			}
+			/*
+			 * Iterate thru all the blks in the extent and
+			 * reset the counters of all the dquots inside them.
+			 */
+			if ((error = xfs_qm_dqiter_bufs(mp,
+						       firstid,
+						       map[i].br_startblock,
+						       map[i].br_blockcount,
+						       flags))) {
+				break;
+			}
+		}
+
+		if (error)
+			break;
+	} while (nmaps > 0);
+
+	kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+
+	return (error);
+}
+
+/*
+ * Quotacheck helper, called by dqusage_adjust for each inode.
+ * Charges one inode plus 'nblks' regular and 'rtblks' realtime blocks to
+ * the given (locked) dquot, USR or GRP alike, updating both its q_core
+ * counts and its q_res_* counts, then marks the dquot dirty, as opposed
+ * to logging numerous updates to individual dquots.
+ */
+STATIC void
+xfs_qm_quotacheck_dqadjust(
+	xfs_dquot_t		*dqp,
+	xfs_qcnt_t		nblks,
+	xfs_qcnt_t		rtblks)
+{
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
+	/*
+	 * Every inode costs one inode count; block counts are only
+	 * touched when the inode actually owns blocks of that kind.
+	 */
+	dqp->q_res_icount += 1;
+	INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
+	if (nblks != 0) {
+		dqp->q_res_bcount += nblks;
+		INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
+	}
+	if (rtblks != 0) {
+		dqp->q_res_rtbcount += rtblks;
+		INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
+	}
+
+	/*
+	 * Usages just changed, so the timers need adjusting too
+	 */
+	if (!XFS_IS_SUSER_DQUOT(dqp))
+		xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
+
+	dqp->dq_flags |= XFS_DQ_DIRTY;
+}
+
+/* Sum the lengths of all data fork extents of realtime inode 'ip'. */
+STATIC int
+xfs_qm_get_rtblks(
+	xfs_inode_t	*ip,
+	xfs_qcnt_t	*O_rtblks)
+{
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_bmbt_rec_t	*base;			/* base of extent array */
+	xfs_extnum_t	nextents;		/* number of extent entries */
+	xfs_extnum_t	idx;			/* extent being counted */
+	xfs_filblks_t	rtblks = 0;		/* total rt blks */
+	int		error;
+
+	ASSERT(XFS_IS_REALTIME_INODE(ip));
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+		return (error);
+
+	base = &ifp->if_u1.if_extents[0];
+	nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+	for (idx = 0; idx < nextents; idx++)
+		rtblks += xfs_bmbt_get_blockcount(&base[idx]);
+	*O_rtblks = (xfs_qcnt_t)rtblks;
+	return (0);
+}
+
+/*
+ * Callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+	xfs_mount_t	*mp,		/* mount point for filesystem */
+	xfs_trans_t	*tp,		/* transaction pointer - NULL */
+	xfs_ino_t	ino,		/* inode number to get data for */
+	void		*buffer,	/* not used */
+	xfs_daddr_t	bno,		/* starting block of inode cluster */
+	void		*dip,		/* on-disk inode pointer (not used) */
+	int		*res)		/* result code value */
+{
+	xfs_inode_t	*ip;
+	xfs_dquot_t	*udqp, *gdqp;
+	xfs_qcnt_t	nblks, rtblks;
+	int		error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * rootino must have its resources accounted for, not so with the quota
+	 * inodes.
+	 */
+	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+		*res = BULKSTAT_RV_NOTHING;
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+	 * interface expects the inode to be exclusively locked because that's
+	 * the case in all other instances. It's OK that we do this because
+	 * quotacheck is done only at mount time.
+	 */
+	if ((error = xfs_iget(mp, tp, ino, XFS_ILOCK_EXCL, &ip, bno))) {
+		*res = BULKSTAT_RV_NOTHING;
+		return (error);
+	}
+
+	if (ip->i_d.di_mode == 0) {
+		xfs_iput_new(ip, XFS_ILOCK_EXCL);
+		*res = BULKSTAT_RV_NOTHING;
+		return XFS_ERROR(ENOENT);
+	}
+
+	/*
+	 * Obtain the locked dquots. In case of an error (eg. allocation
+	 * fails for ENOSPC), we return the error number to bulkstat with
+	 * BULKSTAT_RV_GIVEUP, so that it can get propagated to
+	 * quotacheck() and make us disable quotas for the file system.
+	 */
+	if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
+		xfs_iput(ip, XFS_ILOCK_EXCL);
+		*res = BULKSTAT_RV_GIVEUP;
+		return (error);
+	}
+
+	rtblks = 0;
+	if (! XFS_IS_REALTIME_INODE(ip)) {
+		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
+	} else {
+		/*
+		 * Walk thru the extent list and count the realtime blocks.
+		 */
+		if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
+			xfs_iput(ip, XFS_ILOCK_EXCL);
+			if (udqp)
+				xfs_qm_dqput(udqp);
+			if (gdqp)
+				xfs_qm_dqput(gdqp);
+			*res = BULKSTAT_RV_GIVEUP;
+			return (error);
+		}
+		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+	}
+	ASSERT(ip->i_delayed_blks == 0);
+
+	/*
+	 * We can't release the inode while holding its dquot locks.
+	 * The inode can go into inactive and might try to acquire the dquotlocks.
+	 * So, just unlock here and do a vn_rele at the end.
+	 */
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Add the (disk blocks and inode) resources occupied by this
+	 * inode to its dquots. We do this adjustment in the incore dquot,
+	 * and also copy the changes to its buffer.
+	 * We don't care about putting these changes in a transaction
+	 * envelope because if we crash in the middle of a 'quotacheck'
+	 * we have to start from the beginning anyway.
+	 * Once we're done, we'll log all the dquot bufs.
+	 *
+	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
+	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
+	 */
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		ASSERT(udqp);
+		xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
+		xfs_qm_dqput(udqp);
+	}
+	if (XFS_IS_GQUOTA_ON(mp)) {
+		ASSERT(gdqp);
+		xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
+		xfs_qm_dqput(gdqp);
+	}
+	/*
+	 * Now release the inode. This will send it to 'inactive', and
+	 * possibly even free blocks.
+	 */
+	VN_RELE(XFS_ITOV(ip));
+
+	/*
+	 * Goto next inode.
+	 */
+	*res = BULKSTAT_RV_DIDONE;
+	return (0);
+}
+
+/*
+ * Zero all on-disk dquot counters, then walk thru all the filesystem
+ * inodes via bulkstat to rebuild a consistent view of quota usage.
+ */
+STATIC int
+xfs_qm_quotacheck(
+	xfs_mount_t	*mp)
+{
+	int		done, count, error;
+	xfs_ino_t	lastino;
+	size_t		structsz;
+	xfs_inode_t	*uip, *gip;
+	uint		flags;
+
+	count = INT_MAX;
+	structsz = 1;
+	lastino = 0;
+	flags = 0;
+
+	ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * There should be no cached dquots. The (simplistic) quotacheck
+	 * algorithm doesn't like that.
+	 */
+	ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
+
+	cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
+
+	/*
+	 * First we go thru all the dquots on disk, USR and GRP, and reset
+	 * their counters to zero. We need a clean slate.
+	 * We don't log our changes till later.
+	 */
+	if ((uip = XFS_QI_UQIP(mp))) {
+		if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
+			goto error_return;
+		flags |= XFS_UQUOTA_CHKD;
+	}
+
+	if ((gip = XFS_QI_GQIP(mp))) {
+		if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA)))
+			goto error_return;
+		flags |= XFS_GQUOTA_CHKD;
+	}
+
+	do {
+		/*
+		 * Iterate thru all the inodes in the file system,
+		 * adjusting the corresponding dquot counters in core.
+		 */
+		if ((error = xfs_bulkstat(mp, NULL, &lastino, &count,
+				     xfs_qm_dqusage_adjust,
+				     structsz, NULL,
+				     BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
+				     &done)))
+			break;
+
+	} while (! done);
+
+	/*
+	 * We can get this error if we couldn't do a dquot allocation inside
+	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+	 * dirty dquots that might be cached, we just want to get rid of them
+	 * and turn quotaoff. The dquots won't be attached to any of the inodes
+	 * at this point (because we intentionally didn't in dqget_noattach).
+	 */
+	if (error) {
+		xfs_qm_dqpurge_all(mp,
+				   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
+				   XFS_QMOPT_QUOTAOFF);
+		goto error_return;
+	}
+	/*
+	 * We've made all the changes that we need to make incore.
+	 * Now flush them down to disk buffers.
+	 */
+	xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
+
+	/*
+	 * We didn't log anything, because if we crashed, we'll have to
+	 * start the quotacheck from scratch anyway. However, we must make
+	 * sure that our dquot changes are secure before we put the
+	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
+	 * flush.
+	 */
+	XFS_bflush(mp->m_ddev_targp);
+
+	/*
+	 * If one type of quotas is off, then it will lose its
+	 * quotachecked status, since we won't be doing accounting for
+	 * that type anymore.
+	 */
+	mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD);
+	mp->m_qflags |= flags;
+
+	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
+
+ error_return:
+	cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
+	return (error);
+}
+
+/*
+ * Called after the superblock has been read in: iget (or create) the
+ * quota inodes and store them in the quotainfo. Returns 0 or an error.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+	xfs_mount_t	*mp)
+{
+	xfs_inode_t	*uip, *gip;
+	int		error;
+	__int64_t	sbflags;
+	uint		flags;
+
+	ASSERT(mp->m_quotainfo);
+	uip = gip = NULL;
+	sbflags = 0;
+	flags = 0;
+
+	/* Get the uquota and gquota inodes */
+	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+		if (XFS_IS_UQUOTA_ON(mp) &&
+		    mp->m_sb.sb_uquotino != NULLFSINO) {
+			ASSERT(mp->m_sb.sb_uquotino > 0);
+			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+					     0, &uip, 0)))
+				return XFS_ERROR(error);
+		}
+		if (XFS_IS_GQUOTA_ON(mp) &&
+		    mp->m_sb.sb_gquotino != NULLFSINO) {
+			ASSERT(mp->m_sb.sb_gquotino > 0);
+			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+					     0, &gip, 0))) {
+				if (uip)
+					VN_RELE(XFS_ITOV(uip));
+				return XFS_ERROR(error);
+			}
+		}
+	} else {
+		flags |= XFS_QMOPT_SBVERSION;
+		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+	}
+
+	/*
+	 * Create the two inodes, if they don't exist already. The changes
+	 * made above will get added to a transaction and logged in one of
+	 * the qino_alloc calls below.  If the device is readonly,
+	 * temporarily switch to read-write to do this.
+	 */
+	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+		if ((error = xfs_qm_qino_alloc(mp, &uip,
+					      sbflags | XFS_SB_UQUOTINO,
+					      flags | XFS_QMOPT_UQUOTA))) {
+			/* don't leak the gquota inode we iget'ed above */
+			if (gip)
+				VN_RELE(XFS_ITOV(gip));
+			return XFS_ERROR(error);
+		}
+		flags &= ~XFS_QMOPT_SBVERSION;
+	}
+	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
+		if ((error = xfs_qm_qino_alloc(mp, &gip,
+					      sbflags | XFS_SB_GQUOTINO,
+					      flags | XFS_QMOPT_GQUOTA))) {
+			if (uip)
+				VN_RELE(XFS_ITOV(uip));
+			return XFS_ERROR(error);
+		}
+	}
+
+	XFS_QI_UQIP(mp) = uip;
+	XFS_QI_GQIP(mp) = gip;
+
+	return (0);
+}
+
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function. Returns 0 when howmany <= 0 or when exactly
+ * howmany were reclaimed, 1 otherwise. XXXsup merge with qm_reclaim_one().
+ */
+STATIC int
+xfs_qm_shake_freelist(
+	int howmany)
+{
+	int		nreclaimed;	/* dquots destroyed so far */
+	xfs_dqhash_t	*hash;
+	xfs_dquot_t	*dqp, *nextdqp;
+	int		restarts;	/* rescans from the list head so far */
+	int		nflushes;	/* zeroed below but never read */
+
+	if (howmany <= 0)
+		return (0);
+
+	nreclaimed = 0;
+	restarts = 0;
+	nflushes = 0;
+
+#ifdef QUOTADEBUG
+	cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
+#endif
+	/* lock order is : hashchainlock, freelistlock, mplistlock */
+ tryagain:
+	xfs_qm_freelist_lock(xfs_Gqm);
+
+	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
+	     ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
+	      nreclaimed < howmany); ) {	/* dqp is advanced in the body */
+		xfs_dqlock(dqp);
+
+		/*
+		 * We are racing with dqlookup here. Naturally we don't want to
+		 * reclaim a dquot that lookup wants: drop our locks and rescan.
+		 */
+		if (dqp->dq_flags & XFS_DQ_WANT) {
+			xfs_dqunlock(dqp);
+			xfs_qm_freelist_unlock(xfs_Gqm);
+			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+				return (nreclaimed != howmany);
+			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+			goto tryagain;
+		}
+
+		/*
+		 * If the dquot is inactive, we are assured that it is
+		 * not on the mplist or the hashlist, and that makes our
+		 * life easier: only the freelist removal is left to do.
+		 */
+		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+			ASSERT(dqp->q_mount == NULL);
+			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+			ASSERT(dqp->HL_PREVP == NULL);
+			ASSERT(dqp->MPL_PREVP == NULL);
+			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+			nextdqp = dqp->dq_flnext;	/* saved before removal */
+			goto off_freelist;
+		}
+
+		ASSERT(dqp->MPL_PREVP);
+		/*
+		 * Try to grab the flush lock. If this dquot is in the process of
+		 * getting flushed to disk, we don't want to reclaim it.
+		 */
+		if (! xfs_qm_dqflock_nowait(dqp)) {
+			xfs_dqunlock(dqp);
+			dqp = dqp->dq_flnext;
+			continue;
+		}
+
+		/*
+		 * We have the flush lock so we know that this is not in the
+		 * process of being flushed. So, if this is dirty, flush it
+		 * DELWRI so that we don't get a freelist infested with
+		 * dirty dquots. The dquot is skipped, not reclaimed, this pass.
+		 */
+		if (XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
+			/*
+			 * We flush it delayed write, so don't bother
+			 * releasing the mplock.
+			 */
+			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
+			dqp = dqp->dq_flnext;
+			continue;
+		}
+		/*
+		 * We're trying to get the hashlock out of order. This races
+		 * with dqlookup; so, we give up and go to the next dquot if
+		 * we couldn't get the hashlock. This way, we won't starve
+		 * a dqlookup process that holds the hashlock that is
+		 * waiting for the freelist lock.
+		 */
+		if (! xfs_qm_dqhashlock_nowait(dqp)) {
+			xfs_dqfunlock(dqp);
+			xfs_dqunlock(dqp);
+			dqp = dqp->dq_flnext;
+			continue;
+		}
+		/*
+		 * This races with dquot allocation code as well as dqflush_all
+		 * and reclaim code. So, if we failed to grab the mplist lock,
+		 * give up everything and start over.
+		 */
+		hash = dqp->q_hash;
+		ASSERT(hash);
+		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
+			/* XXX put a sentinel so that we can come back here */
+			xfs_dqfunlock(dqp);
+			xfs_dqunlock(dqp);
+			XFS_DQ_HASH_UNLOCK(hash);
+			xfs_qm_freelist_unlock(xfs_Gqm);
+			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+				return (nreclaimed != howmany);
+			goto tryagain;
+		}
+		xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
+#ifdef QUOTADEBUG
+		cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
+			dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+#endif
+		ASSERT(dqp->q_nrefs == 0);
+		nextdqp = dqp->dq_flnext;	/* saved before removal */
+		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
+		XQM_HASHLIST_REMOVE(hash, dqp);
+		xfs_dqfunlock(dqp);
+		xfs_qm_mplist_unlock(dqp->q_mount);
+		XFS_DQ_HASH_UNLOCK(hash);
+
+ off_freelist:
+		XQM_FREELIST_REMOVE(dqp);
+		xfs_dqunlock(dqp);
+		nreclaimed++;
+		XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
+		xfs_qm_dqdestroy(dqp);
+		dqp = nextdqp;
+	}
+	xfs_qm_freelist_unlock(xfs_Gqm);
+	return (nreclaimed != howmany);
+}
+
+
+/*
+ * The shake manager routine called by shaked() when memory is
+ * running low. Does nothing if the quota manager (xfs_Gqm) is not set.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_shake(void)
+{
+	int	ndqused, nfree, n;
+
+	if (!xfs_Gqm)
+		return;
+
+	nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
+	/* incore dquots (all f/s's) that are not on the freelist */
+	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+	ASSERT(ndqused >= 0);
+
+	if (nfree <= ndqused && nfree < ndquot)
+		return;
+
+	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
+	n = nfree - ndqused - ndquot;		/* # over target */
+	/* NOTE(review): n <= nfree if ndquot/ratio >= 0, so MAX() == nfree? */
+	(void) xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*
+ * Take one reclaimable dquot off the freelist so it can be recycled; NULL
+ * if none was found. Every path below drops the dqlock before returning.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+	xfs_dquot_t	*dqpout;	/* the dquot handed back, if any */
+	xfs_dquot_t	*dqp;
+	int		restarts;	/* rescans from the list head so far */
+	int		nflushes;	/* zeroed below but never read */
+
+	restarts = 0;
+	dqpout = NULL;
+	nflushes = 0;
+
+	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+ startagain:
+	xfs_qm_freelist_lock(xfs_Gqm);
+
+	FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
+		xfs_dqlock(dqp);
+
+		/*
+		 * We are racing with dqlookup here. Naturally we don't
+		 * want to reclaim a dquot that lookup wants. We release the
+		 * freelist lock and start over, so that lookup will grab
+		 * both the dquot and the freelistlock.
+		 */
+		if (dqp->dq_flags & XFS_DQ_WANT) {
+			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
+			xfs_dqunlock(dqp);
+			xfs_qm_freelist_unlock(xfs_Gqm);
+			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+				return (NULL);
+			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+			goto startagain;
+		}
+
+		/*
+		 * If the dquot is inactive, we are assured that it is
+		 * not on the mplist or the hashlist, and that makes our
+		 * life easier: only the freelist removal is left to do.
+		 */
+		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+			ASSERT(dqp->q_mount == NULL);
+			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+			ASSERT(dqp->HL_PREVP == NULL);
+			ASSERT(dqp->MPL_PREVP == NULL);
+			XQM_FREELIST_REMOVE(dqp);
+			xfs_dqunlock(dqp);
+			dqpout = dqp;
+			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+			break;
+		}
+
+		ASSERT(dqp->q_hash);
+		ASSERT(dqp->MPL_PREVP);
+
+		/*
+		 * Try to grab the flush lock. If this dquot is in the process of
+		 * getting flushed to disk, we don't want to reclaim it.
+		 */
+		if (! xfs_qm_dqflock_nowait(dqp)) {
+			xfs_dqunlock(dqp);
+			continue;
+		}
+
+		/*
+		 * We have the flush lock so we know that this is not in the
+		 * process of being flushed. So, if this is dirty, flush it
+		 * DELWRI so that we don't get a freelist infested with
+		 * dirty dquots. The dquot is skipped, not reclaimed, this pass.
+		 */
+		if (XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
+			/*
+			 * We flush it delayed write, so don't bother
+			 * releasing the freelist lock.
+			 */
+			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
+			continue;
+		}
+
+		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
+			xfs_dqfunlock(dqp);
+			xfs_dqunlock(dqp);
+			continue;
+		}
+
+		if (! xfs_qm_dqhashlock_nowait(dqp))
+			goto mplistunlock;	/* dqpout still NULL: move on */
+
+		ASSERT(dqp->q_nrefs == 0);
+		xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
+		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
+		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
+		XQM_FREELIST_REMOVE(dqp);
+		dqpout = dqp;
+		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+ mplistunlock:
+		xfs_qm_mplist_unlock(dqp->q_mount);
+		xfs_dqfunlock(dqp);
+		xfs_dqunlock(dqp);
+		if (dqpout)
+			break;
+	}
+
+	xfs_qm_freelist_unlock(xfs_Gqm);
+	return (dqpout);
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Hand the caller an incore dquot through *O_dqpp. Once the number of
+ * dquots in the system has reached ndquot we first try to recycle one
+ * from the freelist; failing that (or below that mark) a zeroed dquot
+ * is taken from the dquot zone.
+ * Returns B_TRUE for a newly allocated dquot, B_FALSE for a recycled one.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+	xfs_dquot_t **O_dqpp)
+{
+	xfs_dquot_t	*recycled = NULL;
+
+	/*
+	 * At or above the high water mark: look for a victim on the
+	 * freelist and note it when there is none to be had.
+	 */
+	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+		recycled = xfs_qm_dqreclaim_one();
+		if (recycled == NULL) {
+			XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+		}
+	}
+
+	if (recycled != NULL) {
+		XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+		/*
+		 * Only the core is cleared here; the caller reinitializes
+		 * everything else.
+		 */
+		memset(&recycled->q_core, 0, sizeof(recycled->q_core));
+		*O_dqpp = recycled;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Nothing recycled: get a zeroed dquot from the zone and count it.
+	 * The caller is responsible for initializing it.
+	 */
+	ASSERT(xfs_Gqm->qm_dqzone != NULL);
+	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+	atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+	return (B_TRUE);
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to disk.
+ * 'flags' says which fields changed. Returns 0 or the reserve/commit error.
+ */
+int
+xfs_qm_write_sb_changes(
+	xfs_mount_t	*mp,
+	__int64_t	flags)
+{
+	xfs_trans_t	*tp;
+	int		error;
+
+#ifdef QUOTADEBUG
+	cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
+#endif
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	if ((error = xfs_trans_reserve(tp, 0,
+				      mp->m_sb.sb_sectsize + 128, 0,
+				      0,
+				      XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return (error);
+	}
+
+	xfs_mod_sb(tp, flags);
+	/* Do not claim success if the commit itself failed; pass it on. */
+	error = xfs_trans_commit(tp, 0, NULL);
+	return (error);
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid and gid (from cred_t) make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file. This also attaches dquot(s) to the
+ * inode after locking it, and returns the dquots corresponding to the
+ * uid and/or gid. Returns 0 at once if quotas are off, else 0 or error.
+ * in	: inode (unlocked)
+ * out	: udquot, gdquot with references taken and unlocked; if an out
+ *	  pointer is NULL the matching dquot is released here instead
+ */
+int
+xfs_qm_vop_dqalloc(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	uid_t		uid,
+	gid_t		gid,
+	uint		flags,
+	xfs_dquot_t	**O_udqpp,
+	xfs_dquot_t	**O_gdqpp)
+{
+	int		error;
+	xfs_dquot_t	*uq, *gq;
+	uint		lockflags;	/* mode the ilock is currently held in */
+
+	if (!XFS_IS_QUOTA_ON(mp))
+		return 0;
+
+	lockflags = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lockflags);
+
+	if ((flags & XFS_QMOPT_INHERIT) &&
+	    XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
+		gid = ip->i_d.di_gid;
+
+	/*
+	 * Attach the dquot(s) to this inode, doing a dquot allocation
+	 * if necessary. The dquot(s) will not be locked.
+	 */
+	if (XFS_NOT_DQATTACHED(mp, ip)) {
+		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
+					    XFS_QMOPT_ILOCKED))) {
+			xfs_iunlock(ip, lockflags);
+			return (error);
+		}
+	}
+
+	uq = gq = NULL;
+	if ((flags & XFS_QMOPT_UQUOTA) &&
+	    XFS_IS_UQUOTA_ON(mp)) {
+		if (ip->i_d.di_uid != uid) {
+			/*
+			 * What we need is the dquot that has this uid, and
+			 * if we send the inode to dqget, the uid of the inode
+			 * takes priority over what's sent in the uid argument.
+			 * We must unlock inode here before calling dqget if
+			 * we're not sending the inode, because otherwise
+			 * we'll deadlock by doing trans_reserve while
+			 * holding ilock.
+			 */
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+						 XFS_DQ_USER,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &uq))) {
+				ASSERT(error != ENOENT);
+				return (error);	/* ilock was dropped above */
+			}
+			/*
+			 * Get the ilock in the right order: dqlock goes first.
+			 */
+			xfs_dqunlock(uq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			/*
+			 * Take an extra reference, because we'll return
+			 * this to caller
+			 */
+			ASSERT(ip->i_udquot);
+			uq = ip->i_udquot;
+			xfs_dqlock(uq);
+			XFS_DQHOLD(uq);
+			xfs_dqunlock(uq);
+		}
+	}
+	if ((flags & XFS_QMOPT_GQUOTA) &&
+	    XFS_IS_GQUOTA_ON(mp)) {
+		if (ip->i_d.di_gid != gid) {
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+						 XFS_DQ_GROUP,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &gq))) {
+				if (uq)
+					xfs_qm_dqrele(uq);
+				ASSERT(error != ENOENT);
+				return (error);	/* ilock was dropped above */
+			}
+			xfs_dqunlock(gq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			ASSERT(ip->i_gdquot);
+			gq = ip->i_gdquot;
+			xfs_dqlock(gq);
+			XFS_DQHOLD(gq);
+			xfs_dqunlock(gq);
+		}
+	}
+	if (uq)
+		xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
+
+	xfs_iunlock(ip, lockflags);
+	if (O_udqpp)
+		*O_udqpp = uq;
+	else if (uq)
+		xfs_qm_dqrele(uq);
+	if (O_gdqpp)
+		*O_gdqpp = gq;
+	else if (gq)
+		xfs_qm_dqrele(gq);
+	return (0);
+}
+
+/*
+ * Actually transfer ownership: move the inode's block and inode counts
+ * from the old dquot to newdq (already reserved). Returns the old dquot.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dquot_t	**IO_olddq,
+	xfs_dquot_t	*newdq)
+{
+	xfs_dquot_t	*olddq;
+
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+	/*
+	 * The dquot charged so far; it must exist and differ from
+	 * the one taking over.
+	 */
+	olddq = *IO_olddq;
+	ASSERT(olddq);
+	ASSERT(olddq != newdq);
+
+	/*
+	 * Take this inode's blocks and the inode itself off the old dquot ...
+	 */
+	xfs_trans_mod_dquot(tp, olddq, XFS_TRANS_DQ_BCOUNT,
+			    -(ip->i_d.di_nblocks));
+	xfs_trans_mod_dquot(tp, olddq, XFS_TRANS_DQ_ICOUNT, -1);
+
+	/* ... and charge them to the new one. */
+	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_BCOUNT,
+			    ip->i_d.di_nblocks);
+	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+	/*
+	 * newdq gets one more reference, taken under its lock, because the
+	 * inode keeps this dquot pointer even after the trans_commit.
+	 */
+	xfs_dqlock(newdq);
+	XFS_DQHOLD(newdq);
+	xfs_dqunlock(newdq);
+
+	*IO_olddq = newdq;
+	return (olddq);
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID). Returns 0 or an error.
+ */
+int
+xfs_qm_vop_chown_reserve(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dquot_t	*udqp,
+	xfs_dquot_t	*gdqp,
+	uint		flags)
+{
+	int		error;
+	xfs_mount_t	*mp;
+	uint		delblks;	/* copy of ip->i_delayed_blks */
+	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
+
+	ASSERT(XFS_ISLOCKED_INODE(ip));
+	mp = ip->i_mount;
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	delblks = ip->i_delayed_blks;
+	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+
+	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+	    ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
+		delblksudq = udqp;	/* the uid really changes */
+		/*
+		 * If there are delayed allocation blocks, then we have to
+		 * unreserve those from the old dquot, and add them to the
+		 * new dquot.
+		 */
+		if (delblks) {
+			ASSERT(ip->i_udquot);
+			unresudq = ip->i_udquot;
+		}
+	}
+	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
+	    ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) {
+		delblksgdq = gdqp;	/* the gid really changes */
+		if (delblks) {
+			ASSERT(ip->i_gdquot);
+			unresgdq = ip->i_gdquot;
+		}
+	}
+
+	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+				flags | XFS_QMOPT_RES_REGBLKS)))
+		return (error);
+
+	/*
+	 * Do the delayed blks reservations/unreservations now. Since, these
+	 * are done without the help of a transaction, if a reservation fails
+	 * its previous reservations won't be automatically undone by trans
+	 * code. So, we have to do it manually here.
+	 */
+	if (delblks) {
+		/*
+		 * Do the reservations first. Unreservation can't fail.
+		 */
+		ASSERT(delblksudq || delblksgdq);
+		ASSERT(unresudq || unresgdq);
+		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+				flags | XFS_QMOPT_RES_REGBLKS)))
+			return (error);
+		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+				XFS_QMOPT_RES_REGBLKS);
+	}
+
+	return (0);
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+	xfs_inode_t	**i_tab)
+{
+	xfs_inode_t	*inode;
+	int		slot;
+	int		error;
+
+	/* Nothing to attach when quotas are off on this mount. */
+	if (! XFS_IS_QUOTA_ON(i_tab[0]->i_mount))
+		return (0);
+
+	/*
+	 * Walk up to four table slots, stopping at the first empty one.
+	 * The first slot was dereferenced above, so it is never empty here.
+	 */
+	for (slot = 0; slot < 4 && i_tab[slot]; slot++) {
+		inode = i_tab[slot];
+
+		/* Same inode as the previous slot: already handled. */
+		if (slot > 0 && inode == i_tab[slot - 1])
+			continue;
+
+		if (XFS_NOT_DQATTACHED(inode->i_mount, inode)) {
+			error = xfs_qm_dqattach(inode, 0);
+			if (error)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+void
+xfs_qm_vop_dqattach_and_dqmod_newinode(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dquot_t	*udqp,
+	xfs_dquot_t	*gdqp)
+{
+	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+		return;			/* quotas off: nothing to attach */
+
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+
+	if (udqp != NULL) {
+		ASSERT(ip->i_udquot == NULL);
+		ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
+		xfs_dqlock(udqp);	/* reference taken under the dqlock */
+		XFS_DQHOLD(udqp);
+		xfs_dqunlock(udqp);
+		ip->i_udquot = udqp;
+		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+	}
+	if (gdqp != NULL) {
+		ASSERT(ip->i_gdquot == NULL);
+		ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
+		xfs_dqlock(gdqp);	/* reference taken under the dqlock */
+		XFS_DQHOLD(gdqp);
+		xfs_dqunlock(gdqp);
+		ip->i_gdquot = gdqp;
+		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+	}
+}
+
+/* ------------- list stuff -----------------*/
+void
+xfs_qm_freelist_init(xfs_frlist_t *ql)
+{
+	/* An empty list: the head links to itself, counters are zero. */
+	mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
+	ql->qh_prev = ql->qh_next = (xfs_dquot_t *) ql;
+	ql->qh_nelems = ql->qh_version = 0;
+}
+
+void
+xfs_qm_freelist_destroy(xfs_frlist_t *ql)
+{
+	xfs_dquot_t	*cur, *following;
+
+	mutex_lock(&ql->qh_lock, PINOD);
+	cur = ql->qh_next;
+	while (cur != (xfs_dquot_t *)ql) {
+		xfs_dqlock(cur);
+		/* Remember the successor before cur leaves the list. */
+		following = cur->dq_flnext;
+#ifdef QUOTADEBUG
+		cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", cur);
+#endif
+		XQM_FREELIST_REMOVE(cur);
+		xfs_dqunlock(cur);
+		xfs_qm_dqdestroy(cur);
+		cur = following;
+	}
+
+	/* The list lock is destroyed while still held; no unlock is done. */
+	mutex_destroy(&ql->qh_lock);
+
+	ASSERT(ql->qh_nelems == 0);
+}
+
+void
+xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
+{
+	dq->dq_flprev = (xfs_dquot_t *)ql;	/* dq goes right after ql */
+	dq->dq_flnext = ql->qh_next;
+	ql->qh_next->dq_flprev = dq;		/* old successor points back */
+	ql->qh_next = dq;
+	xfs_Gqm->qm_dqfreelist.qh_version++;	/* counters always live in */
+	xfs_Gqm->qm_dqfreelist.qh_nelems++;	/* the global freelist head */
+}
+
+void
+xfs_qm_freelist_unlink(xfs_dquot_t *dq)
+{
+	/* Bridge the two neighbours over dq ... */
+	dq->dq_flnext->dq_flprev = dq->dq_flprev;
+	dq->dq_flprev->dq_flnext = dq->dq_flnext;
+	/* ... then leave dq linked only to itself. */
+	dq->dq_flprev = dq;
+	dq->dq_flnext = dq;
+	xfs_Gqm->qm_dqfreelist.qh_version++;
+	xfs_Gqm->qm_dqfreelist.qh_nelems--;
+}
+
+void
+xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
+{
+	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); /* at tail */
+}
+
+int
+xfs_qm_dqhashlock_nowait(
+	xfs_dquot_t *dqp)
+{
+	/*
+	 * One trylock on the dquot's hash chain lock; result passed through.
+	 */
+	return (mutex_trylock(&((dqp)->q_hash->qh_lock)));
+}
+
+int
+xfs_qm_freelist_lock_nowait(
+	xfs_qm_t *xqm)
+{
+	/*
+	 * One trylock on the quota manager's freelist lock; result passed on.
+	 */
+	return (mutex_trylock(&(xqm->qm_dqfreelist.qh_lock)));
+}
+
+int
+xfs_qm_mplist_nowait(
+	xfs_mount_t	*mp)
+{
+	/* The mount must already carry quota information. */
+	ASSERT(mp->m_quotainfo);
+
+	/* One trylock on the mount's dquot list lock; result passed on. */
+	return (mutex_trylock(&(XFS_QI_MPLLOCK(mp))));
+}
diff -Nru a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm.h	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern mutex_t		xfs_Gqm_lock;
+extern struct xfs_qm	*xfs_Gqm;
+extern kmem_zone_t	*qm_dqzone;
+extern kmem_zone_t	*qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
+ * iterate over the mountpt's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS	7
+
+/*
+ * Max freelist rescans in xfs_qm_dqreclaim_one and xfs_qm_shake_freelist.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS	4
+
+/*
+ * Ideal ratio of free to in use dquots. Quota manager makes an attempt
+ * to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO		2
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_NCSIZE_THRESHOLD		5000
+#define XFS_QM_HASHSIZE_LOW		32
+#define XFS_QM_HASHSIZE_HIGH		64
+
+/*
+ * We output a cmn_err when quotachecking a quota file with more than
+ * this many fsbs.
+ */
+#define XFS_QM_BIG_QCHECK_NBLKS		500
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K blk contains 30
+ * (136 * 30 = 4080) dquots. It's probably not worth trying to make
+ * this more dynamic. The expansion is parenthesized to stay one operand.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB	((xfs_filblks_t)1)
+/*
+ * When doing a quotacheck, we log dquot clusters of this many FSBs at most
+ * in a single transaction. We don't want to ask for too huge a log reservation.
+ */
+#define XFS_QM_MAX_DQCLUSTER_LOGSZ	3
+
+typedef xfs_dqhash_t	xfs_dqlist_t;
+/*
+ * The freelist head. The first two fields match the first two in the
+ * xfs_dquot_t structure (in xfs_dqmarker_t); list code casts the head to one.
+ */
+typedef struct xfs_frlist {
+       struct xfs_dquot *qh_next;	/* first dquot, or the head itself */
+       struct xfs_dquot *qh_prev;	/* last dquot, or the head itself */
+       mutex_t		 qh_lock;
+       uint		 qh_version;	/* bumped on each insert/unlink */
+       uint		 qh_nelems;	/* dquots currently on the list */
+} xfs_frlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
+	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
+	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
+	xfs_frlist_t	 qm_dqfreelist;	 /* freelist of dquots */
+	atomic_t	 qm_totaldquots; /* all incore dquots, free or in use */
+	uint		 qm_nrefs;	 /* file systems with quota on */
+	int		 qm_dqfree_ratio;/* target ratio of free to inuse dquots */
+	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
+	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems. The mount structure
+ * keeps a pointer to this (presumably mp->m_quotainfo -- TODO confirm).
+ */
+typedef struct xfs_quotainfo {
+	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
+	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
+	lock_t		 qi_pinlock;	 /* dquot pinning mutex */
+	xfs_dqlist_t	 qi_dqlist;	 /* all dquots in filesys */
+	int		 qi_dqreclaims;	 /* a change here indicates
+					    a removal in the dqlist */
+	time_t		 qi_btimelimit;	 /* limit for blks timer */
+	time_t		 qi_itimelimit;	 /* limit for inodes timer */
+	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
+	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for num warnings */
+	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for num warnings */
+	mutex_t		 qi_quotaofflock;/* to serialize quotaoff */
+	/* Some useful precalculated constants */
+	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
+	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
+} xfs_quotainfo_t;
+
+
+extern xfs_dqtrxops_t	xfs_trans_dquot_ops;
+
+extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void	xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void	xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS		2
+typedef struct xfs_dquot_acct {
+	xfs_dqtrx_t	dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+	xfs_dqtrx_t	dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT	DQ_BTIMELIMIT
+#define XFS_QM_RTBTIMELIMIT	DQ_BTIMELIMIT
+#define XFS_QM_ITIMELIMIT	DQ_FTIMELIMIT
+
+#define XFS_QM_BWARNLIMIT	5
+#define XFS_QM_IWARNLIMIT	5
+
+#define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock, PINOD)) /* pastes: xfs_Gqm -> xfs_Gqm_lock */
+#define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock)) /* arg must be a bare identifier */
+#define XFS_QM_HOLD(xqm)	((xqm)->qm_nrefs++)	/* plain, non-atomic increment */
+#define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)	/* plain, non-atomic decrement */
+
+extern int		xfs_qm_init_quotainfo(xfs_mount_t *);
+extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int		xfs_qm_mount_quotas(xfs_mount_t *);
+extern void		xfs_qm_mount_quotainit(xfs_mount_t *, uint);
+extern void		xfs_qm_unmount_quotadestroy(xfs_mount_t *);
+extern int		xfs_qm_unmount_quotas(xfs_mount_t *);
+extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+extern int		xfs_qm_sync(xfs_mount_t *, short);
+
+/* dquot stuff */
+extern void		xfs_qm_dqunlink(xfs_dquot_t *);
+extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int		xfs_qm_dqattach(xfs_inode_t *, uint);
+extern void		xfs_qm_dqdetach(xfs_inode_t *);
+extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* vop stuff */
+extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
+					uid_t, gid_t, uint,
+					xfs_dquot_t **, xfs_dquot_t **);
+extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
+					xfs_trans_t *, xfs_inode_t *,
+					xfs_dquot_t *, xfs_dquot_t *);
+extern int		xfs_qm_vop_rename_dqattach(xfs_inode_t **);
+extern xfs_dquot_t *	xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
+					xfs_dquot_t **, xfs_dquot_t *);
+extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
+					xfs_dquot_t *, xfs_dquot_t *, uint);
+
+/* list stuff */
+extern void		xfs_qm_freelist_init(xfs_frlist_t *);
+extern void		xfs_qm_freelist_destroy(xfs_frlist_t *);
+extern void		xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *);
+extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
+extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
+extern int		xfs_qm_freelist_lock_nowait(xfs_qm_t *);
+extern int		xfs_qm_mplist_nowait(xfs_mount_t *);
+extern int		xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
+
+/* system call interface */
+extern int		xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t);
+
+#ifdef DEBUG
+extern int		xfs_qm_internalqcheck(xfs_mount_t *);
+#else
+#define xfs_qm_internalqcheck(mp)	(0)
+#endif
+
+#endif /* __XFS_QM_H__ */
diff -Nru a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm_bhv.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include <linux/init.h>
+#include "xfs_qm.h"
+
+#define MNTOPT_QUOTA	"quota"		/* user quota, limits enforced */
+#define MNTOPT_NOQUOTA	"noquota"	/* clear all quota mount flags */
+#define MNTOPT_USRQUOTA	"usrquota"	/* user quota, limits enforced */
+#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota, limits enforced */
+#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota, limits not enforced */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota, limits not enforced */
+#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
+
+/*
+ * Record the quota mount options in args->flags.  Each option handled here
+ * is overwritten with commas before the string goes to the next behavior.
+ */
+STATIC int
+xfs_qm_parseargs(
+	struct bhv_desc		*bhv,
+	char			*options,
+	struct xfs_mount_args	*args,
+	int			update)
+{
+	char			*rest = options;
+	char			*opt;
+	size_t			len;
+	int			referenced = update;
+	int			error;
+
+	while ((opt = strsep(&rest, ",")) != NULL) {
+		/* include the separator strsep() replaced, if there was one */
+		len = strlen(opt) + (rest ? 1 : 0);
+		if (!strcmp(opt, MNTOPT_NOQUOTA)) {
+			args->flags &= ~(XFSMNT_UQUOTA | XFSMNT_UQUOTAENF |
+					 XFSMNT_GQUOTA | XFSMNT_GQUOTAENF);
+			referenced = update;
+		} else if (!strcmp(opt, MNTOPT_QUOTA) ||
+			   !strcmp(opt, MNTOPT_UQUOTA) ||
+			   !strcmp(opt, MNTOPT_USRQUOTA)) {
+			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+			referenced = 1;
+		} else if (!strcmp(opt, MNTOPT_QUOTANOENF) ||
+			   !strcmp(opt, MNTOPT_UQUOTANOENF)) {
+			args->flags |= XFSMNT_UQUOTA;
+			args->flags &= ~XFSMNT_UQUOTAENF;
+			referenced = 1;
+		} else if (!strcmp(opt, MNTOPT_GQUOTA) ||
+			   !strcmp(opt, MNTOPT_GRPQUOTA)) {
+			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+			referenced = 1;
+		} else if (!strcmp(opt, MNTOPT_GQUOTANOENF)) {
+			args->flags |= XFSMNT_GQUOTA;
+			args->flags &= ~XFSMNT_GQUOTAENF;
+			referenced = 1;
+		} else {
+			if (rest)
+				rest[-1] = ',';
+			continue;
+		}
+		memset(opt, ',', len);
+	}
+
+	PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
+	if (!error && !referenced)
+		bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
+	return error;
+}
+
+/*
+ * Write the mount options matching the quota state in m_qflags to the
+ * seq_file, then let the next behavior show its own options.  The value
+ * returned is the error reported through PVFS_SHOWARGS.
+ */
+STATIC int
+xfs_qm_showargs(
+	struct bhv_desc		*bhv,
+	struct seq_file		*m)
+{
+	struct xfs_mount	*mp = XFS_VFSTOM(bhvtovfs(bhv));
+	int			error;
+
+	if (mp->m_qflags & XFS_UQUOTA_ACCT)
+		seq_puts(m, (mp->m_qflags & XFS_UQUOTA_ENFD) ?
+				"," MNTOPT_USRQUOTA : "," MNTOPT_UQUOTANOENF);
+
+	if (mp->m_qflags & XFS_GQUOTA_ACCT)
+		seq_puts(m, (mp->m_qflags & XFS_GQUOTA_ENFD) ?
+				"," MNTOPT_GRPQUOTA : "," MNTOPT_GQUOTANOENF);
+
+	if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT)))
+		seq_puts(m, "," MNTOPT_NOQUOTA);
+
+	PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
+	return error;
+}
+
+/* Call xfs_qm_mount_quotainit() if quota was requested, then chain on. */
+STATIC int
+xfs_qm_mount(
+	struct bhv_desc		*bhv,
+	struct xfs_mount_args	*args,
+	struct cred		*cr)
+{
+	struct xfs_mount	*mp = XFS_VFSTOM(bhvtovfs(bhv));
+	int			error;
+
+	if ((args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA)) != 0)
+		xfs_qm_mount_quotainit(mp, args->flags);
+	PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
+	return error;
+}
+
+/*
+ * Have the quota manager flush the dquots, then pass the sync request on
+ * to the next behavior.
+ */
+STATIC int
+xfs_qm_syncall(
+	struct bhv_desc		*bhv,
+	int			flags,
+	cred_t			*credp)
+{
+	struct xfs_mount	*mp = XFS_VFSTOM(bhvtovfs(bhv));
+	int			error;
+
+	if (XFS_IS_QUOTA_ON(mp)) {
+		error = xfs_qm_sync(mp, flags);
+		if (error) {
+			/*
+			 * An I/O error implies a forced shutdown, so stop
+			 * here; any other failure does not stop the sync.
+			 */
+			ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
+			if (XFS_FORCED_SHUTDOWN(mp))
+				return XFS_ERROR(error);
+		}
+	}
+	PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error);
+	return error;
+}
+
+/*
+ * Turn quotas off in the mount structure and in the in-core superblock;
+ * unless the filesystem is read-only, log the superblock change as well.
+ */
+STATIC void
+xfs_mount_reset_sbqflags(
+	xfs_mount_t		*mp)
+{
+	xfs_trans_t		*tp;
+	unsigned long		s;
+
+	mp->m_qflags = 0;
+	/*
+	 * It is OK to look at sb_qflags here in mount path,
+	 * without SB_LOCK.  Nothing to clear means nothing to write.
+	 */
+	if (mp->m_sb.sb_qflags == 0)
+		return;
+	s = XFS_SB_LOCK(mp);
+	mp->m_sb.sb_qflags = 0;
+	XFS_SB_UNLOCK(mp, s);
+
+	/*
+	 * If the fs is read-only, let the incore superblock run
+	 * with quotas off but don't flush the update out to disk.
+	 */
+	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
+		return;
+#ifdef QUOTADEBUG
+	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+				      XFS_DEFAULT_LOG_COUNT)) {
+		xfs_trans_cancel(tp, 0);
+		return;
+	}
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+	xfs_trans_commit(tp, 0, NULL);
+}
+
+/*
+ * Quota setup installed as the xfs_qminit operation.  Fails with EPERM when
+ * the mount would change the quota state of a read-only device.  Otherwise
+ * quotas are mounted right away if they are on disk and need no quotacheck;
+ * if not, m_qflags is saved in *quotaflags and cleared, and *needquotamount
+ * is set to B_TRUE.
+ */
+STATIC int
+xfs_qm_newmount(
+	xfs_mount_t	*mp,
+	uint		*needquotamount,
+	uint		*quotaflags)
+{
+	uint		quotaondisk;
+	uint		uquotaondisk = 0, gquotaondisk = 0;
+
+	*quotaflags = 0;
+	*needquotamount = B_FALSE;
+
+	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+		mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT);
+
+	if (quotaondisk) {
+		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+	}
+
+	/*
+	 * On a read-only device the user must not change the quota state:
+	 * that needs a transaction, which would end in I/O error and shutdown.
+	 */
+	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+	    (!gquotaondisk &&  XFS_IS_GQUOTA_ON(mp)))  &&
+	    xfs_dev_is_read_only(mp, "changing quota state")) {
+		cmn_err(CE_WARN,
+			"XFS: please mount with%s%s%s.",
+			(!quotaondisk ? "out quota" : ""),
+			(uquotaondisk ? " usrquota" : ""),
+			(gquotaondisk ? " grpquota" : ""));
+		return XFS_ERROR(EPERM);
+	}
+
+	if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+		/* Mount quotas now only if no quotacheck will be needed. */
+		if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+			/*
+			 * If the xfs quota code isn't installed,
+			 * we have to reset the quotachk'd bit.
+			 * If an error occurred, qm_mount_quotas code
+			 * has already disabled quotas. So, just finish
+			 * mounting, and get on with the boring life
+			 * without disk quotas.
+			 */
+			if (xfs_qm_mount_quotas(mp))
+				xfs_mount_reset_sbqflags(mp);
+		} else {
+			/*
+			 * Clear the quota flags, but remember them, so that
+			 * the quota code doesn't get invoked before we're
+			 * ready. This can happen when an inode goes inactive
+			 * and wants to free blocks, or via xfs_log_mount_finish.
+			 */
+			*needquotamount = B_TRUE;
+			*quotaflags = mp->m_qflags;
+			mp->m_qflags = 0;
+		}
+	}
+
+	return 0;
+}
+
+/* Restore the saved quota flags and mount quotas if that is still needed. */
+STATIC int
+xfs_qm_endmount(
+	xfs_mount_t	*mp,
+	uint		needquotamount,
+	uint		quotaflags)
+{
+	if (needquotamount) {
+		ASSERT(mp->m_qflags == 0);
+		mp->m_qflags = quotaflags;
+		if (xfs_qm_mount_quotas(mp) != 0)
+			xfs_mount_reset_sbqflags(mp);
+	}
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+	else
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
+#ifdef QUOTADEBUG
+	if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
+		cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
+#endif
+
+	return 0;
+}
+
+/*
+ * Wrapper around xfs_qm_dqrele() that ignores a NULL dquot; installed as
+ * the xfs_dqrele operation in xfs_qmcore_xfs.
+ */
+STATIC void
+xfs_qm_dqrele_null(
+	xfs_dquot_t	*dq)
+{
+	if (dq != NULL)
+		xfs_qm_dqrele(dq);
+}
+
+/* Operation tables that xfs_qm_init() passes to vfs_bhv_set_custom(). */
+struct xfs_qmops xfs_qmcore_xfs = {
+	.xfs_qminit		= xfs_qm_newmount,
+	.xfs_qmdone		= xfs_qm_unmount_quotadestroy,
+	.xfs_qmmount		= xfs_qm_endmount,
+	.xfs_qmunmount		= xfs_qm_unmount_quotas,
+	.xfs_dqrele		= xfs_qm_dqrele_null,
+	.xfs_dqattach		= xfs_qm_dqattach,
+	.xfs_dqdetach		= xfs_qm_dqdetach,
+	.xfs_dqpurgeall		= xfs_qm_dqpurge_all,
+	.xfs_dqvopalloc		= xfs_qm_vop_dqalloc,
+	.xfs_dqvopcreate	= xfs_qm_vop_dqattach_and_dqmod_newinode,
+	.xfs_dqvoprename	= xfs_qm_vop_rename_dqattach,
+	.xfs_dqvopchown		= xfs_qm_vop_chown,
+	.xfs_dqvopchownresv	= xfs_qm_vop_chown_reserve,
+	.xfs_dqtrxops		= &xfs_trans_dquot_ops,
+};
+
+struct bhv_vfsops xfs_qmops = { {
+	BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM),
+	.vfs_parseargs		= xfs_qm_parseargs,
+	.vfs_showargs		= xfs_qm_showargs,
+	.vfs_mount		= xfs_qm_mount,
+	.vfs_sync		= xfs_qm_syncall,
+	.vfs_quotactl		= xfs_qm_quotactl, },
+};
+
+/* Init: print a banner, set up xfs_Gqm_lock, register ops, add /proc files. */
+void __init
+xfs_qm_init(void)
+{
+	static char	message[] __initdata =
+		KERN_INFO "SGI XFS Quota Management subsystem\n";
+
+	printk(message);
+	mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock");
+	vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
+	xfs_qm_init_procfs();
+}
+/* Exit: undo xfs_qm_init() and destroy the dquot zones that were created. */
+void __exit
+xfs_qm_exit(void)
+{
+	vfs_bhv_clr_custom(&xfs_qmops);
+	xfs_qm_cleanup_procfs();
+	if (qm_dqzone)
+		kmem_cache_destroy(qm_dqzone);
+	if (qm_dqtrxzone)
+		kmem_cache_destroy(qm_dqtrxzone);
+}
diff -Nru a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm_stats.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include <linux/proc_fs.h>
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+/* /proc read routine: maximum; incore; ratio free to inuse; freelist */
+STATIC int
+xfs_qm_read_xfsquota(
+	char		*buffer,
+	char		**start,
+	off_t		offset,
+	int		count,
+	int		*eof,
+	void		*data)
+{
+	int		len;
+
+	len = sprintf(buffer, "%d\t%d\t%d\t%u\n",
+			ndquot,
+			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+			xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
+
+	*start = buffer;
+	if (offset >= len) {
+		*eof = 1;
+		return 0;
+	}
+	*start += offset;
+	len -= offset;
+	if (len > count)
+		return count;
+	*eof = 1;
+	return len;
+}
+
+/* /proc read routine: the eight xqmstats counters on one "qm" line. */
+STATIC int
+xfs_qm_read_stats(
+	char		*buffer,
+	char		**start,
+	off_t		offset,
+	int		count,
+	int		*eof,
+	void		*data)
+{
+	int		len;
+
+	len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n",
+			xqmstats.xs_qm_dqreclaims,
+			xqmstats.xs_qm_dqreclaim_misses,
+			xqmstats.xs_qm_dquot_dups,
+			xqmstats.xs_qm_dqcachemisses,
+			xqmstats.xs_qm_dqcachehits,
+			xqmstats.xs_qm_dqwants,
+			xqmstats.xs_qm_dqshake_reclaims,
+			xqmstats.xs_qm_dqinact_reclaims);
+
+	*start = buffer;
+	if (offset >= len) {
+		*eof = 1;
+		return 0;
+	}
+	*start += offset;
+	len -= offset;
+	if (len > count)
+		return count;
+	*eof = 1;
+	return len;
+}
+/* Create the fs/xfs/xqmstat and fs/xfs/xqm /proc read entries. */
+void
+xfs_qm_init_procfs(void)
+{
+	create_proc_read_entry("fs/xfs/xqmstat", 0, 0, xfs_qm_read_stats, NULL);
+	create_proc_read_entry("fs/xfs/xqm", 0, 0, xfs_qm_read_xfsquota, NULL);
+}
+/* Remove the fs/xfs/xqm and fs/xfs/xqmstat /proc entries. */
+void
+xfs_qm_cleanup_procfs(void)
+{
+	remove_proc_entry("fs/xfs/xqm", NULL);
+	remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
diff -Nru a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm_stats.h	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics; the counters are bumped with XQM_STATS_INC().
+ */
+struct xqmstats {
+	__uint32_t		xs_qm_dqreclaims;
+	__uint32_t		xs_qm_dqreclaim_misses;
+	__uint32_t		xs_qm_dquot_dups;
+	__uint32_t		xs_qm_dqcachemisses;
+	__uint32_t		xs_qm_dqcachehits;
+	__uint32_t		xs_qm_dqwants;
+	__uint32_t		xs_qm_dqshake_reclaims;
+	__uint32_t		xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count)	( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+/* Statistics compiled out: counting and the procfs hooks become no-ops. */
+# define XQM_STATS_INC(count)	do { } while (0)
+
+static __inline void xfs_qm_init_procfs(void) { }
+static __inline void xfs_qm_cleanup_procfs(void) { }
+
+#endif
+
+#endif	/* __XFS_QM_STATS_H__ */
diff -Nru a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_qm_syscalls.c	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,1410 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include "xfs_qm.h"
+
+#ifdef DEBUG
+# define qdprintk(s, args...)	cmn_err(CE_DEBUG, s, ## args)
+#else
+# define qdprintk(s, args...)	do { } while (0)
+#endif
+
+STATIC int	xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+STATIC int	xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+STATIC int	xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+STATIC int	xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+STATIC int	xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+STATIC int	xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
+STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+					uint);
+STATIC uint	xfs_qm_import_flags(uint);
+STATIC uint	xfs_qm_export_flags(uint);
+STATIC uint	xfs_qm_import_qtype_flags(uint);
+STATIC uint	xfs_qm_export_qtype_flags(uint);
+STATIC void	xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+					fs_disk_quota_t *);
+
+
+/*
+ * The main distribution switch of all XFS quotactl system calls: commands
+ * that are valid with quotas off are handled first, everything else fails
+ * with ESRCH unless quotas are on.  Apart from Q_SYNC, every command needs
+ * a non-NULL addr and a non-negative id.
+ */
+int
+xfs_qm_quotactl(
+	struct bhv_desc *bdp,
+	int		cmd,
+	int		id,
+	xfs_caddr_t	addr)
+{
+	struct vfs	*vfsp = bhvtovfs(bdp);
+	xfs_mount_t	*mp = XFS_VFSTOM(vfsp);
+	int		error;
+
+	if (cmd != Q_SYNC && (addr == NULL || id < 0))
+		return XFS_ERROR(EINVAL);
+
+	/*
+	 * First the commands that do not require quotas to be on.
+	 */
+	switch (cmd) {
+	case Q_XQUOTARM:
+		/* Truncate the quota files; quota must be off. */
+		if (XFS_IS_QUOTA_ON(mp) || addr == NULL)
+			return XFS_ERROR(EINVAL);
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		return xfs_qm_scall_trunc_qfiles(mp,
+				xfs_qm_import_qtype_flags(*(uint *)addr));
+
+	case Q_XGETQSTAT:
+		/*
+		 * Get quota status information.
+		 */
+		return xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr);
+
+	case Q_XQUOTAON:
+		/*
+		 * QUOTAON for root f/s and quota enforcement on others..
+		 * Quota accounting for non-root f/s's must be turned on
+		 * at mount time.
+		 */
+		if (addr == NULL)
+			return XFS_ERROR(EINVAL);
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		return xfs_qm_scall_quotaon(mp,
+				xfs_qm_import_flags(*(uint *)addr));
+
+	case Q_XQUOTAOFF:
+		/*
+		 * Handled below once quotas are known to be on; a read-only
+		 * filesystem is rejected before that check, though.
+		 */
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		break;
+
+	default:
+		break;
+	}
+
+	if (!XFS_IS_QUOTA_ON(mp))
+		return XFS_ERROR(ESRCH);
+
+	switch (cmd) {
+	case Q_XQUOTAOFF:
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		error = xfs_qm_scall_quotaoff(mp,
+				xfs_qm_import_flags(*(uint *)addr), B_FALSE);
+		break;
+
+	case Q_XGETQUOTA:
+		/*
+		 * The user quota variant (XFS_DQ_USER) of the command.
+		 */
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
+					(fs_disk_quota_t *)addr);
+		break;
+
+	case Q_XGETGQUOTA:
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+					(fs_disk_quota_t *)addr);
+		break;
+
+	case Q_XSETQLIM:
+		/*
+		 * Set limits, both hard and soft, for a user id.
+		 */
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
+					     (fs_disk_quota_t *)addr);
+		break;
+
+	case Q_XSETGQLIM:
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+					     (fs_disk_quota_t *)addr);
+		break;
+
+	default:
+		/*
+		 * Quotas are entirely undefined after quotaoff in XFS quotas.
+		 * For instance, there's no way to set limits when quotaoff.
+		 */
+		error = XFS_ERROR(EINVAL);
+		break;
+	}
+
+	return error;
+}
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots.  Reached from Q_XQUOTAOFF (force == B_FALSE) in this file.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+STATIC int
+xfs_qm_scall_quotaoff(
+	xfs_mount_t		*mp,
+	uint			flags,
+	boolean_t		force)
+{
+	uint			dqtype;
+	unsigned long		s;
+	int			error;
+	uint			inactivate_flags;
+	xfs_qoff_logitem_t	*qoffstart;
+	int			nculprits;
+
+	if (!force && !capable(CAP_SYS_ADMIN))
+		return XFS_ERROR(EPERM);
+	/*
+	 * No file system can have quotas enabled on disk but not in core.
+	 * Note that quota utilities (like quotaoff) _expect_
+	 * errno == EEXIST here.
+	 */
+	if ((mp->m_qflags & flags) == 0)
+		return XFS_ERROR(EEXIST);
+	error = 0;
+
+	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+	/*
+	 * We don't want to deal with two quotaoffs messing up each other,
+	 * so we're going to serialize it. quotaoff isn't exactly a performance
+	 * critical thing.
+	 * If quotaoff, then we must be dealing with the root filesystem.
+	 */
+	ASSERT(mp->m_quotainfo);
+	if (mp->m_quotainfo)
+		mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+
+	/*
+	 * If we're just turning off quota enforcement, change mp and go.
+	 */
+	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+		mp->m_qflags &= ~(flags);
+
+		s = XFS_SB_LOCK(mp);
+		mp->m_sb.sb_qflags = mp->m_qflags;
+		XFS_SB_UNLOCK(mp, s);
+		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+		/* XXX what to do if error ? Revert back to old vals incore ? */
+		error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+		return (error);
+	}
+
+	dqtype = 0;
+	inactivate_flags = 0;
+	/*
+	 * If accounting is off, we must turn enforcement off, clear the
+	 * quota 'CHKD' certificate to make it known that we have to
+	 * do a quotacheck the next time this quota is turned on.
+	 */
+	if (flags & XFS_UQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_UQUOTA;
+		flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+		inactivate_flags |= XFS_UQUOTA_ACTIVE;
+	}
+	if (flags & XFS_GQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_GQUOTA;
+		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
+		inactivate_flags |= XFS_GQUOTA_ACTIVE;
+	}
+
+	/* Nothing left to turn off?  Don't complain. */
+	if ((mp->m_qflags & flags) == 0) {
+		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+		return (0);
+	}
+
+	/*
+	 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+	 * and synchronously.  If that fails, give up before m_qflags is
+	 * touched, and never hand an unset qoffstart to the end record below.
+	 */
+	error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+	if (error) {
+		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+		return (error);
+	}
+
+	/*
+	 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+	 * to take care of the race between dqget and quotaoff. We don't take
+	 * any special locks to reset these bits. All processes need to check
+	 * these bits *after* taking inode lock(s) to see if the particular
+	 * quota type is in the process of being turned off. If *ACTIVE, it is
+	 * guaranteed that all dquot structures and all quotainode ptrs will all
+	 * stay valid as long as that inode is kept locked.
+	 *
+	 * There is no turning back after this.
+	 */
+	mp->m_qflags &= ~inactivate_flags;
+
+	/*
+	 * Give back all the dquot reference(s) held by inodes.
+	 * Here we go thru every single incore inode in this file system, and
+	 * do a dqrele on the i_udquot/i_gdquot that it may have.
+	 * Essentially, as long as somebody has an inode locked, this guarantees
+	 * that quotas will not be turned off. This is handy because in a
+	 * transaction once we lock the inode(s) and check for quotaon, we can
+	 * depend on the quota inodes (and other things) being valid as long as
+	 * we keep the lock(s).
+	 */
+	xfs_qm_dqrele_all_inodes(mp, flags);
+
+	/*
+	 * Next we make the changes in the quota flag in the mount struct.
+	 * This isn't protected by a particular lock directly, because we
+	 * don't want to take a mrlock everytime we depend on quotas being on.
+	 */
+	mp->m_qflags &= ~(flags);
+
+	/*
+	 * Go through all the dquots of this file system and purge them,
+	 * according to what was turned off. We may not be able to get rid
+	 * of all dquots, because dquots can have temporary references that
+	 * are not attached to inodes. eg. xfs_setattr, xfs_create.
+	 * So, if we couldn't purge all the dquots from the filesystem,
+	 * we can't get rid of the incore data structures.
+	 */
+	while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF)))
+		delay(10 * nculprits);
+
+	/*
+	 * Transactions that had started before ACTIVE state bit was cleared
+	 * could have logged many dquots, so they'd have higher LSNs than
+	 * the first QUOTAOFF log record does. If we happen to crash when
+	 * the tail of the log has gone past the QUOTAOFF record, but
+	 * before the last dquot modification, those dquots __will__
+	 * recover, and that's not good.
+	 *
+	 * So, we have QUOTAOFF start and end logitems; the start
+	 * logitem won't get overwritten until the end logitem appears...
+	 */
+	xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+
+	/*
+	 * If quotas is completely disabled, close shop.
+	 */
+	if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) {
+		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+		xfs_qm_destroy_quotainfo(mp);
+		return (0);
+	}
+
+	/*
+	 * Release our quotainode references, and vn_purge them,
+	 * if we don't need them anymore.
+	 */
+	if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
+		XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+		XFS_QI_UQIP(mp) = NULL;
+	}
+	if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) {
+		XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+		XFS_QI_GQIP(mp) = NULL;
+	}
+	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+	return (error);
+}
+
+/*
+ * Truncate the quota file(s) selected by flags (xfs_truncate_file() failures
+ * are ignored).  The first xfs_iget() error is returned, not just the last.
+ */
+STATIC int
+xfs_qm_scall_trunc_qfiles(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		error = 0, error2 = 0;
+	xfs_inode_t	*qip;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return XFS_ERROR(EPERM);
+	if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
+		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+	if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
+		error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, &qip, 0);
+		if (! error) {
+			(void) xfs_truncate_file(mp, qip);
+			VN_RELE(XFS_ITOV(qip));
+		}
+	}
+	if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) {
+		error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, &qip, 0);
+		if (! error2) {
+			(void) xfs_truncate_file(mp, qip);
+			VN_RELE(XFS_ITOV(qip));
+		}
+	}
+	return error ? error : error2;
+}
+
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem.  This takes
+ * effect immediately, unless quota accounting is on on disk but not (yet)
+ * in core, in which case only the superblock flags are updated.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+STATIC int
+xfs_qm_scall_quotaon(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		error;
+	unsigned long s;
+	uint		qf;
+	__int64_t	sbflags;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return XFS_ERROR(EPERM);
+
+	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+	/*
+	 * Switching on quota accounting must be done at mount time.
+	 */
+	flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+	sbflags = 0;
+
+	if (flags == 0) {
+		qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/* No fs can turn on quotas with a delayed effect */
+	ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+	/*
+	 * Can't enforce without accounting. We check the superblock
+	 * qflags here instead of m_qflags because rootfs can have
+	 * quota acct on ondisk without m_qflags' knowing.
+	 */
+	if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+	    (flags & XFS_UQUOTA_ENFD))
+	    ||
+	    ((flags & XFS_GQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+	    (flags & XFS_GQUOTA_ENFD))) {
+		qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
+			flags, mp->m_sb.sb_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+	/* If everything's upto-date incore, then don't waste time. */
+	if ((mp->m_qflags & flags) == flags)
+		return XFS_ERROR(EEXIST);
+
+	/*
+	 * Change sb_qflags on disk but not incore mp->qflags
+	 * if this is the root filesystem.
+	 */
+	s = XFS_SB_LOCK(mp);
+	qf = mp->m_sb.sb_qflags;
+	mp->m_sb.sb_qflags = qf | flags;
+	XFS_SB_UNLOCK(mp, s);
+
+	/*
+	 * There's nothing to change if it's the same.
+	 */
+	if ((qf & flags) == flags && sbflags == 0)
+		return XFS_ERROR(EEXIST);
+	sbflags |= XFS_SB_QFLAGS;
+
+	if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+		return (error);
+	/*
+	 * If we aren't trying to switch on quota enforcement, or accounting
+	 * on disk and in core disagree for either quota type, we are done.
+	 */
+	if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+	    ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
+	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
+		return (0);
+
+	if (! XFS_IS_QUOTA_RUNNING(mp))
+		return XFS_ERROR(ESRCH);
+
+	/*
+	 * Switch on quota enforcement in core.
+	 */
+	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+	mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+	return (0);
+}
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ * Quota inodes not held in m_quotainfo are looked up temporarily with
+ * xfs_iget() and released again once their sizes have been copied out.
+ */
+STATIC int
+xfs_qm_scall_getqstat(
+	xfs_mount_t	*mp,
+	fs_quota_stat_t *out)
+{
+	xfs_inode_t	*uip, *gip;
+	boolean_t	tempuqip, tempgqip;
+
+	uip = gip = NULL;
+	tempuqip = tempgqip = B_FALSE;
+	memset(out, 0, sizeof(fs_quota_stat_t));
+
+	out->qs_version = FS_QSTAT_VERSION;
+	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+		out->qs_uquota.qfs_ino = NULLFSINO;
+		out->qs_gquota.qfs_ino = NULLFSINO;
+		return (0);
+	}
+	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+							(XFS_ALL_QUOTA_ACCT|
+							 XFS_ALL_QUOTA_ENFD));
+	out->qs_pad = 0;
+	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+	if (mp->m_quotainfo) {
+		uip = mp->m_quotainfo->qi_uquotaip;
+		gip = mp->m_quotainfo->qi_gquotaip;
+	}
+	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, &uip, 0) == 0)
+			tempuqip = B_TRUE;
+	}
+	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, &gip, 0) == 0)
+			tempgqip = B_TRUE;
+	}
+	if (uip) {
+		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+		if (tempuqip)
+			VN_RELE(XFS_ITOV(uip));
+	}
+	if (gip) {
+		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+		if (tempgqip)
+			VN_RELE(XFS_ITOV(gip));
+	}
+	if (mp->m_quotainfo) {
+		out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
+		out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
+		out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
+		out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
+		out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
+		out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
+	}
+	return (0);
+}
+
+/*
+ * Adjust quota limits and timers for one dquot; returns 0 or an error.
+ */
+STATIC int
+xfs_qm_scall_setqlim(
+	xfs_mount_t		*mp,
+	xfs_dqid_t		id,	/* 0 also sets the mount timelimits */
+	uint			type,
+	fs_disk_quota_t		*newlim)	/* d_fieldmask picks the fields */
+{
+	xfs_disk_dquot_t	*ddq;
+	xfs_dquot_t		*dqp;
+	xfs_trans_t		*tp;
+	int			error;
+	xfs_qcnt_t		hard, soft;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return XFS_ERROR(EPERM);
+
+	if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0)
+		return (0);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return (error);
+	}
+
+	/*
+	 * We don't want to race with a quotaoff so take the quotaoff lock.
+	 * (We don't hold an inode lock, so there's nothing else to stop
+	 * a quotaoff from happening). (XXXThis doesn't currently happen
+	 * because we take the vfslock before calling xfs_qm_sysent).
+	 */
+	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+
+	/*
+	 * Get the dquot (locked), and join it to the transaction.
+	 * Allocate the dquot if this doesn't exist.
+	 */
+	if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+		ASSERT(error != ENOENT);
+		return (error);
+	}
+	xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
+	xfs_trans_dqjoin(tp, dqp);
+	ddq = &dqp->q_core;
+
+	/*
+	 * Make sure that hardlimits are >= soft limits before changing.
+	 */
+	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+			INT_GET(ddq->d_blk_hardlimit, ARCH_CONVERT);
+	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+			INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT);
+	if (hard == 0 || hard >= soft) {
+		INT_SET(ddq->d_blk_hardlimit, ARCH_CONVERT, hard);
+		INT_SET(ddq->d_blk_softlimit, ARCH_CONVERT, soft);
+	}
+	else {
+		qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
+	}
+	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+			INT_GET(ddq->d_rtb_hardlimit, ARCH_CONVERT);
+	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+			INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT);
+	if (hard == 0 || hard >= soft) {
+		INT_SET(ddq->d_rtb_hardlimit, ARCH_CONVERT, hard);
+		INT_SET(ddq->d_rtb_softlimit, ARCH_CONVERT, soft);
+	}
+	else
+		qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+
+	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+		(xfs_qcnt_t) newlim->d_ino_hardlimit :
+		INT_GET(ddq->d_ino_hardlimit, ARCH_CONVERT);
+	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+		(xfs_qcnt_t) newlim->d_ino_softlimit :
+		INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT);
+	if (hard == 0 || hard >= soft) {
+		INT_SET(ddq->d_ino_hardlimit, ARCH_CONVERT, hard);
+		INT_SET(ddq->d_ino_softlimit, ARCH_CONVERT, soft);
+	}
+	else
+		qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
+
+	if (id == 0) {
+		/*
+		 * Timelimits for the super user set the relative time
+		 * the other users can be over quota for this file system.
+		 * If it is zero a default is used.
+		 */
+		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+			mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
+			INT_SET(dqp->q_core.d_btimer, ARCH_CONVERT, newlim->d_btimer);
+		}
+		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+			mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
+			INT_SET(dqp->q_core.d_itimer, ARCH_CONVERT, newlim->d_itimer);
+		}
+		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+			mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
+			INT_SET(dqp->q_core.d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
+		}
+	} else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ {
+		/*
+		 * If the user is now over quota, start the timelimit.
+		 * The user will not be 'warned'.
+		 * Note that we keep the timers ticking, whether enforcement
+		 * is on or off. We don't really want to bother with iterating
+		 * over all ondisk dquots and turning the timers on/off.
+		 */
+		xfs_qm_adjust_dqtimers(mp, ddq);
+	}
+	dqp->dq_flags |= XFS_DQ_DIRTY;
+	xfs_trans_log_dquot(tp, dqp);
+
+	xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
+	error = xfs_trans_commit(tp, 0, NULL);	/* may fail: tell the caller */
+	xfs_qm_dqprint(dqp);
+	xfs_qm_dqrele(dqp);
+	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+	return (error);
+}
+
+/*
+ * Look up an existing dquot and copy it to *out in the exportable format.
+ * Returns 0, the xfs_qm_dqget() error, or ENOENT if the dquot is all zero.
+ */
+STATIC int
+xfs_qm_scall_getquota(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	fs_disk_quota_t *out)
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+
+	/*
+	 * Try to get the dquot. We don't want it allocated on disk, so
+	 * we aren't passing the XFS_QMOPT_DQALLOC flag. If it doesn't
+	 * exist, we'll get ENOENT back.
+	 */
+	if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp)))
+		return (error);
+
+	xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
+	/*
+	 * If everything's NULL, this dquot doesn't quite exist as far as
+	 * our utility programs are concerned.
+	 */
+	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+		xfs_qm_dqput(dqp);
+		return XFS_ERROR(ENOENT);
+	}
+	/* Convert the disk dquot to the exportable format. */
+	xfs_qm_export_dquot(mp, &dqp->q_core, out);
+	xfs_qm_dqput(dqp);
+	return (0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+	xfs_mount_t		*mp,
+	xfs_qoff_logitem_t	*startqoff,
+	uint			flags)
+{
+	xfs_trans_t		*trans;
+	xfs_qoff_logitem_t	*end_item;
+	int			err;
+
+	/* Log a quotaoff item that refers back to startqoff. */
+	trans = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+	err = xfs_trans_reserve(trans, 0, sizeof(xfs_qoff_logitem_t) * 2,
+				0, 0, XFS_DEFAULT_LOG_COUNT);
+	if (err) {
+		xfs_trans_cancel(trans, 0);
+		return (err);
+	}
+
+	end_item = xfs_trans_get_qoff_item(trans, startqoff,
+					   flags & XFS_ALL_QUOTA_ACCT);
+	xfs_trans_log_quotaoff_item(trans, end_item);
+
+	/*
+	 * The transaction must be on disk before the caller goes on to stop
+	 * quota accounting, so commit synchronously; quotaoff is not a
+	 * performance-sensitive path.
+	 */
+	xfs_trans_set_sync(trans);
+	return (xfs_trans_commit(trans, 0, NULL));
+}
+
+
+/*
+ * Synchronously log a quotaoff start item plus the superblock qflags left
+ * after clearing 'flags'.  *qoffstartp gets the item (NULL on reserve error).
+ */
+STATIC int
+xfs_qm_log_quotaoff(
+	xfs_mount_t	       *mp,
+	xfs_qoff_logitem_t     **qoffstartp,
+	uint		       flags)
+{
+	xfs_trans_t	       *tp;
+	int			error;
+	unsigned long	s;
+	xfs_qoff_logitem_t     *qoffi=NULL;
+	uint			oldsbqflag;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+	error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2 +
+				  mp->m_sb.sb_sectsize + 128,
+				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	if (error) {
+		/* sb_qflags is untouched so far: just drop the transaction. */
+		xfs_trans_cancel(tp, 0);
+		goto out;
+	}
+
+	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+	xfs_trans_log_quotaoff_item(tp, qoffi);
+
+	s = XFS_SB_LOCK(mp);
+	oldsbqflag = mp->m_sb.sb_qflags;
+	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+	XFS_SB_UNLOCK(mp, s);
+
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+	/* Must be on disk before accounting stops, so commit synchronously. */
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0, NULL);
+	if (error) {
+		/*
+		 * xfs_trans_commit() is expected to have released tp even on
+		 * failure, so do not cancel it; only undo the in-core change.
+		 * No one else is modifying sb_qflags (we still hold the
+		 * quotaofflock), so this is OK.
+		 */
+		s = XFS_SB_LOCK(mp);
+		mp->m_sb.sb_qflags = oldsbqflag;
+		XFS_SB_UNLOCK(mp, s);
+	}
+out:
+	*qoffstartp = qoffi;
+	return (error);
+}
+
+
+/*
+ * Convert an on-disk dquot into the fs_disk_quota layout handed to callers.
+ * Block counts and limits go from filesystem blocks to basic blocks via
+ * XFS_FSB_TO_BB(); every on-disk field is read with INT_GET(.., ARCH_CONVERT),
+ * and timers are reported as zero while quota enforcement is off.
+ */
+STATIC void
+xfs_qm_export_dquot(
+	xfs_mount_t		*mp,
+	xfs_disk_dquot_t	*src,
+	struct fs_disk_quota	*dst)
+{
+	memset(dst, 0, sizeof(*dst));
+	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
+	dst->d_id = INT_GET(src->d_id, ARCH_CONVERT);
+	dst->d_flags =
+		xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
+
+	/* Regular blocks. */
+	dst->d_blk_hardlimit = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_hardlimit, ARCH_CONVERT));
+	dst->d_blk_softlimit = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_softlimit, ARCH_CONVERT));
+	dst->d_bcount = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_bcount, ARCH_CONVERT));
+	dst->d_btimer = (__uint32_t) INT_GET(src->d_btimer, ARCH_CONVERT);
+	dst->d_bwarns = INT_GET(src->d_bwarns, ARCH_CONVERT);
+
+	/* Inodes. */
+	dst->d_ino_hardlimit = (__uint64_t)
+		INT_GET(src->d_ino_hardlimit, ARCH_CONVERT);
+	dst->d_ino_softlimit = (__uint64_t)
+		INT_GET(src->d_ino_softlimit, ARCH_CONVERT);
+	dst->d_icount = (__uint64_t) INT_GET(src->d_icount, ARCH_CONVERT);
+	dst->d_itimer = (__uint32_t) INT_GET(src->d_itimer, ARCH_CONVERT);
+	dst->d_iwarns = INT_GET(src->d_iwarns, ARCH_CONVERT);
+
+	/* Realtime blocks. */
+	dst->d_rtb_hardlimit = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_hardlimit, ARCH_CONVERT));
+	dst->d_rtb_softlimit = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_softlimit, ARCH_CONVERT));
+	dst->d_rtbcount = (__uint64_t)
+		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtbcount, ARCH_CONVERT));
+	dst->d_rtbtimer = (__uint32_t) INT_GET(src->d_rtbtimer, ARCH_CONVERT);
+	dst->d_rtbwarns = INT_GET(src->d_rtbwarns, ARCH_CONVERT);
+
+	/*
+	 * Timers are not reset internally when enforcement is turned off;
+	 * hand zeroes to the userlevel code in that case.
+	 */
+	if (! XFS_IS_QUOTA_ENFORCED(mp))
+		dst->d_btimer = dst->d_itimer = dst->d_rtbtimer = 0;
+
+#ifdef DEBUG
+	/* At or over a soft limit, a dquot with id != 0 needs a timer set. */
+	if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) {
+		if ((dst->d_blk_softlimit > 0) &&
+		    ((int) dst->d_bcount >= (int) dst->d_blk_softlimit))
+			ASSERT(dst->d_btimer != 0);
+		if ((dst->d_ino_softlimit > 0) &&
+		    ((int) dst->d_icount >= (int) dst->d_ino_softlimit))
+			ASSERT(dst->d_itimer != 0);
+	}
+#endif
+}
+
+STATIC uint
+xfs_qm_import_qtype_flags(
+	uint uflags)
+{
+	const uint	both = XFS_GROUP_QUOTA | XFS_USER_QUOTA;
+	const uint	qtype = uflags & both;
+
+	/* Reject "both" and "neither": a dquot has exactly one type. */
+	if (qtype == both || qtype == 0)
+		return (0);
+
+	if (uflags & XFS_USER_QUOTA)
+		return (XFS_DQ_USER);
+	return (XFS_DQ_GROUP);
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+	uint flags)
+{
+	/* Exactly one quota type is expected to be set in flags. */
+	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) !=
+		(XFS_GROUP_QUOTA | XFS_USER_QUOTA));
+	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0);
+
+	/* Anything that is not a user dquot is reported as a group one. */
+	if (flags & XFS_DQ_USER)
+		return (XFS_USER_QUOTA);
+	return (XFS_GROUP_QUOTA);
+}
+
+STATIC uint
+xfs_qm_import_flags(
+	uint uflags)
+{
+	uint	acct, enfd;
+
+	/*
+	 * Translate the user-visible XFS_QUOTA_* bits into the in-kernel
+	 * accounting and enforcement flags; any other bits are dropped.
+	 */
+	acct = ((uflags & XFS_QUOTA_UDQ_ACCT) ? XFS_UQUOTA_ACCT : 0) |
+	       ((uflags & XFS_QUOTA_GDQ_ACCT) ? XFS_GQUOTA_ACCT : 0);
+	enfd = ((uflags & XFS_QUOTA_UDQ_ENFD) ? XFS_UQUOTA_ENFD : 0) |
+	       ((uflags & XFS_QUOTA_GDQ_ENFD) ? XFS_GQUOTA_ENFD : 0);
+	return (acct | enfd);
+}
+
+
+STATIC uint
+xfs_qm_export_flags(
+	uint flags)
+{
+	uint	acct, enfd;
+
+	/*
+	 * Translate the in-kernel accounting and enforcement flags into
+	 * the user-visible XFS_QUOTA_* bits; any other bits in flags
+	 * are dropped.
+	 */
+	acct = ((flags & XFS_UQUOTA_ACCT) ? XFS_QUOTA_UDQ_ACCT : 0) |
+	       ((flags & XFS_GQUOTA_ACCT) ? XFS_QUOTA_GDQ_ACCT : 0);
+	enfd = ((flags & XFS_UQUOTA_ENFD) ? XFS_QUOTA_UDQ_ENFD : 0) |
+	       ((flags & XFS_GQUOTA_ENFD) ? XFS_QUOTA_GDQ_ENFD : 0);
+	return (acct | enfd);
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing the user and/or
+ * group dquots selected by flags (XFS_UQUOTA_ACCT, XFS_GQUOTA_ACCT).
+ * Note that the mount structure gets modified to indicate that quotas are
+ * off AFTER this, in the case of quotaoff. Also called from xfs_rootumount.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+	struct xfs_mount *mp,
+	uint		 flags)
+{
+	vmap_t		vmap;
+	xfs_inode_t	*ip, *topino;
+	uint		ireclaims;
+	vnode_t		*vp;
+	boolean_t	vnode_refd;
+
+	ASSERT(mp->m_quotainfo);
+
+again:
+	XFS_MOUNT_ILOCK(mp);
+	ip = mp->m_inodes;
+	if (ip == NULL) {
+		XFS_MOUNT_IUNLOCK(mp);
+		return;
+	}
+	do {
+		/* Skip markers inserted by xfs_sync */
+		if (ip->i_mount == NULL) {
+			ip = ip->i_mnext;
+			continue;
+		}
+		/* The quota inodes are expected to have no dquots attached */
+		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
+			ASSERT(ip->i_udquot == NULL);
+			ASSERT(ip->i_gdquot == NULL);
+			ip = ip->i_mnext;
+			continue;
+		}
+		vp = XFS_ITOV_NULL(ip);
+		if (!vp) {
+			ASSERT(ip->i_udquot == NULL);
+			ASSERT(ip->i_gdquot == NULL);
+			ip = ip->i_mnext;
+			continue;
+		}
+		vnode_refd = B_FALSE;
+		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
+			/*
+			 * ilock is contended: sample vp mapping while holding
+			 * the mplock, lest we come across a non-existent vnode.
+			 */
+			VMAP(vp, vmap);
+			ireclaims = mp->m_ireclaims;
+			topino = mp->m_inodes;
+			XFS_MOUNT_IUNLOCK(mp);
+
+			/* XXX restart limit ? */
+			if ( ! (vp = vn_get(vp, &vmap)))
+				goto again;
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+			vnode_refd = B_TRUE;
+		} else {
+			ireclaims = mp->m_ireclaims;
+			topino = mp->m_inodes;
+			XFS_MOUNT_IUNLOCK(mp);
+		}
+
+		/*
+		 * We don't keep the mountlock across the dqrele() call,
+		 * since it can take a while..
+		 */
+		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+			xfs_qm_dqrele(ip->i_udquot);
+			ip->i_udquot = NULL;
+		}
+		if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+			xfs_qm_dqrele(ip->i_gdquot);
+			ip->i_gdquot = NULL;
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		/*
+		 * Wait until we've dropped the ilock and mountlock to
+		 * do the vn_rele. Or be condemned to an eternity in the
+		 * inactive code in hell.
+		 */
+		if (vnode_refd)
+			VN_RELE(vp);
+		XFS_MOUNT_ILOCK(mp);
+		/*
+		 * If an inode was inserted or removed, we gotta
+		 * start over again.
+		 */
+		if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
+			/* XXX use a sentinel */
+			XFS_MOUNT_IUNLOCK(mp);
+			goto again;
+		}
+		ip = ip->i_mnext;
+	} while (ip != mp->m_inodes);
+
+	XFS_MOUNT_IUNLOCK(mp);
+}
+
+/*------------------------------------------------------------------------*/
+#ifdef DEBUG
+/*
+ * This contains all the test functions for XFS disk quotas.
+ * Currently it does a quota accounting check. ie. it walks through
+ * all inodes in the file system, calculating the dquot accounting fields,
+ * and prints out any inconsistencies.
+ */
+xfs_dqhash_t *qmtest_udqtab;	/* user dqtest hash buckets */
+xfs_dqhash_t *qmtest_gdqtab;	/* group dqtest hash buckets */
+int	      qmtest_hashmask;	/* number of buckets in each table */
+int	      qmtest_nfails;	/* failures recorded by the current check */
+mutex_t	      qcheck_lock;	/* taken inside xfs_qm_internalqcheck() */
+/* Bucket index for <mp, id>; assumes qmtest_hashmask is a power of two. */
+#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+				 (__psunsigned_t)(id)) & \
+				(qmtest_hashmask - 1))
+/* Pick the user or group dqtest bucket for <mp, id>. */
+#define DQTEST_HASH(mp, id, type)   (((type) & XFS_DQ_USER) ? \
+				     (qmtest_udqtab + \
+				      DQTEST_HASHVAL(mp, id)) : \
+				     (qmtest_gdqtab + \
+				      DQTEST_HASHVAL(mp, id)))
+/* Debug aid: dump one dqtest hash chain through cmn_err(). */
+#define DQTEST_LIST_PRINT(l, NXT, title) \
+{ \
+	  xfs_dqtest_t	*dqp; int i = 0;\
+	  cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
+	  for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
+	       dqp = (xfs_dqtest_t *)dqp->NXT) { \
+		cmn_err(CE_DEBUG, "  %d. \"%d (%s)\"  bcnt = %Lu, icnt = %Lu", \
+			 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp),	     \
+			 dqp->d_bcount, dqp->d_icount); } \
+}
+
+typedef struct dqtest {
+	xfs_dqmarker_t	q_lists;	/* list linkage (presumably also flags) */
+	xfs_dqhash_t	*q_hash;	/* the hashchain header */
+	xfs_mount_t	*q_mount;	/* filesystem this relates to */
+	xfs_dqid_t	d_id;		/* user id or group id */
+	xfs_qcnt_t	d_bcount;	/* # disk blocks counted for this id */
+	xfs_qcnt_t	d_icount;	/* # inodes counted for this id */
+} xfs_dqtest_t;
+
+STATIC void
+xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
+{
+	xfs_dquot_t *oldhead = h->qh_next;	/* current front of bucket */
+	dqp->HL_NEXT = oldhead;			/* dqp goes in front of it */
+	dqp->HL_PREVP = &h->qh_next;
+	if (oldhead)
+		oldhead->HL_PREVP = &dqp->HL_NEXT;
+	h->qh_next = (xfs_dquot_t *)dqp;
+	h->qh_version++;
+	h->qh_nelems++;
+}
+STATIC void
+xfs_qm_dqtest_print(
+	xfs_dqtest_t	*d)
+{
+	const char	*typestr = XFS_QM_ISUDQ(d) ? "USR" : "GRP";
+
+	cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------");
+	cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id);
+	cmn_err(CE_DEBUG, "---- type     = %s", typestr);
+	cmn_err(CE_DEBUG, "---- fs       = 0x%p", d->q_mount);
+	cmn_err(CE_DEBUG, "---- bcount   = %Lu (0x%x)", d->d_bcount, (int)d->d_bcount);
+	cmn_err(CE_DEBUG, "---- icount   = %Lu (0x%x)", d->d_icount, (int)d->d_icount);
+	cmn_err(CE_DEBUG, "---------------------------");
+}
+/* Count and report one qcheck failure; d->d_id is already host-endian. */
+STATIC void
+xfs_qm_dqtest_failed(
+	xfs_dqtest_t	*d,
+	xfs_dquot_t	*dqp,
+	char		*reason,
+	xfs_qcnt_t	a,
+	xfs_qcnt_t	b,
+	int		error)
+{
+	qmtest_nfails++;	/* reported by xfs_qm_internalqcheck() */
+	if (error)
+		cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s",
+		       d->d_id, error, reason);
+	else
+		cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]",
+		       d->d_id, reason, (int)a, (int)b);
+	xfs_qm_dqtest_print(d);
+	if (dqp)
+		xfs_qm_dqprint(dqp);
+}
+
+STATIC int
+xfs_dqtest_cmp2(
+	xfs_dqtest_t	*d,
+	xfs_dquot_t	*dqp)
+{
+	int	nbad = 0;	/* number of inconsistencies found */
+	/* The counts gathered in d must match those in the dquot. */
+	if (INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) != d->d_icount) {
+		nbad++;
+		xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
+			INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+			d->d_icount, 0);
+	}
+	if (INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) != d->d_bcount) {
+		nbad++;
+		xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
+			INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+			d->d_bcount, 0);
+	}
+	/* At or over a soft limit, a dquot with id != 0 needs a timer set. */
+	if (INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) &&
+	    INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) >=
+	    INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) &&
+	    INT_ISZERO(dqp->q_core.d_btimer, ARCH_CONVERT) &&
+	    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT)) {
+		cmn_err(CE_DEBUG,
+			"%d [%s] [0x%p] BLK TIMER NOT STARTED",
+			d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
+		nbad++;
+	}
+	if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) &&
+	    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
+	    INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) &&
+	    INT_ISZERO(dqp->q_core.d_itimer, ARCH_CONVERT) &&
+	    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT)) {
+		cmn_err(CE_DEBUG,
+			"%d [%s] [0x%p] INO TIMER NOT STARTED",
+			d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
+		nbad++;
+	}
+#ifdef QUOTADEBUG
+	if (!nbad) {
+		cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked",
+			d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount);
+	}
+#endif
+	return (nbad);
+}
+
+STATIC void
+xfs_dqtest_cmp(
+	xfs_dqtest_t	*d)
+{
+	xfs_dquot_t	*real;		/* the filesystem's own dquot */
+	int		err;
+
+	/* Fetch the real dquot (no allocation flag) and compare it with d. */
+	err = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, &real);
+	if (err) {
+		xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, err);
+	} else {
+		xfs_dqtest_cmp2(d, real);
+		xfs_qm_dqput(real);
+	}
+}
+
+STATIC int
+xfs_qm_internalqcheck_dqget(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	xfs_dqtest_t	**O_dq)
+{
+	xfs_dqhash_t	*bucket = DQTEST_HASH(mp, id, type);
+	xfs_dqtest_t	*ent = (xfs_dqtest_t *) bucket->qh_next;
+
+	/*
+	 * Reuse the entry for <mp, id> if this bucket already has one.
+	 */
+	while (ent != NULL && !(ent->d_id == id && ent->q_mount == mp))
+		ent = (xfs_dqtest_t *) ent->HL_NEXT;
+
+	if (ent == NULL) {
+		/* First sighting: add a zeroed entry to the bucket. */
+		ent = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
+		ent->dq_flags = type;
+		ent->d_id = id;
+		ent->q_mount = mp;
+		ent->q_hash = bucket;
+		xfs_qm_hashinsert(bucket, ent);
+	}
+	*O_dq = ent;
+	return (0);
+}
+
+STATIC void
+xfs_qm_internalqcheck_get_dquots(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	uid,
+	xfs_dqid_t	gid,
+	xfs_dqtest_t	**ud,		/* written only if user quota is on */
+	xfs_dqtest_t	**gd)		/* written only if group quota is on */
+{
+	if (XFS_IS_UQUOTA_ON(mp))
+		(void) xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
+	if (XFS_IS_GQUOTA_ON(mp))
+		(void) xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
+}
+
+
+STATIC void
+xfs_qm_internalqcheck_dqadjust(
+	xfs_inode_t		*ip,
+	xfs_dqtest_t		*d)
+{
+	d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;	/* its blocks */
+	d->d_icount += 1;				/* the inode itself */
+}
+/* Bulkstat callback: add one inode's usage to its user/group dqtest. */
+STATIC int
+xfs_qm_internalqcheck_adjust(
+	xfs_mount_t	*mp,		/* mount point for filesystem */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_ino_t	ino,		/* inode number to get data for */
+	void		*buffer,	/* not used */
+	xfs_daddr_t	bno,		/* starting block of inode cluster */
+	void		*dip,		/* not used */
+	int		*res)		/* bulkstat result code */
+{
+	xfs_inode_t		*ip;
+	xfs_dqtest_t		*ud, *gd;
+	uint			lock_flags;
+	boolean_t		ipreleased;
+	int			error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+		*res = BULKSTAT_RV_NOTHING;
+		qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n",
+			(unsigned long long) ino,
+			(unsigned long long) mp->m_sb.sb_uquotino,
+			(unsigned long long) mp->m_sb.sb_gquotino);
+		return XFS_ERROR(EINVAL);
+	}
+	ipreleased = B_FALSE;
+ again:
+	lock_flags = XFS_ILOCK_SHARED;
+	if ((error = xfs_iget(mp, tp, ino, lock_flags, &ip, bno))) {
+		*res = BULKSTAT_RV_NOTHING;
+		return (error);
+	}
+
+	if (ip->i_d.di_mode == 0) {
+		xfs_iput_new(ip, lock_flags);
+		*res = BULKSTAT_RV_NOTHING;
+		return XFS_ERROR(ENOENT);
+	}
+
+	/*
+	 * This inode can have blocks after eof which can get released
+	 * when we send it to inactive. Since we don't check the dquot
+	 * until after all our calculations are done, we must get rid of
+	 * those now: put the inode once and look it up a second time.
+	 */
+	if (! ipreleased) {
+		xfs_iput(ip, lock_flags);
+		ipreleased = B_TRUE;
+		goto again;
+	}
+	xfs_qm_internalqcheck_get_dquots(mp,
+					(xfs_dqid_t) ip->i_d.di_uid,
+					(xfs_dqid_t) ip->i_d.di_gid,
+					&ud, &gd);
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		ASSERT(ud);
+		xfs_qm_internalqcheck_dqadjust(ip, ud);
+	}
+	if (XFS_IS_GQUOTA_ON(mp)) {
+		ASSERT(gd);
+		xfs_qm_internalqcheck_dqadjust(ip, gd);
+	}
+	xfs_iput(ip, lock_flags);
+	*res = BULKSTAT_RV_DIDONE;
+	return (0);
+}
+
+
+/*
+ * PRIVATE, debugging: recompute per-id usage by walking every inode and
+ * compare it with the real dquots. Returns the failure count kept in
+ * qmtest_nfails, or ESRCH if quotas are not on.
+ */
+int
+xfs_qm_internalqcheck(
+	xfs_mount_t	*mp)
+{
+	xfs_ino_t	lastino = 0;
+	int		done = 0;
+	int		count = 5;
+	int		bucket, pass;
+	xfs_dqtest_t	*ent, *next;
+	xfs_dqhash_t	*tab;
+	int		error;
+
+	qmtest_hashmask = 32;
+	qmtest_nfails = 0;
+
+	if (! XFS_IS_QUOTA_ON(mp))
+		return XFS_ERROR(ESRCH);
+
+	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+	XFS_bflush(mp->m_ddev_targp);
+	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+	XFS_bflush(mp->m_ddev_targp);
+
+	/* There should be absolutely no quota activity from here on. */
+	mutex_lock(&qcheck_lock, PINOD);
+	qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
+				    sizeof(xfs_dqhash_t), KM_SLEEP);
+	qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
+				    sizeof(xfs_dqhash_t), KM_SLEEP);
+
+	/*
+	 * Iterate thru all the inodes in the file system, adjusting the
+	 * corresponding dqtest counters, until bulkstat is done or fails.
+	 */
+	for (;;) {
+		error = xfs_bulkstat(mp, NULL, &lastino, &count,
+				     xfs_qm_internalqcheck_adjust,
+				     0, NULL, BULKSTAT_FG_IGET, &done);
+		if (error || done)
+			break;
+	}
+
+	if (error)
+		cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
+
+	cmn_err(CE_DEBUG, "Checking results against system dquots");
+	for (bucket = 0; bucket < qmtest_hashmask; bucket++) {
+		/* Pass 0 checks the user bucket, pass 1 the group bucket. */
+		for (pass = 0; pass < 2; pass++) {
+			tab = pass ? qmtest_gdqtab : qmtest_udqtab;
+			ent = (xfs_dqtest_t *) tab[bucket].qh_next;
+			while (ent != NULL) {
+				xfs_dqtest_cmp(ent);
+				next = (xfs_dqtest_t *) ent->HL_NEXT;
+				kmem_free(ent, sizeof(xfs_dqtest_t));
+				ent = next;
+			}
+		}
+	}
+
+	if (qmtest_nfails) {
+		cmn_err(CE_DEBUG, "******** quotacheck failed  ********");
+		cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails);
+	} else {
+		cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
+	}
+	kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+	kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+	mutex_unlock(&qcheck_lock);
+	return (qmtest_nfails);
+}
+
+#endif /* DEBUG */
diff -Nru a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_quota_priv.h	Mon Mar 31 13:41:08 2003
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE	10
+
+/* Number of dquots per dquot chunk (quotainfo's qi_dqperchunk) */
+#define XFS_QM_DQPERBLK(mp)	((mp)->m_quotainfo->qi_dqperchunk)
+/* Is the inode's i_lock held at all / held for update (per ismrlocked)? */
+#define XFS_ISLOCKED_INODE(ip)		(ismrlocked(&(ip)->i_lock, \
+					    MR_UPDATE | MR_ACCESS) != 0)
+#define XFS_ISLOCKED_INODE_EXCL(ip)	(ismrlocked(&(ip)->i_lock, \
+					    MR_UPDATE) != 0)
+/* Is dquot d currently associated with transaction t (via q_transp)? */
+#define XFS_DQ_IS_ADDEDTO_TRX(t, d)	((d)->q_transp == (t))
+/* Shorthands for quotainfo fields; all dereference mp->m_quotainfo. */
+#define XFS_QI_MPLRECLAIMS(mp)	((mp)->m_quotainfo->qi_dqreclaims)
+#define XFS_QI_UQIP(mp)		((mp)->m_quotainfo->qi_uquotaip)
+#define XFS_QI_GQIP(mp)		((mp)->m_quotainfo->qi_gquotaip)
+#define XFS_QI_DQCHUNKLEN(mp)	((mp)->m_quotainfo->qi_dqchunklen)
+#define XFS_QI_BTIMELIMIT(mp)	((mp)->m_quotainfo->qi_btimelimit)
+#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
+#define XFS_QI_ITIMELIMIT(mp)	((mp)->m_quotainfo->qi_itimelimit)
+#define XFS_QI_BWARNLIMIT(mp)	((mp)->m_quotainfo->qi_bwarnlimit)
+#define XFS_QI_IWARNLIMIT(mp)	((mp)->m_quotainfo->qi_iwarnlimit)
+#define XFS_QI_QOFFLOCK(mp)	((mp)->m_quotainfo->qi_quotaofflock)
+/* The per-mount dquot list kept in quotainfo: header, lock, head, count. */
+#define XFS_QI_MPL_LIST(mp)	((mp)->m_quotainfo->qi_dqlist)
+#define XFS_QI_MPLLOCK(mp)	((mp)->m_quotainfo->qi_dqlist.qh_lock)
+#define XFS_QI_MPLNEXT(mp)	((mp)->m_quotainfo->qi_dqlist.qh_next)
+#define XFS_QI_MPLNDQUOTS(mp)	((mp)->m_quotainfo->qi_dqlist.qh_nelems)
+/* Lock/unlock the mutex embedded in a dquot hash/list header. */
+#define XQMLCK(h)			(mutex_lock(&((h)->qh_lock), PINOD))
+#define XQMUNLCK(h)			(mutex_unlock(&((h)->qh_lock)))
+#ifdef DEBUG
+struct xfs_dqhash;
+static inline int XQMISLCKD(struct xfs_dqhash *h)
+{
+	/* If the trylock succeeds nobody held the lock: drop it again. */
+	if (!mutex_trylock(&h->qh_lock))
+		return 1;
+	mutex_unlock(&h->qh_lock);
+	return 0;
+}
+#endif
+/* The *_IS_*_LOCKED forms use XQMISLCKD, which only exists under DEBUG. */
+#define XFS_DQ_HASH_LOCK(h)		XQMLCK(h)
+#define XFS_DQ_HASH_UNLOCK(h)		XQMUNLCK(h)
+#define XFS_DQ_IS_HASH_LOCKED(h)	XQMISLCKD(h)
+
+#define xfs_qm_mplist_lock(mp)		XQMLCK(&(XFS_QI_MPL_LIST(mp)))
+#define xfs_qm_mplist_unlock(mp)	XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
+#define XFS_QM_IS_MPLIST_LOCKED(mp)	XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
+
+#define xfs_qm_freelist_lock(qm)	XQMLCK(&((qm)->qm_dqfreelist))
+#define xfs_qm_freelist_unlock(qm)	XQMUNLCK(&((qm)->qm_dqfreelist))
+#define XFS_QM_IS_FREELIST_LOCKED(qm)	XQMISLCKD(&((qm)->qm_dqfreelist))
+
+/*
+ * Hash <mp, id> into a dquot hash bucket, plus per-type/empty-dquot tests.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+				 (__psunsigned_t)(id)) & \
+				(xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type)   ((type) == XFS_DQ_USER ? \
+				     (xfs_Gqm->qm_usr_dqhtable + \
+				      XFS_DQ_HASHVAL(mp, id)) : \
+				     (xfs_Gqm->qm_grp_dqhtable + \
+				      XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQTYPE_ON(mp, type)   ((type) == XFS_DQ_USER ? \
+				      XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+	INT_ISZERO((dqp)->q_core.d_blk_hardlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_blk_softlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_rtb_hardlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_rtb_softlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_ino_hardlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_ino_softlimit, ARCH_CONVERT) && \
+	INT_ISZERO((dqp)->q_core.d_bcount, ARCH_CONVERT)	      && \
+	INT_ISZERO((dqp)->q_core.d_rtbcount, ARCH_CONVERT)      && \
+	INT_ISZERO((dqp)->q_core.d_icount, ARCH_CONVERT))
+
+#define HL_PREVP	dq_hashlist.ql_prevp
+#define HL_NEXT		dq_hashlist.ql_next
+#define MPL_PREVP	dq_mplist.ql_prevp
+#define MPL_NEXT	dq_mplist.ql_next
+
+/* Unlink dqp from list h (PVP/NXT name its link fields); bumps qh_version. */
+#define _LIST_REMOVE(h, dqp, PVP, NXT)				\
+	{							\
+		 xfs_dquot_t *qnext;				\
+		 if (((qnext) = (dqp)->NXT))			\
+			 (qnext)->PVP = (dqp)->PVP;		\
+		 *((dqp)->PVP) = qnext;				\
+		 (dqp)->NXT = NULL;				\
+		 (dqp)->PVP = NULL;				\
+		 (h)->qh_version++;				\
+		 (h)->qh_nelems--;				\
+	}
+/* Insert dqp at the head of list h; bumps qh_version and qh_nelems. */
+#define _LIST_INSERT(h, dqp, PVP, NXT)				\
+	{							\
+		 xfs_dquot_t *qhead;				\
+		 if (((qhead) = (h)->qh_next))			\
+			 (qhead)->PVP = &((dqp)->NXT);		\
+		 (dqp)->NXT = qhead;				\
+		 (dqp)->PVP = &((h)->qh_next);			\
+		 (h)->qh_next = (dqp);				\
+		 (h)->qh_version++;				\
+		 (h)->qh_nelems++;				\
+	 }
+/* Walk every dquot on the per-mount list; the walk ends at a NULL link. */
+#define FOREACH_DQUOT_IN_MP(dqp, mp) \
+	for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
+/* Walk a freelist; the walk stops on arriving back at the list header. */
+#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist)	\
+for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
+     (dqp) = (dqp)->dq_flnext)
+/* List insert/remove wrappers for the hash, free and per-mount lists. */
+#define XQM_HASHLIST_INSERT(h, dqp)	\
+	 _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
+
+#define XQM_FREELIST_INSERT(h, dqp)	\
+	 xfs_qm_freelist_append(h, dqp)
+
+#define XQM_MPLIST_INSERT(h, dqp)	\
+	 _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
+
+#define XQM_HASHLIST_REMOVE(h, dqp)	\
+	 _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
+#define XQM_FREELIST_REMOVE(dqp)	\
+	 xfs_qm_freelist_unlink(dqp)
+#define XQM_MPLIST_REMOVE(h, dqp)	\
+	{ _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
+	  XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
+/* True once the embedded q_logitem points back at its own dquot. */
+#define XFS_DQ_IS_LOGITEM_INITD(dqp)	((dqp)->q_logitem.qli_dquot == (dqp))
+/* The transaction's usr or grp dquot accounting, chosen by dqp's type. */
+#define XFS_QM_DQP_TO_DQACCT(tp, dqp)	(XFS_QM_ISUDQ(dqp) ? \
+					 (tp)->t_dqinfo->dqa_usrdquots : \
+					 (tp)->t_dqinfo->dqa_grpdquots)
+#define XFS_IS_SUSER_DQUOT(dqp)		\
+	(INT_ISZERO((dqp)->q_core.d_id, ARCH_CONVERT))
+/* NOTE(review): dqvmap is sampled but never used; only VN_RELE acts. */
+#define XFS_PURGE_INODE(ip)		\
+	{				\
+	  vmap_t dqvmap;		\
+	  vnode_t *dqvp;		\
+	  dqvp = XFS_ITOV(ip);		\
+	  VMAP(dqvp, dqvmap);		\
+	  VN_RELE(dqvp);		\
+	}
+/* Printable names for a dquot's type and dirty state. */
+#define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
+#define DQFLAGTO_DIRTYSTR(d)	(XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
+
+#endif	/* __XFS_QUOTA_PRIV_H__ */
diff -Nru a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/quota/xfs_trans_dquot.c	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,894 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include "xfs_qm.h"
+
+STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, have an initialized log item, and must not
+ * already be joined to this transaction.
+ */
+void
+xfs_trans_dqjoin(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	xfs_dq_logitem_t    *lp;
+
+	ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp));
+	lp = &dqp->q_logitem;
+
+	/*
+	 * Get a log_item_desc to point at the new item.
+	 */
+	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp));
+
+	/*
+	 * Initialize q_transp so we can later determine if this dquot is
+	 * associated with this transaction.
+	 */
+	dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed.  The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing to stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	xfs_log_item_desc_t	*lidp;
+
+	ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
+	ASSERT(lidp != NULL);
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	lidp->lid_flags |= XFS_LID_DIRTY;
+}
+
+/*
+ * Carry forward whatever is left of the quota reservations (blk, rtblk,
+ * inode) to the new transaction (ntp); otp keeps only what it has used.
+ */
+STATIC void
+xfs_trans_dup_dqinfo(
+	xfs_trans_t	*otp,
+	xfs_trans_t	*ntp)
+{
+	xfs_dqtrx_t	*oq, *nq;
+	int		i,j;
+	xfs_dqtrx_t	*oqa, *nqa;
+
+	if (!otp->t_dqinfo)
+		return;
+
+	xfs_trans_alloc_dqinfo(ntp);
+	oqa = otp->t_dqinfo->dqa_usrdquots;
+	nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+	/*
+	 * Because the quota blk reservation is carried forward,
+	 * it is also necessary to carry forward the DQ_DIRTY flag.
+	 */
+	if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+		ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+	for (j = 0; j < 2; j++) {	/* first the usr array, then the grp array */
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			if (oqa[i].qt_dquot == NULL)
+				break;
+			oq = &oqa[i];
+			nq = &nqa[i];
+
+			nq->qt_dquot = oq->qt_dquot;
+			nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+			nq->qt_rtbcount_delta = 0;
+
+			/*
+			 * Transfer whatever is left of the reservations.
+			 */
+			nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+			oq->qt_blk_res = oq->qt_blk_res_used;
+
+			nq->qt_rtblk_res = oq->qt_rtblk_res -
+				oq->qt_rtblk_res_used;
+			oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+			nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+			oq->qt_ino_res = oq->qt_ino_res_used;
+
+		}
+		oqa = otp->t_dqinfo->dqa_grpdquots;
+		nqa = ntp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	uint		field,
+	long		delta)
+{
+	xfs_mount_t	*mp;
+
+	ASSERT(tp);
+	mp = tp->t_mountp;
+
+	if (!XFS_IS_QUOTA_ON(mp) ||
+	    ip->i_ino == mp->m_sb.sb_uquotino ||
+	    ip->i_ino == mp->m_sb.sb_gquotino)
+		return;		/* quota is off, or ip is one of the quota inodes */
+
+	if (tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+
+	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) {
+		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+	}
+	if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) {
+		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+	}
+}
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	int		i;
+	xfs_dqtrx_t	*qa;
+
+	/* Find dqp's slot or the first free one; qa depends only on tp and dqp. */
+	qa = XFS_QM_DQP_TO_DQACCT(tp, dqp);
+	for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+		if (qa[i].qt_dquot == NULL ||
+		    qa[i].qt_dquot == dqp) {
+			return (&qa[i]);
+		}
+	}
+
+	return (NULL);	/* every slot is taken by some other dquot */
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp,
+	uint		field,
+	long		delta)
+{
+	xfs_dqtrx_t	*qtrx;
+
+	ASSERT(tp);
+	qtrx = NULL;
+
+	if (tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+	/*
+	 * Find either the first free slot or the slot that belongs
+	 * to this dquot.
+	 */
+	qtrx = xfs_trans_get_dqtrx(tp, dqp);
+	ASSERT(qtrx);	/* NULL means no slot is left for this dquot */
+	if (qtrx->qt_dquot == NULL)
+		qtrx->qt_dquot = dqp;
+
+	switch (field) {
+
+		/*
+		 * regular disk blk reservation
+		 */
+	      case XFS_TRANS_DQ_RES_BLKS:
+		qtrx->qt_blk_res += (ulong)delta;
+		break;
+
+		/*
+		 * inode reservation
+		 */
+	      case XFS_TRANS_DQ_RES_INOS:
+		qtrx->qt_ino_res += (ulong)delta;
+		break;
+
+		/*
+		 * disk blocks used; a positive delta also consumes the blk reservation, if any.
+		 */
+	      case XFS_TRANS_DQ_BCOUNT:
+		if (qtrx->qt_blk_res && delta > 0) {
+			qtrx->qt_blk_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+		}
+		qtrx->qt_bcount_delta += delta;
+		break;
+
+	      case XFS_TRANS_DQ_DELBCOUNT:
+		qtrx->qt_delbcnt_delta += delta;
+		break;
+
+		/*
+		 * Inode Count; a positive delta also consumes the inode reservation, if any.
+		 */
+	      case XFS_TRANS_DQ_ICOUNT:
+		if (qtrx->qt_ino_res && delta > 0) {
+			qtrx->qt_ino_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+		}
+		qtrx->qt_icount_delta += delta;
+		break;
+
+		/*
+		 * rtblk reservation
+		 */
+	      case XFS_TRANS_DQ_RES_RTBLKS:
+		qtrx->qt_rtblk_res += (ulong)delta;
+		break;
+
+		/*
+		 * rtblk count; a positive delta also consumes the rtblk reservation, if any.
+		 */
+	      case XFS_TRANS_DQ_RTBCOUNT:
+		if (qtrx->qt_rtblk_res && delta > 0) {
+			qtrx->qt_rtblk_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+		}
+		qtrx->qt_rtbcount_delta += delta;
+		break;
+
+	      case XFS_TRANS_DQ_DELRTBCOUNT:
+		qtrx->qt_delrtb_delta += delta;
+		break;
+
+	      default:
+		ASSERT(0);
+	}
+	tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots (of one type - usr OR grp),
+ * involved in a transaction is 2 and that both usr and grp combined - 3.
+ * So, we don't attempt to make this very generic. The dquots stay locked.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+	xfs_trans_t	*tp,
+	xfs_dqtrx_t	*q)
+{
+	ASSERT(q[0].qt_dquot != NULL);
+	if (q[1].qt_dquot == NULL) {	/* only one dquot in this array */
+		xfs_dqlock(q[0].qt_dquot);
+		xfs_trans_dqjoin(tp, q[0].qt_dquot);
+	} else {
+		ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+		xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+		xfs_trans_dqjoin(tp, q[0].qt_dquot);
+		xfs_trans_dqjoin(tp, q[1].qt_dquot);
+	}
+}
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go thru all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+	xfs_trans_t		*tp)
+{
+	int			i, j;
+	xfs_dquot_t		*dqp;
+	xfs_dqtrx_t		*qtrx, *qa;
+	xfs_disk_dquot_t	*d;
+	long			totalbdelta;
+	long			totalrtbdelta;
+
+	if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+		return;
+
+	ASSERT(tp->t_dqinfo);
+	qa = tp->t_dqinfo->dqa_usrdquots;
+	for (j = 0; j < 2; j++) {
+		if (qa[0].qt_dquot == NULL) {
+			qa = tp->t_dqinfo->dqa_grpdquots;
+			continue;
+		}
+
+		/*
+		 * Lock all of the dquots and join them to the transaction.
+		 */
+		xfs_trans_dqlockedjoin(tp, qa);
+
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			qtrx = &qa[i];
+			/*
+			 * The array of dquots is filled
+			 * sequentially, not sparsely.
+			 */
+			if ((dqp = qtrx->qt_dquot) == NULL)
+				break;
+
+			ASSERT(XFS_DQ_IS_LOCKED(dqp));
+			ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+
+			/*
+			 * adjust the actual number of blocks used
+			 */
+			d = &dqp->q_core;
+
+			/*
+			 * The issue here is - sometimes we don't make a blkquota
+			 * reservation intentionally to be fair to users
+			 * (when the amount is small). On the other hand,
+			 * delayed allocs do make reservations, but that's
+			 * outside of a transaction, so we have no
+			 * idea how much was really reserved.
+			 * So, here we've accumulated delayed allocation blks and
+			 * non-delay blks. The assumption is that the
+			 * delayed ones are always reserved (outside of a
+			 * transaction), and the others may or may not have
+			 * quota reservations.
+			 */
+			totalbdelta = qtrx->qt_bcount_delta +
+				qtrx->qt_delbcnt_delta;
+			totalrtbdelta = qtrx->qt_rtbcount_delta +
+				qtrx->qt_delrtb_delta;
+#ifdef QUOTADEBUG
+			if (totalbdelta < 0)
+				ASSERT(INT_GET(d->d_bcount, ARCH_CONVERT) >=
+				       (xfs_qcnt_t) -totalbdelta);
+
+			if (totalrtbdelta < 0)
+				ASSERT(INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
+				       (xfs_qcnt_t) -totalrtbdelta);
+
+			if (qtrx->qt_icount_delta < 0)
+				ASSERT(INT_GET(d->d_icount, ARCH_CONVERT) >=
+				       (xfs_qcnt_t) -qtrx->qt_icount_delta);
+#endif
+			if (totalbdelta)
+				INT_MOD(d->d_bcount, ARCH_CONVERT, (xfs_qcnt_t)totalbdelta);
+
+			if (qtrx->qt_icount_delta)
+				INT_MOD(d->d_icount, ARCH_CONVERT, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+			if (totalrtbdelta)
+				INT_MOD(d->d_rtbcount, ARCH_CONVERT, (xfs_qcnt_t)totalrtbdelta);
+
+			/*
+			 * Start/reset the timer(s) if needed.
+			 */
+			xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+
+			dqp->dq_flags |= XFS_DQ_DIRTY;
+			/*
+			 * add this to the list of items to get logged
+			 */
+			xfs_trans_log_dquot(tp, dqp);
+			/*
+			 * Take off what's left of the original reservation.
+			 * In case of delayed allocations, there's no
+			 * reservation that a transaction structure knows of.
+			 */
+			if (qtrx->qt_blk_res != 0) {
+				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+					if (qtrx->qt_blk_res >
+					    qtrx->qt_blk_res_used)
+						dqp->q_res_bcount -= (xfs_qcnt_t)
+							(qtrx->qt_blk_res -
+							 qtrx->qt_blk_res_used);
+					else
+						dqp->q_res_bcount -= (xfs_qcnt_t)
+							(qtrx->qt_blk_res_used -
+							 qtrx->qt_blk_res);
+				}
+			} else {
+				/*
+				 * These blks were never reserved, either inside
+				 * a transaction or outside one (in a delayed
+				 * allocation). Also, this isn't always a
+				 * negative number since we sometimes
+				 * deliberately skip quota reservations.
+				 */
+				if (qtrx->qt_bcount_delta) {
+					dqp->q_res_bcount +=
+					      (xfs_qcnt_t)qtrx->qt_bcount_delta;
+				}
+			}
+			/*
+			 * Adjust the RT reservation; test the RT counters, not the regular blk ones.
+			 */
+			if (qtrx->qt_rtblk_res != 0) {
+				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
+					if (qtrx->qt_rtblk_res >
+					    qtrx->qt_rtblk_res_used)
+					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
+						       (qtrx->qt_rtblk_res -
+							qtrx->qt_rtblk_res_used);
+					else
+					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
+						       (qtrx->qt_rtblk_res_used -
+							qtrx->qt_rtblk_res);
+				}
+			} else {
+				if (qtrx->qt_rtbcount_delta)
+					dqp->q_res_rtbcount +=
+					    (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+			}
+
+			/*
+			 * Adjust the inode reservation.
+			 */
+			if (qtrx->qt_ino_res != 0) {
+				ASSERT(qtrx->qt_ino_res >=
+				       qtrx->qt_ino_res_used);
+				if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+					dqp->q_res_icount -= (xfs_qcnt_t)
+						(qtrx->qt_ino_res -
+						 qtrx->qt_ino_res_used);
+			} else {
+				if (qtrx->qt_icount_delta)
+					dqp->q_res_icount +=
+					    (xfs_qcnt_t)qtrx->qt_icount_delta;
+			}
+
+
+#ifdef QUOTADEBUG
+			if (qtrx->qt_rtblk_res != 0)
+				cmn_err(CE_DEBUG, "RT res %d for 0x%p\n",
+					(int) qtrx->qt_rtblk_res, dqp);
+#endif
+			ASSERT(dqp->q_res_bcount >=
+				INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+			ASSERT(dqp->q_res_icount >=
+				INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+			ASSERT(dqp->q_res_rtbcount >=
+				INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
+		}
+		/*
+		 * Do the group quotas next
+		 */
+		qa = tp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (ie. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+STATIC void
+xfs_trans_unreserve_and_mod_dquots(
+	xfs_trans_t		*tp)
+{
+	int			i, j;
+	xfs_dquot_t		*dqp;
+	xfs_dqtrx_t		*qtrx, *qa;
+	boolean_t		locked;
+
+	if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+		return;
+
+	qa = tp->t_dqinfo->dqa_usrdquots;
+
+	for (j = 0; j < 2; j++) {	/* first the usr array, then the grp array */
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			qtrx = &qa[i];
+			/*
+			 * We assume that the array of dquots is filled
+			 * sequentially, not sparsely.
+			 */
+			if ((dqp = qtrx->qt_dquot) == NULL)
+				break;
+			/*
+			 * Unreserve the original reservation. We don't care
+			 * about the number of blocks used field, or deltas.
+			 * Also we don't bother to zero the fields.
+			 */
+			locked = B_FALSE;	/* lock only if there is something to undo */
+			if (qtrx->qt_blk_res) {
+				xfs_dqlock(dqp);
+				locked = B_TRUE;
+				dqp->q_res_bcount -=
+					(xfs_qcnt_t)qtrx->qt_blk_res;
+			}
+			if (qtrx->qt_ino_res) {
+				if (!locked) {
+					xfs_dqlock(dqp);
+					locked = B_TRUE;
+				}
+				dqp->q_res_icount -=
+					(xfs_qcnt_t)qtrx->qt_ino_res;
+			}
+
+			if (qtrx->qt_rtblk_res) {
+				if (!locked) {
+					xfs_dqlock(dqp);
+					locked = B_TRUE;
+				}
+				dqp->q_res_rtbcount -=
+					(xfs_qcnt_t)qtrx->qt_rtblk_res;
+			}
+			if (locked)
+				xfs_dqunlock(dqp);
+
+		}
+		qa = tp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * If XFS_QMOPT_DQLOCK is set the caller already holds the dquot lock;
+ * otherwise it is taken and dropped here. Flags also say whether the blk
+ * reservation is for RT or regular blocks, and XFS_QMOPT_FORCE_RES skips
+ * the quota check. Returns EDQUOT if quota is exceeded, 0 otherwise.
+ */
+STATIC int
+xfs_trans_dqresv(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp,
+	long		nblks,
+	long		ninos,
+	uint		flags)
+{
+	int		error;
+	xfs_qcnt_t	hardlimit;
+	xfs_qcnt_t	softlimit;
+	time_t		btimer;
+	xfs_qcnt_t	*resbcountp;
+
+	if (! (flags & XFS_QMOPT_DQLOCK)) {
+		xfs_dqlock(dqp);
+	}
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	if (flags & XFS_TRANS_DQ_RES_BLKS) {
+		hardlimit = INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT);
+		softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
+		btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+		resbcountp = &dqp->q_res_bcount;
+	} else {
+		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+		hardlimit = INT_GET(dqp->q_core.d_rtb_hardlimit, ARCH_CONVERT);
+		softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
+		btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+		resbcountp = &dqp->q_res_rtbcount;
+	}
+	error = 0;
+
+	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+	    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT) &&
+	    XFS_IS_QUOTA_ENFORCED(dqp->q_mount)) {
+#ifdef QUOTADEBUG
+		cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld"
+			  " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit);
+#endif
+		if (nblks > 0) {
+			/*
+			 * dquot is locked already. See if we'd go over the
+			 * hardlimit or exceed the timelimit if we allocate
+			 * nblks.
+			 */
+			if (hardlimit > 0ULL &&
+			     (hardlimit <= nblks + *resbcountp)) {
+				error = EDQUOT;
+				goto error_return;
+			}
+
+			if (softlimit > 0ULL &&
+			     (softlimit <= nblks + *resbcountp)) {
+				/*
+				 * If timer or warnings has expired,
+				 * return EDQUOT
+				 */
+				if ((btimer != 0 && get_seconds() > btimer) ||
+				    (!INT_ISZERO(dqp->q_core.d_bwarns, ARCH_CONVERT) &&
+				     INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >=
+				     XFS_QI_BWARNLIMIT(dqp->q_mount))) {
+					error = EDQUOT;
+					goto error_return;
+				}
+			}
+		}
+		if (ninos > 0) {	/* NOTE(review): limits are tested against d_icount only, without ninos or q_res_icount -- confirm intended */
+			if (INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT) > 0ULL &&
+			    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
+			    INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT)) {
+				error = EDQUOT;
+				goto error_return;
+			} else if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) > 0ULL &&
+				   INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
+				   INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT)) {
+				/*
+				 * If timer or warnings has expired,
+				 * return EDQUOT
+				 */
+				if ((!INT_ISZERO(dqp->q_core.d_itimer, ARCH_CONVERT) &&
+				     get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) ||
+				    (!INT_ISZERO(dqp->q_core.d_iwarns, ARCH_CONVERT) &&
+				     INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >=
+				     XFS_QI_IWARNLIMIT(dqp->q_mount))) {
+					error = EDQUOT;
+					goto error_return;
+				}
+			}
+		}
+	}
+
+	/*
+	 * Change the reservation, but not the actual usage.
+	 * Note that q_res_bcount = q_core.d_bcount + resv
+	 */
+	(*resbcountp) += (xfs_qcnt_t)nblks;
+	if (ninos != 0)
+		dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+	/*
+	 * note the reservation amt in the trans struct too,
+	 * so that the transaction knows how much was reserved by
+	 * it against this particular dquot.
+	 * We don't do this when we are reserving for a delayed allocation,
+	 * because we don't have the luxury of a transaction envelope then.
+	 */
+	if (tp) {
+		ASSERT(tp->t_dqinfo);
+		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+		if (nblks != 0)
+			xfs_trans_mod_dquot(tp, dqp,
+					    flags & XFS_QMOPT_RESBLK_MASK,
+					    nblks);
+		if (ninos != 0)
+			xfs_trans_mod_dquot(tp, dqp,
+					    XFS_TRANS_DQ_RES_INOS,
+					    ninos);
+	}
+	ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+	ASSERT(dqp->q_res_rtbcount >= INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
+	ASSERT(dqp->q_res_icount >= INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+
+error_return:
+	if (! (flags & XFS_QMOPT_DQLOCK)) {
+		xfs_dqunlock(dqp);
+	}
+	return (error);
+}
+
+
+/*
+ * Given a dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_DQLOCK means the caller already holds the dquot lock(s).
+ *	   XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dquot_t	*udqp,
+	xfs_dquot_t	*gdqp,
+	long		nblks,
+	long		ninos,
+	uint		flags)
+{
+	int		resvd;
+
+	if (! XFS_IS_QUOTA_ON(mp))
+		return (0);
+
+	if (tp && tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+
+	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+	resvd = 0;
+
+	if (udqp) {
+		if (xfs_trans_dqresv(tp, udqp, nblks, ninos, flags))
+			return (EDQUOT);
+		resvd = 1;
+	}
+
+	if (gdqp) {
+		if (xfs_trans_dqresv(tp, gdqp, nblks, ninos, flags)) {
+			/*
+			 * can't do it, so backout previous reservation
+			 */
+			if (resvd) {
+				xfs_trans_dqresv(tp, udqp,  -nblks, -ninos,
+						 flags);
+			}
+			return (EDQUOT);
+		}
+	}
+
+	/*
+	 * Didn't change anything critical, so, no need to log
+	 */
+	return (0);
+}
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ *
+ * Returns 0 on success (or when quotas are off), EDQUOT otherwise
+ */
+STATIC int
+xfs_trans_reserve_quota_nblks(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	long		nblks,
+	long		ninos,
+	uint		type)
+{
+	int		error;
+
+	if (!XFS_IS_QUOTA_ON(mp))
+		return (0);
+
+	ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+	ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+#ifdef QUOTADEBUG
+	if (ip->i_udquot)
+		ASSERT(! XFS_DQ_IS_LOCKED(ip->i_udquot));
+	if (ip->i_gdquot)
+		ASSERT(! XFS_DQ_IS_LOCKED(ip->i_gdquot));
+#endif
+
+	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+	ASSERT((type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_RTBLKS ||
+	       (type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_BLKS);
+
+	/*
+	 * Reserve nblks against these dquots, with trans as the mediator.
+	 */
+	error = xfs_trans_reserve_quota_bydquots(tp, mp,
+						 ip->i_udquot, ip->i_gdquot,
+						 nblks, ninos,
+						 type);
+	return (error);
+}
+
+/*
+ * Allocate a quotaoff log item, add it to the transaction and return it.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+	xfs_trans_t		*tp,
+	xfs_qoff_logitem_t	*startqoff,
+	uint			flags)
+{
+	xfs_qoff_logitem_t	*q;
+
+	ASSERT(tp != NULL);
+
+	q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+	ASSERT(q != NULL);
+
+	/*
+	 * Get a log_item_desc to point at the new item.
+	 */
+	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)q);
+
+	return (q);
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed.  The logitem must
+ * already be associated with the given transaction, which is marked dirty too.
+ */
+void
+xfs_trans_log_quotaoff_item(
+	xfs_trans_t		*tp,
+	xfs_qoff_logitem_t	*qlp)
+{
+	xfs_log_item_desc_t	*lidp;
+
+	lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
+	ASSERT(lidp != NULL);
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	lidp->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+	xfs_trans_t	*tp)	/* transaction that receives a fresh dqinfo */
+{
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+STATIC void
+xfs_trans_free_dqinfo(
+	xfs_trans_t	*tp)	/* transaction whose dqinfo, if any, is released */
+{
+	if (tp->t_dqinfo != NULL) {
+		kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+		tp->t_dqinfo = NULL;	/* so a repeated call is a no-op */
+	}
+}
+
+xfs_dqtrxops_t	xfs_trans_dquot_ops = {	/* operations table filled with this file's functions */
+	.qo_dup_dqinfo			= xfs_trans_dup_dqinfo,
+	.qo_free_dqinfo			= xfs_trans_free_dqinfo,
+	.qo_mod_dquot_byino		= xfs_trans_mod_dquot_byino,
+	.qo_apply_dquot_deltas		= xfs_trans_apply_dquot_deltas,
+	.qo_reserve_quota_nblks		= xfs_trans_reserve_quota_nblks,
+	.qo_reserve_quota_bydquots	= xfs_trans_reserve_quota_bydquots,
+	.qo_unreserve_and_mod_dquots	= xfs_trans_unreserve_and_mod_dquots,
+};
diff -Nru a/fs/xfs/support/atomic.h b/fs/xfs/support/atomic.h
--- a/fs/xfs/support/atomic.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/support/atomic.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -43,20 +43,20 @@
  * This is used for two variables in XFS, one of which is a debug trace
  * buffer index. They are not accessed via any other atomic operations
  * so this is safe. All other atomic increments and decrements in XFS
- * now use the linux built in functions.
+ * now use the Linux built-in functions.
  */
 
-extern spinlock_t Atomic_spin;
+extern spinlock_t xfs_atomic_spin;
 
 static __inline__ int atomicIncWithWrap(int *ip, int val)
 {
 	unsigned long flags;
 	int ret;
-	spin_lock_irqsave(&Atomic_spin, flags);
+	spin_lock_irqsave(&xfs_atomic_spin, flags);
 	ret = *ip;
 	(*ip)++;
 	if (*ip == val) *ip = 0;
-	spin_unlock_irqrestore(&Atomic_spin, flags);
+	spin_unlock_irqrestore(&xfs_atomic_spin, flags);
 	return ret;
 }
 
diff -Nru a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
--- a/fs/xfs/support/debug.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/support/debug.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -55,7 +55,7 @@
     BUG();
 }
 
-#if (defined(DEBUG) || defined(INDUCE_IO_ERRROR))
+#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
 
 unsigned long
 random(void)
@@ -79,7 +79,7 @@
 	return current->pid;
 }
 
-#endif /* DEBUG */
+#endif /* (DEBUG || INDUCE_IO_ERRROR) && !NO_WANT_RANDOM */
 
 void
 cmn_err(register int level, char *fmt, ...)
diff -Nru a/fs/xfs/support/mrlock.c b/fs/xfs/support/mrlock.c
--- a/fs/xfs/support/mrlock.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/support/mrlock.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -271,16 +271,4 @@
 		wake_up(&mrp->mr_readerq);
 	}
 	MRUNLOCK(mrp);
-}
-
-int
-mrislocked_access(mrlock_t *mrp)
-{
-	return(mrp->mr_count > 0);
-}
-
-int
-mrislocked_update(mrlock_t *mrp)
-{
-	return(mrp->mr_count < 0);
 }
diff -Nru a/fs/xfs/support/mrlock.h b/fs/xfs/support/mrlock.h
--- a/fs/xfs/support/mrlock.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/support/mrlock.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -80,7 +80,9 @@
 extern void	mrfree(mrlock_t *);
 
 #define mrinit(mrp, name)	mrlock_init(mrp, MRLOCK_BARRIER, name, -1)
-#define mraccess(mrp)	mraccessf(mrp, 0)	/* grab for READ/ACCESS */
-#define mrupdate(mrp)	mrupdatef(mrp, 0)	/* grab for WRITE/UPDATE */
+#define mraccess(mrp)		mraccessf(mrp, 0) /* grab for READ/ACCESS */
+#define mrupdate(mrp)		mrupdatef(mrp, 0) /* grab for WRITE/UPDATE */
+#define mrislocked_access(mrp)	((mrp)->mr_count > 0)
+#define mrislocked_update(mrp)	((mrp)->mr_count < 0)
 
 #endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff -Nru a/fs/xfs/support/mutex.h b/fs/xfs/support/mutex.h
--- a/fs/xfs/support/mutex.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/support/mutex.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  * Portions Copyright (c) 2002 Christoph Hellwig.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -46,7 +46,6 @@
 typedef struct semaphore	mutex_t;
 
 #define mutex_init(lock, type, name)		sema_init(lock, 1)
-#define init_mutex(ptr, type, name, sequence)	sema_init(lock, 1)
 #define mutex_destroy(lock)			sema_init(lock, -99)
 #define mutex_lock(lock, num)			down(lock)
 #define mutex_trylock(lock)			(down_trylock(lock) ? 0 : 1)
diff -Nru a/fs/xfs/support/time.h b/fs/xfs/support/time.h
--- a/fs/xfs/support/time.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/support/time.h	Mon Mar 31 13:41:08 2003
@@ -45,11 +45,7 @@
 
 static inline void nanotime(struct timespec *tvp)
 {
-	struct timeval tv;
-
-	do_gettimeofday(&tv);
-	tvp->tv_sec = tv.tv_sec;
-	tvp->tv_nsec = tv.tv_usec * 1000;
+	*tvp = CURRENT_TIME;
 }
 
 #endif /* __XFS_SUPPORT_TIME_H__ */
diff -Nru a/fs/xfs/xfs.h b/fs/xfs/xfs.h
--- a/fs/xfs/xfs.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -66,6 +66,9 @@
 #include <xfs_dir.h>
 #include <xfs_dir2.h>
 #include <xfs_imap.h>
+#include <xfs_alloc.h>
+#include <xfs_dmapi.h>
+#include <xfs_quota.h>
 #include <xfs_mount.h>
 #include <xfs_alloc_btree.h>
 #include <xfs_bmap_btree.h>
@@ -77,17 +80,11 @@
 #include <xfs_dir2_sf.h>
 #include <xfs_dinode.h>
 #include <xfs_inode.h>
-#include <xfs_alloc.h>
 #include <xfs_bmap.h>
 #include <xfs_bit.h>
 #include <xfs_rtalloc.h>
 #include <xfs_error.h>
-#include <xfs_quota.h>
 #include <xfs_itable.h>
-#include <xfs_dqblk.h>
-#include <xfs_dquot_item.h>
-#include <xfs_dquot.h>
-#include <xfs_qm.h>
 #include <xfs_rw.h>
 #include <xfs_da_btree.h>
 #include <xfs_dir_leaf.h>
@@ -108,6 +105,5 @@
 #include <xfs_trans_priv.h>
 #include <xfs_trans_space.h>
 #include <xfs_utils.h>
-#include <xfs_dmapi.h>
 
 #endif	/* __XFS_H__ */
diff -Nru a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
--- a/fs/xfs/xfs_attr.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_attr.c	Mon Mar 31 13:41:07 2003
@@ -197,10 +197,8 @@
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if ((error = xfs_qm_dqattach(dp, 0)))
-			return (error);
-	}
+	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
+		return (error);
 
 	/*
 	 * If the inode doesn't have an attribute fork, add one.
@@ -280,19 +278,13 @@
 	}
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (rsvd) {
-			error = xfs_trans_reserve_blkquota_force(args.trans,
-					dp, nblks);
-		} else {
-			error = xfs_trans_reserve_blkquota(args.trans,
-					dp, nblks);
-		}
-		if (error) {
-			xfs_iunlock(dp, XFS_ILOCK_EXCL);
-			xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
-			return (error);
-		}
+	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
+			 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
+				XFS_QMOPT_RES_REGBLKS);
+	if (error) {
+		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+		xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+		return (error);
 	}
 
 	xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
@@ -483,12 +475,9 @@
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (XFS_NOT_DQATTACHED(mp, dp)) {
-			if ((error = xfs_qm_dqattach(dp, 0)))
-				return (error);
-		}
-	}
+	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
+		return (error);
+
 	/*
 	 * Start our first transaction of the day.
 	 *
diff -Nru a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
--- a/fs/xfs/xfs_bmap.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_bmap.c	Mon Mar 31 13:41:08 2003
@@ -2145,7 +2145,7 @@
 	 */
 	mp = ap->ip->i_mount;
 	nullfb = ap->firstblock == NULLFSBLOCK;
-	rt = (ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata;
+	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
 #ifdef __KERNEL__
 	if (rt) {
@@ -2467,14 +2467,10 @@
 			 * Adjust the disk quota also. This was reserved
 			 * earlier.
 			 */
-			if (XFS_IS_QUOTA_ON(mp) &&
-			    ap->ip->i_ino != mp->m_sb.sb_uquotino &&
-			    ap->ip->i_ino != mp->m_sb.sb_gquotino)
-				xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-					ap->wasdel ?
-						XFS_TRANS_DQ_DELRTBCOUNT :
+			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+				ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
 						XFS_TRANS_DQ_RTBCOUNT,
-					(long)ralen);
+				(long) ralen);
 		} else
 			ap->alen = 0;
 #endif	/* __KERNEL__ */
@@ -2691,14 +2687,10 @@
 			 * Adjust the disk quota also. This was reserved
 			 * earlier.
 			 */
-			if (XFS_IS_QUOTA_ON(mp) &&
-			    ap->ip->i_ino != mp->m_sb.sb_uquotino &&
-			    ap->ip->i_ino != mp->m_sb.sb_gquotino)
-				xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-					ap->wasdel ?
-						XFS_TRANS_DQ_DELBCOUNT :
+			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+				ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
 						XFS_TRANS_DQ_BCOUNT,
-					(long)args.len);
+				(long) args.len);
 		} else {
 			ap->rval = NULLFSBLOCK;
 			ap->alen = 0;
@@ -2755,10 +2747,7 @@
 		return error;
 	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
 	ip->i_d.di_nblocks--;
-	if (XFS_IS_QUOTA_ON(mp) &&
-	    ip->i_ino != mp->m_sb.sb_uquotino &&
-	    ip->i_ino != mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
 	if (cur->bc_bufs[0] == cbp)
 		cur->bc_bufs[0] = NULL;
@@ -2854,10 +2843,7 @@
 				goto done;
 			do_fx = 0;
 			nblks = len * mp->m_sb.sb_rextsize;
-			if (XFS_IS_QUOTA_ON(mp) &&
-			    ip->i_ino != mp->m_sb.sb_uquotino &&
-			    ip->i_ino != mp->m_sb.sb_gquotino)
-				qfield = XFS_TRANS_DQ_RTBCOUNT;
+			qfield = XFS_TRANS_DQ_RTBCOUNT;
 		}
 		/*
 		 * Ordinary allocation.
@@ -2865,10 +2851,7 @@
 		else {
 			do_fx = 1;
 			nblks = del->br_blockcount;
-			if (XFS_IS_QUOTA_ON(mp) &&
-			    ip->i_ino != mp->m_sb.sb_uquotino &&
-			    ip->i_ino != mp->m_sb.sb_gquotino)
-				qfield = XFS_TRANS_DQ_BCOUNT;
+			qfield = XFS_TRANS_DQ_BCOUNT;
 		}
 		/*
 		 * Set up del_endblock and cur for later.
@@ -3088,7 +3071,8 @@
 	 * Adjust quota data.
 	 */
 	if (qfield)
-		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
+		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+
 	/*
 	 * Account for change in delayed indirect blocks.
 	 * Nothing to do for disk quota accounting here.
@@ -3239,10 +3223,7 @@
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
-	if (XFS_IS_QUOTA_ON(mp) &&
-	    ip->i_ino != mp->m_sb.sb_uquotino &&
-	    ip->i_ino != mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 	/*
 	 * Fill in the child block.
@@ -3385,11 +3366,8 @@
 		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 		ip->i_d.di_nblocks = 1;
-		if (XFS_IS_QUOTA_ON(args.mp) &&
-		    ip->i_ino != args.mp->m_sb.sb_uquotino &&
-		    ip->i_ino != args.mp->m_sb.sb_gquotino)
-			xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
-				1L);
+		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+			XFS_TRANS_DQ_BCOUNT, 1L);
 		flags |= XFS_ILOG_FEXT(whichfork);
 	} else
 		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
@@ -3772,18 +3750,13 @@
 			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
 		goto error0;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (rsvd) {
-			error = xfs_trans_reserve_blkquota_force(tp, ip, blks);
-		} else {
-			error = xfs_trans_reserve_blkquota(tp, ip, blks);
-		}
-
-		if (error) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
-			return error;
-		}
+	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
+			XFS_QMOPT_RES_REGBLKS);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+		return error;
 	}
 	if (XFS_IFORK_Q(ip))
 		goto error1;
@@ -4655,8 +4628,8 @@
 	cur = NULL;
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 		ASSERT(wr && tp);
-		if ((error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
-				&logflags, whichfork)))
+		if ((error = xfs_bmap_local_to_extents(tp, ip,
+				firstblock, total, &logflags, whichfork)))
 			goto error0;
 	}
 	if (wr && *firstblock == NULLFSBLOCK) {
@@ -4730,9 +4703,8 @@
 				 * We return EDQUOT if we haven't allocated
 				 * blks already inside this loop;
 				 */
-				if (XFS_IS_QUOTA_ON(ip->i_mount) &&
-				    xfs_trans_reserve_blkquota(NULL, ip,
-					    (long)alen)) {
+				if (XFS_TRANS_RESERVE_BLKQUOTA(
+						mp, NULL, ip, (long)alen)) {
 					if (n == 0) {
 						*nmap = 0;
 						ASSERT(cur == NULL);
@@ -4740,12 +4712,10 @@
 					}
 					break;
 				}
-				if (xfs_mod_incore_sb(ip->i_mount,
-						XFS_SBS_FDBLOCKS,
+				if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
 						-(alen + indlen), rsvd)) {
-					if (XFS_IS_QUOTA_ON(ip->i_mount))
-						xfs_trans_unreserve_blkquota(
-							NULL, ip, (long)alen);
+					XFS_TRANS_UNRESERVE_BLKQUOTA(
+						mp, NULL, ip, (long)alen);
 					break;
 				}
 				ip->i_delayed_blks += alen;
@@ -4808,15 +4778,11 @@
 				alen = bma.alen;
 				aoff = bma.off;
 				ASSERT(*firstblock == NULLFSBLOCK ||
-				       XFS_FSB_TO_AGNO(ip->i_mount,
-					       *firstblock) ==
-				       XFS_FSB_TO_AGNO(ip->i_mount,
-					       bma.firstblock) ||
+				       XFS_FSB_TO_AGNO(mp, *firstblock) ==
+				       XFS_FSB_TO_AGNO(mp, bma.firstblock) ||
 				       (flist->xbf_low &&
-					XFS_FSB_TO_AGNO(ip->i_mount,
-						*firstblock) <
-					XFS_FSB_TO_AGNO(ip->i_mount,
-						bma.firstblock)));
+					XFS_FSB_TO_AGNO(mp, *firstblock) <
+					XFS_FSB_TO_AGNO(mp, bma.firstblock)));
 				*firstblock = bma.firstblock;
 				if (cur)
 					cur->bc_private.b.firstblock =
@@ -4824,7 +4790,7 @@
 				if (abno == NULLFSBLOCK)
 					break;
 				if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-					cur = xfs_btree_init_cursor(ip->i_mount,
+					cur = xfs_btree_init_cursor(mp,
 						tp, NULL, 0, XFS_BTNUM_BMAP,
 						ip, whichfork);
 					cur->bc_private.b.firstblock =
@@ -4941,7 +4907,7 @@
 			 */
 			ASSERT(mval->br_blockcount <= len);
 			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-				cur = xfs_btree_init_cursor(ip->i_mount,
+				cur = xfs_btree_init_cursor(mp,
 					tp, NULL, 0, XFS_BTNUM_BMAP,
 					ip, whichfork);
 				cur->bc_private.b.firstblock =
@@ -5063,12 +5029,12 @@
 	if (cur) {
 		if (!error) {
 			ASSERT(*firstblock == NULLFSBLOCK ||
-			       XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) ==
-			       XFS_FSB_TO_AGNO(ip->i_mount,
+			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
+			       XFS_FSB_TO_AGNO(mp,
 				       cur->bc_private.b.firstblock) ||
 			       (flist->xbf_low &&
-				XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) <
-				XFS_FSB_TO_AGNO(ip->i_mount,
+				XFS_FSB_TO_AGNO(mp, *firstblock) <
+				XFS_FSB_TO_AGNO(mp,
 					cur->bc_private.b.firstblock)));
 			*firstblock = cur->bc_private.b.firstblock;
 		}
@@ -5378,16 +5344,11 @@
 			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
 			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
 				(int)del.br_blockcount, rsvd);
-			if (XFS_IS_QUOTA_ON(ip->i_mount)) {
-				ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
-				ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-				if (!isrt)
-					xfs_trans_unreserve_blkquota(NULL, ip,
-					      (long)del.br_blockcount);
-				else
-					xfs_trans_unreserve_rtblkquota(NULL, ip,
-					      (long)del.br_blockcount);
-			}
+			/* Unreserve our quota space */
+			XFS_TRANS_RESERVE_QUOTA_NBLKS(
+				mp, NULL, ip, -((long)del.br_blockcount), 0,
+				isrt ?	XFS_QMOPT_RES_RTBLKS :
+					XFS_QMOPT_RES_REGBLKS);
 			ip->i_delayed_blks -= del.br_blockcount;
 			if (cur)
 				cur->bc_private.b.flags |=
@@ -5556,8 +5517,7 @@
 	    && DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)
 	    && whichfork == XFS_DATA_FORK) {
 
-		error = xfs_dm_send_data_event(DM_EVENT_READ, bdp,
-				0, 0, 0, NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, 0, 0, 0, NULL);
 		if (error)
 			return XFS_ERROR(error);
 	}
@@ -5579,7 +5539,6 @@
 		   ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
 		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
 		return XFS_ERROR(EINVAL);
-
 	if (whichfork == XFS_DATA_FORK) {
 		if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
 			prealloced = 1;
@@ -5928,10 +5887,13 @@
 				thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
 					xfs_bmbt, block, j, dmxr);
 			}
-			if (INT_GET(*thispa, ARCH_CONVERT) == INT_GET(*pp, ARCH_CONVERT)) {
-				printk("xfs_check_block: thispa(%d) == pp(%d) %Ld\n",
-						j, i, INT_GET(*thispa, ARCH_CONVERT));
-				panic("xfs_check_block: ptrs are equal in node\n");
+			if (INT_GET(*thispa, ARCH_CONVERT) ==
+			    INT_GET(*pp, ARCH_CONVERT)) {
+				cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
+					__FUNCTION__, j, i,
+					INT_GET(*thispa, ARCH_CONVERT));
+				panic("%s: ptrs are equal in node\n",
+					__FUNCTION__);
 			}
 		}
 	}
@@ -6089,12 +6051,13 @@
 	return;
 
 error0:
-	printk("at error0\n");
+	cmn_err(CE_WARN, "%s: at error0", __FUNCTION__);
 	if (bp_release)
 		xfs_trans_brelse(NULL, bp);
 error_norelse:
-	printk("xfs_bmap_check_leaf_extents: BAD after btree leaves for %d extents\n", i);
-	panic("xfs_bmap_check_leaf_extents: CORRUPTED BTREE OR SOMETHING");
+	cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
+		__FUNCTION__, i);
+	panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__);
 	return;
 }
 #endif
diff -Nru a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
--- a/fs/xfs/xfs_bmap.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs_bmap.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -76,10 +76,6 @@
 #define XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 					/* combine contig. space */
 #define XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
-#define XFS_BMAPI_DIRECT_IO	0x800	/* Flag from cxfs client, not used
-					 * by xfs directly. Indicates alloc
-					 * request is for direct I/O not
-					 * extent conversion by server */
 
 #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAPI_AFLAG)
 int xfs_bmapi_aflag(int w);
diff -Nru a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
--- a/fs/xfs/xfs_bmap_btree.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_bmap_btree.c	Mon Mar 31 13:41:07 2003
@@ -629,8 +629,8 @@
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
 	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
 		if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
-				INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
-				XFS_BMAP_BTREE_REF))) {
+				INT_GET(left->bb_rightsib, ARCH_CONVERT),
+				0, &rrbp, XFS_BMAP_BTREE_REF))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			goto error0;
 		}
@@ -646,10 +646,7 @@
 		cur->bc_private.b.flist, mp);
 	cur->bc_private.b.ip->i_d.di_nblocks--;
 	xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
-	if (XFS_IS_QUOTA_ON(mp) &&
-	    cur->bc_private.b.ip->i_ino != mp->m_sb.sb_uquotino &&
-	    cur->bc_private.b.ip->i_ino != mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(cur->bc_tp, cur->bc_private.b.ip,
+	XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
 			XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(cur->bc_tp, rbp);
 	if (bp != lbp) {
@@ -986,13 +983,10 @@
 #endif
 	memcpy(pp, cpp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
 	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
-		cur->bc_private.b.flist, cur->bc_mp);
+			cur->bc_private.b.flist, cur->bc_mp);
 	ip->i_d.di_nblocks--;
-	if (XFS_IS_QUOTA_ON(cur->bc_mp) &&
-	    ip->i_ino != cur->bc_mp->m_sb.sb_uquotino &&
-	    ip->i_ino != cur->bc_mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(cur->bc_tp, ip, XFS_TRANS_DQ_BCOUNT,
-			-1L);
+	XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
+			XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(cur->bc_tp, cbp);
 	cur->bc_bufs[level - 1] = NULL;
 	INT_MOD(block->bb_level, ARCH_CONVERT, -1);
@@ -1589,10 +1583,7 @@
 	cur->bc_private.b.allocated++;
 	cur->bc_private.b.ip->i_d.di_nblocks++;
 	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
-	if (XFS_IS_QUOTA_ON(args.mp) &&
-	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
-	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 	rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
 	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
@@ -2390,11 +2381,8 @@
 	cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	cur->bc_private.b.ip->i_d.di_nblocks++;
-	if (XFS_IS_QUOTA_ON(args.mp) &&
-	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
-	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_gquotino)
-		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
-					  XFS_TRANS_DQ_BCOUNT, 1L);
+	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+			  XFS_TRANS_DQ_BCOUNT, 1L);
 	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
 	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
 	*cblock = *block;
diff -Nru a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
--- a/fs/xfs/xfs_buf.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_buf.h	Mon Mar 31 13:41:08 2003
@@ -124,6 +124,9 @@
 typedef struct pb_target xfs_buftarg_t;
 #define xfs_buftarg pb_target
 
+#define XFS_BUF_DATAIO(x)	((x)->pb_flags |= PBF_FS_DATAIOD)
+#define XFS_BUF_UNDATAIO(x)	((x)->pb_flags &= ~PBF_FS_DATAIOD)
+
 #define XFS_BUF_IODONE_FUNC(buf)	(buf)->pb_iodone
 #define XFS_BUF_SET_IODONE_FUNC(buf, func)	\
 			(buf)->pb_iodone = (func)
@@ -244,7 +247,7 @@
 
 
 #define xfs_biodone(pb)		    \
-	    pagebuf_iodone(pb, 0, 0)
+	    pagebuf_iodone(pb, ((pb)->pb_flags & PBF_FS_DATAIOD), 0)
 
 #define xfs_incore(buftarg,blkno,len,lockit) \
 	    pagebuf_find(buftarg, blkno ,len, lockit)
diff -Nru a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
--- a/fs/xfs/xfs_clnt.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_clnt.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -64,16 +64,6 @@
 	int	sunit;		/* stripe unit (BBs) */
 	int	swidth;		/* stripe width (BBs), multiple of sunit */
 	uchar_t iosizelog;	/* log2 of the preferred I/O size */
-
-	/*  The remainder is for CXFS support.	*/
-	char	**servlist;	/* Table of hosts which may be servers */
-	int	*servlistlen;	/* Table of hostname lengths. */
-	int	slcount;	/* Count of hosts which may be servers. */
-	int	stimeout;	/* Server timeout in milliseconds */
-	int	ctimeout;	/* Client timeout in milliseconds */
-	char	*server;	/* Designated server hostname (for remount). */
-	int	servlen;	/* Length of server hostname (for remount). */
-	int	servcell;	/* Server cell (internal testing only) */
 };
 
 /*
@@ -101,17 +91,6 @@
 #define XFSMNT_IOSIZE		0x00002000	/* optimize for I/O size */
 #define XFSMNT_OSYNCISOSYNC	0x00004000	/* o_sync is REALLY o_sync */
 						/* (osyncisdsync is now default) */
-#define XFSMNT_CLNTONLY		0x00008000	/* cxfs mount as client only */
-#define XFSMNT_UNSHARED		0x00010000	/* cxfs filesystem mounted
-						 * unshared */
-#define XFSMNT_CHGCLNTONLY	0x00020000	/* changing client only flag */
-						/* (for remount only) */
-#define XFSMNT_SERVCELL		0x00040000	/* setting server cell */
-						/* (allowed on remount) */
-#define XFSMNT_MAKESERVER	0x00080000	/* become the server (remount */
-						/* only) */
-#define XFSMNT_NOTSERVER	0x00100000	/* give up being the server */
-						/* (remount only) */
 #define XFSMNT_32BITINODES	0x00200000	/* restrict inodes to 32
 						 * bits of address space */
 #define XFSMNT_GQUOTA		0x00400000	/* group quota accounting */
@@ -120,11 +99,5 @@
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
 #define XFSMNT_DMAPI		0x02000000	/* enable dmapi/xdsm */
 #define XFSMNT_NOLOGFLUSH	0x04000000	/* Don't flush for log blocks */
-
-/* Did we get any args for CXFS to consume? */
-#define XFSARGS_FOR_CXFSARR(ap)		\
-	((ap)->servlist || (ap)->slcount >= 0 || \
-	 (ap)->stimeout >= 0 || (ap)->ctimeout >= 0 || \
-	 (ap)->flags & (XFSMNT_CLNTONLY | XFSMNT_UNSHARED))
 
 #endif	/* __XFS_CLNT_H__ */
diff -Nru a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
--- a/fs/xfs/xfs_dmapi.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_dmapi.h	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -32,11 +32,6 @@
 #ifndef __XFS_DMAPI_H__
 #define __XFS_DMAPI_H__
 
-#ifdef CONFIG_XFS_DMAPI
-
-#include <dmapi/dmapi.h>
-#include <dmapi/dmapi_kern.h>
-
 /*	Values used to define the on-disk version of dm_attrname_t. All
  *	on-disk attribute names start with the 8-byte string "SGI_DMI_".
  *
@@ -48,6 +43,42 @@
 #define DMATTR_PREFIXLEN	8
 #define DMATTR_PREFIXSTRING	"SGI_DMI_"
 
+typedef enum {
+	DM_EVENT_INVALID	= -1,
+	DM_EVENT_CANCEL		= 0,		/* not supported */
+	DM_EVENT_MOUNT		= 1,
+	DM_EVENT_PREUNMOUNT	= 2,
+	DM_EVENT_UNMOUNT	= 3,
+	DM_EVENT_DEBUT		= 4,		/* not supported */
+	DM_EVENT_CREATE		= 5,
+	DM_EVENT_CLOSE		= 6,		/* not supported */
+	DM_EVENT_POSTCREATE	= 7,
+	DM_EVENT_REMOVE		= 8,
+	DM_EVENT_POSTREMOVE	= 9,
+	DM_EVENT_RENAME		= 10,
+	DM_EVENT_POSTRENAME	= 11,
+	DM_EVENT_LINK		= 12,
+	DM_EVENT_POSTLINK	= 13,
+	DM_EVENT_SYMLINK	= 14,
+	DM_EVENT_POSTSYMLINK	= 15,
+	DM_EVENT_READ		= 16,
+	DM_EVENT_WRITE		= 17,
+	DM_EVENT_TRUNCATE	= 18,
+	DM_EVENT_ATTRIBUTE	= 19,
+	DM_EVENT_DESTROY	= 20,
+	DM_EVENT_NOSPACE	= 21,
+	DM_EVENT_USER		= 22,
+	DM_EVENT_MAX		= 23
+} dm_eventtype_t;
+#define HAVE_DM_EVENTTYPE_T
+
+typedef enum {
+	DM_RIGHT_NULL,
+	DM_RIGHT_SHARED,
+	DM_RIGHT_EXCL
+} dm_right_t;
+#define HAVE_DM_RIGHT_T
+
 /* Defines for determining if an event message should be sent. */
 #define DM_EVENT_ENABLED(vfsp, ip, event) ( \
 	unlikely ((vfsp)->vfs_flag & VFS_DMI) && \
@@ -61,21 +92,6 @@
 		  ((io)->io_mount->m_dmevmask & (1 << event)) ) \
 	)
 
-/*
- *	Macros to turn caller specified delay/block flags into
- *	dm_send_xxxx_event flag DM_FLAGS_NDELAY.
- */
-
-#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
-			DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
-
-
-
-/* events valid in dm_set_eventlist() when called with a filesystem handle.
-   These events are not persistent.
-*/
-
 #define DM_XFS_VALID_FS_EVENTS		( \
 	(1 << DM_EVENT_PREUNMOUNT)	| \
 	(1 << DM_EVENT_UNMOUNT)		| \
@@ -120,7 +136,6 @@
 	(1 << DM_EVENT_ATTRIBUTE)	| \
 	(1 << DM_EVENT_DESTROY)		)
 
-
 /* Events supported by the XFS filesystem. */
 #define DM_XFS_SUPPORTED_EVENTS		( \
 	(1 << DM_EVENT_MOUNT)		| \
@@ -144,162 +159,34 @@
 	(1 << DM_EVENT_DESTROY)		)
 
 
-extern int
-xfs_dm_mount(
-	vfs_t		*vfsp,
-	char		*dir_name,
-	char		*fsname);
-
-extern int
-xfs_dm_get_fsys_vector(
-	bhv_desc_t	*bdp,
-	dm_fcntl_vector_t *vecrq);
-
-extern int
-xfs_dm_send_data_event(
-	dm_eventtype_t	event,
-	bhv_desc_t	*bdp,
-	xfs_off_t	offset,
-	size_t		length,
-	int		flags,
-	vrwlock_t	*locktype);
-
-extern int
-xfs_dm_send_mmap_event(
-	struct vm_area_struct *vma,
-	unsigned int	wantflag);
-
-#else /* CONFIG_XFS_DMAPI */
-
-/*
- *	Flags needed to build with dmapi disabled.
- */
-
-typedef enum {
-	DM_EVENT_INVALID	= -1,
-	DM_EVENT_CANCEL		= 0,		/* not supported */
-	DM_EVENT_MOUNT		= 1,
-	DM_EVENT_PREUNMOUNT	= 2,
-	DM_EVENT_UNMOUNT	= 3,
-	DM_EVENT_DEBUT		= 4,		/* not supported */
-	DM_EVENT_CREATE		= 5,
-	DM_EVENT_CLOSE		= 6,		/* not supported */
-	DM_EVENT_POSTCREATE	= 7,
-	DM_EVENT_REMOVE		= 8,
-	DM_EVENT_POSTREMOVE	= 9,
-	DM_EVENT_RENAME		= 10,
-	DM_EVENT_POSTRENAME	= 11,
-	DM_EVENT_LINK		= 12,
-	DM_EVENT_POSTLINK	= 13,
-	DM_EVENT_SYMLINK	= 14,
-	DM_EVENT_POSTSYMLINK	= 15,
-	DM_EVENT_READ		= 16,
-	DM_EVENT_WRITE		= 17,
-	DM_EVENT_TRUNCATE	= 18,
-	DM_EVENT_ATTRIBUTE	= 19,
-	DM_EVENT_DESTROY	= 20,
-	DM_EVENT_NOSPACE	= 21,
-	DM_EVENT_USER		= 22,
-	DM_EVENT_MAX		= 23
-} dm_eventtype_t;
-
-typedef enum {
-	DM_RIGHT_NULL,
-	DM_RIGHT_SHARED,
-	DM_RIGHT_EXCL
-} dm_right_t;
-
-/*
- *	Defines for determining if an event message should be sent.
- */
-#define DM_EVENT_ENABLED(vfsp, ip, event)	0
-#define DM_EVENT_ENABLED_IO(vfsp, io, event)	0
-
 /*
- *	Stubbed out DMAPI delay macros.
+ *	Definitions used for the flags field on dm_send_*_event().
  */
 
-#define FILP_DELAY_FLAG(filp)			0
-#define AT_DELAY_FLAG(f)			0
+#define DM_FLAGS_NDELAY		0x001	/* return EAGAIN after dm_pending() */
+#define DM_FLAGS_UNWANTED	0x002	/* event not in fsys dm_eventset_t */
 
 /*
- *	Events supported by the XFS filesystem.
+ *	Macros to turn caller specified delay/block flags into
+ *	dm_send_xxxx_event flag DM_FLAGS_NDELAY.
  */
 
-#define DM_XFS_VALID_FS_EVENTS			0
-#define DM_XFS_VALID_FILE_EVENTS		0
-#define DM_XFS_VALID_DIRECTORY_EVENTS		0
-#define DM_XFS_SUPPORTED_EVENTS			0
+#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
+			DM_FLAGS_NDELAY : 0)
+#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
 
 /*
- *	Dummy definitions used for the flags field on dm_send_*_event().
+ *	Macros to turn caller specified delay/block flags into
+ *	dm_send_xxxx_event flag DM_FLAGS_NDELAY.
  */
 
-#define DM_FLAGS_NDELAY		0x001	/* return EAGAIN after dm_pending() */
-#define DM_FLAGS_UNWANTED	0x002	/* event not in fsys dm_eventset_t */
-
-/*
- *	Stubs for XFS DMAPI utility routines.
- */
+#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
+			DM_FLAGS_NDELAY : 0)
 
-static __inline int
-xfs_dm_send_data_event(
-	dm_eventtype_t	event,
-	bhv_desc_t	*bdp,
-	xfs_off_t	offset,
-	size_t		length,
-	int		flags,
-	vrwlock_t	*locktype)
-{
-	return ENOSYS;
-}
-
-static __inline int
-xfs_dm_send_mmap_event(
-	struct vm_area_struct *vma,
-	unsigned int	wantflag)
-{
-	return 0;
-}
 
-/*
- *	Stubs for routines needed for the X/Open version of DMAPI.
- */
+extern struct bhv_vfsops xfs_dmops;
 
-static __inline int
-dm_send_destroy_event(
-	bhv_desc_t	*bdp,
-	dm_right_t	vp_right)
-{
-	return ENOSYS;
-}
-
-static __inline int
-dm_send_namesp_event(
-	dm_eventtype_t	event,
-	bhv_desc_t	*bdp1,
-	dm_right_t	vp1_right,
-	bhv_desc_t	*bdp2,
-	dm_right_t	vp2_right,
-	char		*name1,
-	char		*name2,
-	mode_t		mode,
-	int		retcode,
-	int		flags)
-{
-	return ENOSYS;
-}
-
-static __inline void
-dm_send_unmount_event(
-	vfs_t		*vfsp,
-	vnode_t		*vp,
-	dm_right_t	vfsp_right,
-	mode_t		mode,
-	int		retcode,
-	int		flags)
-{
-}
+extern void xfs_dm_init(void);
+extern void xfs_dm_exit(void);
 
-#endif	/* CONFIG_XFS_DMAPI */
 #endif	/* __XFS_DMAPI_H__ */
diff -Nru a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/xfs_dmops.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#include <xfs.h>
+
+#ifndef CONFIG_XFS_DMAPI
+xfs_dmops_t	xfs_dmcore_xfs = {
+	.xfs_send_data		= (xfs_send_data_t)fs_nosys,
+	.xfs_send_mmap		= (xfs_send_mmap_t)fs_noerr,
+	.xfs_send_destroy	= (xfs_send_destroy_t)fs_nosys,
+	.xfs_send_namesp	= (xfs_send_namesp_t)fs_nosys,
+	.xfs_send_unmount	= (xfs_send_unmount_t)fs_noval,
+};
+#endif /* CONFIG_XFS_DMAPI */
diff -Nru a/fs/xfs/xfs_dqblk.h b/fs/xfs/xfs_dqblk.h
--- a/fs/xfs/xfs_dqblk.h	Mon Mar 31 13:41:08 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_DQBLK_H__
-#define __XFS_DQBLK_H__
-
-/*
- * The ondisk form of a dquot structure.
- */
-#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
-#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
-
-/*
- * This is the main portion of the on-disk representation of quota
- * information for a user. This is the q_core of the xfs_dquot_t that
- * is kept in kernel memory. We pad this with some more expansion room
- * to construct the on disk structure.
- */
-typedef struct	xfs_disk_dquot {
-/*16*/	u_int16_t	d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
-/*8 */	u_int8_t	d_version;	/* dquot version */
-/*8 */	u_int8_t	d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
-/*32*/	xfs_dqid_t	d_id;		/* user,project,group id */
-/*64*/	xfs_qcnt_t	d_blk_hardlimit;/* absolute limit on disk blks */
-/*64*/	xfs_qcnt_t	d_blk_softlimit;/* preferred limit on disk blks */
-/*64*/	xfs_qcnt_t	d_ino_hardlimit;/* maximum # allocated inodes */
-/*64*/	xfs_qcnt_t	d_ino_softlimit;/* preferred inode limit */
-/*64*/	xfs_qcnt_t	d_bcount;	/* disk blocks owned by the user */
-/*64*/	xfs_qcnt_t	d_icount;	/* inodes owned by the user */
-/*32*/	__int32_t	d_itimer;	/* zero if within inode limits if not,
-					   this is when we refuse service */
-/*32*/	__int32_t	d_btimer;	/* similar to above; for disk blocks */
-/*16*/	xfs_qwarncnt_t	d_iwarns;	/* warnings issued wrt num inodes */
-/*16*/	xfs_qwarncnt_t	d_bwarns;	/* warnings issued wrt disk blocks */
-/*32*/	__int32_t	d_pad0;		/* 64 bit align */
-/*64*/	xfs_qcnt_t	d_rtb_hardlimit;/* absolute limit on realtime blks */
-/*64*/	xfs_qcnt_t	d_rtb_softlimit;/* preferred limit on RT disk blks */
-/*64*/	xfs_qcnt_t	d_rtbcount;	/* realtime blocks owned */
-/*32*/	__int32_t	d_rtbtimer;	/* similar to above; for RT disk blocks */
-/*16*/	xfs_qwarncnt_t	d_rtbwarns;	/* warnings issued wrt RT disk blocks */
-/*16*/	__uint16_t	d_pad;
-} xfs_disk_dquot_t;
-
-/*
- * This is what goes on disk. This is separated from the xfs_disk_dquot because
- * carrying the unnecessary padding would be a waste of memory.
- */
-typedef struct xfs_dqblk {
-	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
-	char		  dd_fill[32];	/* filling for posterity */
-} xfs_dqblk_t;
-
-/*
- * flags for q_flags field in the dquot.
- */
-#define XFS_DQ_USER		0x0001		/* a user quota */
-/* #define XFS_DQ_PROJ		0x0002		-- project quota (IRIX) */
-#define XFS_DQ_GROUP		0x0004		/* a group quota */
-#define XFS_DQ_FLOCKED		0x0008		/* flush lock taken */
-#define XFS_DQ_DIRTY		0x0010		/* dquot is dirty */
-#define XFS_DQ_WANT		0x0020		/* for lookup/reclaim race */
-#define XFS_DQ_INACTIVE		0x0040		/* dq off mplist & hashlist */
-#define XFS_DQ_MARKER		0x0080		/* sentinel */
-
-/*
- * In the worst case, when both user and group quotas are on,
- * we can have a max of three dquots changing in a single transaction.
- */
-#define XFS_DQUOT_LOGRES(mp)	(sizeof(xfs_disk_dquot_t) * 3)
-
-#endif	/* __XFS_DQBLK_H__ */
diff -Nru a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
--- a/fs/xfs/xfs_dquot.c	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,1660 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#include <xfs.h>
-#include <xfs_quota_priv.h>
-
-
-/*
-   LOCK ORDER
-
-   inode lock		    (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-		      has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
-
-STATIC void		xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
-
-#ifdef DEBUG
-dev_t xfs_dqerror_dev = 0;
-int xfs_do_dqerror = 0;
-int xfs_dqreq_num = 0;
-int xfs_dqerror_mod = 33;
-#endif
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-xfs_dquot_t *
-xfs_qm_dqinit(
-	xfs_mount_t  *mp,
-	xfs_dqid_t   id,
-	uint	     type)
-{
-	xfs_dquot_t	*dqp;
-	boolean_t	brandnewdquot;
-
-	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-	dqp->dq_flags = type;
-	INT_SET(dqp->q_core.d_id, ARCH_CONVERT, id);
-	dqp->q_mount = mp;
-
-	/*
-	 * No need to re-initialize these if this is a reclaimed dquot.
-	 */
-	if (brandnewdquot) {
-		dqp->dq_flnext = dqp->dq_flprev = dqp;
-		mutex_init(&dqp->q_qlock,  MUTEX_DEFAULT, "xdq");
-		initnsema(&dqp->q_flock, 1, "fdq");
-		sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
-
-#ifdef DQUOT_TRACING
-		dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
-		xfs_dqtrace_entry(dqp, "DQINIT");
-#endif
-	} else {
-		/*
-		 * Only the q_core portion was zeroed in dqreclaim_one().
-		 * So, we need to reset others.
-		 */
-		 dqp->q_nrefs = 0;
-		 dqp->q_blkno = 0;
-		 dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
-		 dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
-		 dqp->q_bufoffset = 0;
-		 dqp->q_fileoffset = 0;
-		 dqp->q_transp = NULL;
-		 dqp->q_gdquot = NULL;
-		 dqp->q_res_bcount = 0;
-		 dqp->q_res_icount = 0;
-		 dqp->q_res_rtbcount = 0;
-		 dqp->q_pincount = 0;
-		 dqp->q_hash = 0;
-		 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
-
-#ifdef DQUOT_TRACING
-		 ASSERT(dqp->q_trace);
-		 xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
-#endif
-	 }
-
-	/*
-	 * log item gets initialized later
-	 */
-	return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
-	xfs_dquot_t	*dqp)
-{
-	ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
-
-	mutex_destroy(&dqp->q_qlock);
-	freesema(&dqp->q_flock);
-	sv_destroy(&dqp->q_pinwait);
-
-#ifdef DQUOT_TRACING
-	if (dqp->q_trace)
-	     ktrace_free(dqp->q_trace);
-	dqp->q_trace = NULL;
-#endif
-	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-	atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-	xfs_dqid_t	 id,
-	uint		 type,
-	xfs_dqblk_t	 *d)
-{
-	/*
-	 * Caller has zero'd the entire dquot 'chunk' already.
-	 */
-	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
-	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
-	INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
-	INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
-}
-
-
-#ifdef DQUOT_TRACING
-/*
- * Dquot tracing for debugging.
- */
-/* ARGSUSED */
-void
-xfs_dqtrace_entry__(
-	xfs_dquot_t *dqp,
-	char *func,
-	void *retaddr,
-	xfs_inode_t *ip)
-{
-	xfs_dquot_t *udqp = NULL;
-	int ino;
-
-	ASSERT(dqp->q_trace);
-	if (ip) {
-		ino = ip->i_ino;
-		udqp = ip->i_udquot;
-	}
-	ktrace_enter(dqp->q_trace,
-		     (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
-		     (void *)func,
-		     (void *)(__psint_t)dqp->q_nrefs,
-		     (void *)(__psint_t)dqp->dq_flags,
-		     (void *)(__psint_t)dqp->q_res_bcount,
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_id, ARCH_CONVERT), /* 11 */
-		     (void *)(__psint_t)current_pid(),
-		     (void *)(__psint_t)ino,
-		     (void *)(__psint_t)retaddr,
-		     (void *)(__psint_t)udqp);
-	return;
-}
-#endif
-
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.
- */
-void
-xfs_qm_adjust_dqtimers(
-	xfs_mount_t		*mp,
-	xfs_disk_dquot_t	*d)
-{
-	/*
-	 * The dquot had better be locked. We are modifying it here.
-	 */
-
-	/*
-	 * root's limits are not real limits.
-	 */
-	if (INT_ISZERO(d->d_id, ARCH_CONVERT))
-		return;
-
-#ifdef QUOTADEBUG
-	if (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT))
-		ASSERT(INT_GET(d->d_blk_softlimit, ARCH_CONVERT) <= INT_GET(d->d_blk_hardlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT))
-		ASSERT(INT_GET(d->d_ino_softlimit, ARCH_CONVERT) <= INT_GET(d->d_ino_hardlimit, ARCH_CONVERT));
-#endif
-	if (INT_ISZERO(d->d_btimer, ARCH_CONVERT)) {
-		if ((INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) >= INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) ||
-		    (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) >= INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
-			INT_SET(d->d_btimer, ARCH_CONVERT, get_seconds() + XFS_QI_BTIMELIMIT(mp));
-		}
-	} else {
-		if ((INT_ISZERO(d->d_blk_softlimit, ARCH_CONVERT) ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) < INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) &&
-		    (INT_ISZERO(d->d_blk_hardlimit, ARCH_CONVERT) ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) < INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
-			INT_ZERO(d->d_btimer, ARCH_CONVERT);
-		}
-	}
-
-	if (INT_ISZERO(d->d_itimer, ARCH_CONVERT)) {
-		if ((INT_GET(d->d_ino_softlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_icount, ARCH_CONVERT) >= INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) ||
-		    (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_icount, ARCH_CONVERT) >= INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
-			INT_SET(d->d_itimer, ARCH_CONVERT, get_seconds() + XFS_QI_ITIMELIMIT(mp));
-		}
-	} else {
-		if ((INT_ISZERO(d->d_ino_softlimit, ARCH_CONVERT) ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) < INT_GET(d->d_ino_softlimit, ARCH_CONVERT)))  &&
-		    (INT_ISZERO(d->d_ino_hardlimit, ARCH_CONVERT) ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) < INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
-			INT_ZERO(d->d_itimer, ARCH_CONVERT);
-		}
-	}
-}
-
-/*
- * Increment or reset warnings of a given dquot.
- */
-int
-xfs_qm_dqwarn(
-	xfs_disk_dquot_t	*d,
-	uint			flags)
-{
-	int	warned;
-
-	/*
-	 * root's limits are not real limits.
-	 */
-	if (INT_ISZERO(d->d_id, ARCH_CONVERT))
-		return (0);
-
-	warned = 0;
-	if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
-	    (INT_GET(d->d_bcount, ARCH_CONVERT) >= INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_bwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if (INT_ISZERO(d->d_blk_softlimit, ARCH_CONVERT) ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) < INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-			INT_ZERO(d->d_bwarns, ARCH_CONVERT);
-		}
-	}
-
-	if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 &&
-	    (INT_GET(d->d_icount, ARCH_CONVERT) >= INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_iwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if ((INT_ISZERO(d->d_ino_softlimit, ARCH_CONVERT)) ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) < INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-			INT_ZERO(d->d_iwarns, ARCH_CONVERT);
-		}
-	}
-#ifdef QUOTADEBUG
-	if (INT_GET(d->d_iwarns, ARCH_CONVERT))
-		printk("--------@@Inode warnings running : %Lu >= %Lu\n",
-		       INT_GET(d->d_icount, ARCH_CONVERT), INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_bwarns, ARCH_CONVERT))
-		printk("--------@@Blks warnings running : %Lu >= %Lu\n",
-		       INT_GET(d->d_bcount, ARCH_CONVERT), INT_GET(d->d_blk_softlimit, ARCH_CONVERT));
-#endif
-	return (warned);
-}
-
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,
-	uint		type,
-	xfs_buf_t	*bp)
-{
-	xfs_dqblk_t	*d;
-	int		curid, i;
-
-	ASSERT(tp);
-	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
-	d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
-
-	/*
-	 * ID of the first dquot in the block - id's are zero based.
-	 */
-	curid = id - (id % XFS_QM_DQPERBLK(mp));
-	ASSERT(curid >= 0);
-	memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
-	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
-		xfs_qm_dqinit_core(curid, type, d);
-	xfs_trans_dquot_buf(tp, bp,
-			    type & XFS_DQ_USER ?
-			    XFS_BLI_UDQUOT_BUF :
-			    XFS_BLI_GDQUOT_BUF);
-	xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
-}
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_dquot_t	*dqp,
-	xfs_inode_t	*quotip,
-	xfs_fileoff_t	offset_fsb,
-	xfs_buf_t	**O_bpp)
-{
-	xfs_fsblock_t	firstblock;
-	xfs_bmap_free_t flist;
-	xfs_bmbt_irec_t map;
-	int		nmaps, error, committed;
-	xfs_buf_t	*bp;
-
-	ASSERT(tp != NULL);
-	xfs_dqtrace_entry(dqp, "DQALLOC");
-
-	/*
-	 * Initialize the bmap freelist prior to calling bmapi code.
-	 */
-	XFS_BMAP_INIT(&flist, &firstblock);
-	xfs_ilock(quotip, XFS_ILOCK_EXCL);
-	/*
-	 * Return if this type of quotas is turned off while we didn't
-	 * have an inode lock
-	 */
-	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-		return (ESRCH);
-	}
-
-	/*
-	 * xfs_trans_commit normally decrements the vnode ref count
-	 * when it unlocks the inode. Since we want to keep the quota
-	 * inode around, we bump the vnode ref count now.
-	 */
-	VN_HOLD(XFS_ITOV(quotip));
-
-	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
-	nmaps = 1;
-	if ((error = xfs_bmapi(tp, quotip,
-			      offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-			      &firstblock,
-			      XFS_QM_DQALLOC_SPACE_RES(mp),
-			      &map, &nmaps, &flist))) {
-		goto error0;
-	}
-	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-	ASSERT(nmaps == 1);
-	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-	       (map.br_startblock != HOLESTARTBLOCK));
-
-	/*
-	 * Keep track of the blkno to save a lookup later
-	 */
-	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-	/* now we can just get the buffer (there's nothing to read yet) */
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-			       dqp->q_blkno,
-			       XFS_QI_DQCHUNKLEN(mp),
-			       0);
-	if (!bp || (error = XFS_BUF_GETERROR(bp)))
-		goto error1;
-	/*
-	 * Make a chunk of dquots out of this buffer and log
-	 * the entire thing.
-	 */
-	xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
-			      dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
-			      bp);
-
-	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
-		goto error1;
-	}
-
-	*O_bpp = bp;
-	return 0;
-
-      error1:
-	xfs_bmap_cancel(&flist);
-      error0:
-	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
-	return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
-	xfs_trans_t		*tp,
-	xfs_dquot_t		*dqp,
-	xfs_disk_dquot_t	**O_ddpp,
-	xfs_buf_t		**O_bpp,
-	uint			flags)
-{
-	xfs_bmbt_irec_t map;
-	int		nmaps, error;
-	xfs_buf_t	*bp;
-	xfs_inode_t	*quotip;
-	xfs_mount_t	*mp;
-	xfs_disk_dquot_t *ddq;
-	xfs_dqid_t	id;
-	boolean_t	newdquot;
-
-	mp = dqp->q_mount;
-	id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
-	nmaps = 1;
-	newdquot = B_FALSE;
-
-	/*
-	 * If we don't know where the dquot lives, find out.
-	 */
-	if (dqp->q_blkno == (xfs_daddr_t) 0) {
-		/* We use the id as an index */
-		dqp->q_fileoffset = (xfs_fileoff_t) ((uint)id /
-						     XFS_QM_DQPERBLK(mp));
-		nmaps = 1;
-		quotip = XFS_DQ_TO_QIP(dqp);
-		xfs_ilock(quotip, XFS_ILOCK_SHARED);
-		/*
-		 * Return if this type of quotas is turned off while we didn't
-		 * have an inode lock
-		 */
-		if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-			xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-			return (ESRCH);
-		}
-		/*
-		 * Find the block map; no allocations yet
-		 */
-		error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-				  XFS_DQUOT_CLUSTER_SIZE_FSB,
-				  XFS_BMAPI_METADATA,
-				  NULL, 0, &map, &nmaps, NULL);
-
-		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-		if (error)
-			return (error);
-		ASSERT(nmaps == 1);
-		ASSERT(map.br_blockcount == 1);
-
-		/*
-		 * offset of dquot in the (fixed sized) dquot chunk.
-		 */
-		dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
-			sizeof(xfs_dqblk_t);
-		if (map.br_startblock == HOLESTARTBLOCK) {
-			/*
-			 * We don't allocate unless we're asked to
-			 */
-			if (!(flags & XFS_QMOPT_DQALLOC))
-				return (ENOENT);
-
-			ASSERT(tp);
-			if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip,
-						dqp->q_fileoffset, &bp)))
-				return (error);
-			newdquot = B_TRUE;
-		} else {
-			/*
-			 * store the blkno etc so that we don't have to do the
-			 * mapping all the time
-			 */
-			dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-		}
-	}
-	ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
-	ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
-
-	/*
-	 * Read in the buffer, unless we've just done the allocation
-	 * (in which case we already have the buf).
-	 */
-	if (! newdquot) {
-		xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
-		if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-					       dqp->q_blkno,
-					       XFS_QI_DQCHUNKLEN(mp),
-					       0, &bp))) {
-			return (error);
-		}
-		if (error || !bp)
-			return XFS_ERROR(error);
-	}
-	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
-	/*
-	 * calculate the location of the dquot inside the buffer.
-	 */
-	ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
-	/*
-	 * A simple sanity check in case we got a corrupted dquot...
-	 */
-	if (xfs_qm_dqcheck(ddq, id,
-			   dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
-			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
-			   "dqtobp")) {
-		if (!(flags & XFS_QMOPT_DQREPAIR)) {
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EIO);
-		}
-		XFS_BUF_BUSY(bp); /* We dirtied this */
-	}
-
-	*O_bpp = bp;
-	*O_ddpp = ddq;
-
-	return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
-	xfs_trans_t	*tp,
-	xfs_dqid_t	id,
-	xfs_dquot_t	*dqp,	/* dquot to get filled in */
-	uint		flags)
-{
-	xfs_disk_dquot_t *ddqp;
-	xfs_buf_t	 *bp;
-	int		 error;
-
-	/*
-	 * get a pointer to the on-disk dquot and the buffer containing it
-	 * dqp already knows its own type (GROUP/USER).
-	 */
-	xfs_dqtrace_entry(dqp, "DQREAD");
-	if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) {
-		return (error);
-	}
-
-	/* copy everything from disk dquot to the incore dquot */
-	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-	ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
-	xfs_qm_dquot_logitem_init(dqp);
-
-	/*
-	 * Reservation counters are defined as reservation plus current usage
-	 * to avoid having to add everytime.
-	 */
-	dqp->q_res_bcount = INT_GET(ddqp->d_bcount, ARCH_CONVERT);
-	dqp->q_res_icount = INT_GET(ddqp->d_icount, ARCH_CONVERT);
-	dqp->q_res_rtbcount = INT_GET(ddqp->d_rtbcount, ARCH_CONVERT);
-
-	/* Mark the buf so that this will stay incore a little longer */
-	XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
-	/*
-	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
-	 * So we need to release with xfs_trans_brelse().
-	 * The strategy here is identical to that of inodes; we lock
-	 * the dquot in xfs_qm_dqget() before making it accessible to
-	 * others. This is because dquots, like inodes, need a good level of
-	 * concurrency, and we don't want to take locks on the entire buffers
-	 * for dquot accesses.
-	 * Note also that the dquot buffer may even be dirty at this point, if
-	 * this particular dquot was repaired. We still aren't afraid to
-	 * brelse it because we have the changes incore.
-	 */
-	ASSERT(XFS_BUF_ISBUSY(bp));
-	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-	xfs_trans_brelse(tp, bp);
-
-	return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,	 /* gid or uid, depending on type */
-	uint		type,	 /* UDQUOT or GDQUOT */
-	uint		flags,	 /* DQALLOC, DQREPAIR */
-	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-	xfs_trans_t	*tp;
-	int		cancelflags=0;
-
-	dqp = xfs_qm_dqinit(mp, id, type);
-	tp = NULL;
-	if (flags & XFS_QMOPT_DQALLOC) {
-		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-		if ((error = xfs_trans_reserve(tp,
-				       XFS_QM_DQALLOC_SPACE_RES(mp),
-				       XFS_WRITE_LOG_RES(mp) +
-					      BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
-					      128,
-				       0,
-				       XFS_TRANS_PERM_LOG_RES,
-				       XFS_WRITE_LOG_COUNT))) {
-			cancelflags = 0;
-			goto error0;
-		}
-		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-	}
-
-	/*
-	 * Read it from disk; xfs_dqread() takes care of
-	 * all the necessary initialization of dquot's fields (locks, etc)
-	 */
-	if ((error = xfs_qm_dqread(tp, id, dqp, flags))) {
-		/*
-		 * This can happen if quotas got turned off (ESRCH),
-		 * or if the dquot didn't exist on disk and we ask to
-		 * allocate (ENOENT).
-		 */
-		xfs_dqtrace_entry(dqp, "DQREAD FAIL");
-		cancelflags |= XFS_TRANS_ABORT;
-		goto error0;
-	}
-	if (tp) {
-		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
-					     NULL)))
-			goto error1;
-	}
-
-	*O_dqpp = dqp;
-	ASSERT(! XFS_DQ_IS_LOCKED(dqp));
-	return (0);
-
- error0:
-	ASSERT(error);
-	if (tp)
-		xfs_trans_cancel(tp, cancelflags);
- error1:
-	xfs_qm_dqdestroy(dqp);
-	*O_dqpp = NULL;
-	return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-	xfs_mount_t		*mp,
-	xfs_dqid_t		id,
-	xfs_dqhash_t		*qh,
-	xfs_dquot_t		**O_dqpp)
-{
-	xfs_dquot_t		*dqp;
-	uint			flist_locked;
-	xfs_dquot_t		*d;
-
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
-
-	flist_locked = B_FALSE;
-
-	/*
-	 * Traverse the hashchain looking for a match
-	 */
-	for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
-		/*
-		 * We already have the hashlock. We don't need the
-		 * dqlock to look at the id field of the dquot, since the
-		 * id can't be modified without the hashlock anyway.
-		 */
-		if (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id && dqp->q_mount == mp) {
-			xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
-			/*
-			 * All in core dquots must be on the dqlist of mp
-			 */
-			ASSERT(dqp->MPL_PREVP != NULL);
-
-			xfs_dqlock(dqp);
-			if (dqp->q_nrefs == 0) {
-				ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
-				if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
-					xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
-
-					/*
-					 * We may have raced with dqreclaim_one()
-					 * (and lost). So, flag that we don't
-					 * want the dquot to be reclaimed.
-					 */
-					dqp->dq_flags |= XFS_DQ_WANT;
-					xfs_dqunlock(dqp);
-					xfs_qm_freelist_lock(xfs_Gqm);
-					xfs_dqlock(dqp);
-					dqp->dq_flags &= ~(XFS_DQ_WANT);
-				}
-				flist_locked = B_TRUE;
-			}
-
-			/*
-			 * id couldn't have changed; we had the hashlock all
-			 * along
-			 */
-			ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
-
-			if (flist_locked) {
-				if (dqp->q_nrefs != 0) {
-					xfs_qm_freelist_unlock(xfs_Gqm);
-					flist_locked = B_FALSE;
-				} else {
-					/*
-					 * take it off the freelist
-					 */
-					xfs_dqtrace_entry(dqp,
-							"DQLOOKUP: TAKEOFF FL");
-					XQM_FREELIST_REMOVE(dqp);
-					/* xfs_qm_freelist_print(&(xfs_Gqm->
-							qm_dqfreelist),
-							"after removal"); */
-				}
-			}
-
-			/*
-			 * grab a reference
-			 */
-			XFS_DQHOLD(dqp);
-
-			if (flist_locked)
-				xfs_qm_freelist_unlock(xfs_Gqm);
-			/*
-			 * move the dquot to the front of the hashchain
-			 */
-			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
-			if (dqp->HL_PREVP != &qh->qh_next) {
-				xfs_dqtrace_entry(dqp,
-						  "DQLOOKUP: HASH MOVETOFRONT");
-				if ((d = dqp->HL_NEXT))
-					d->HL_PREVP = dqp->HL_PREVP;
-				*(dqp->HL_PREVP) = d;
-				d = qh->qh_next;
-				d->HL_PREVP = &dqp->HL_NEXT;
-				dqp->HL_NEXT = d;
-				dqp->HL_PREVP = &qh->qh_next;
-				qh->qh_next = dqp;
-			}
-			xfs_dqtrace_entry(dqp, "LOOKUP END");
-			*O_dqpp = dqp;
-			ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
-			return (0);
-		}
-	}
-
-	*O_dqpp = NULL;
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
-	return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,	  /* locked inode (optional) */
-	xfs_dqid_t	id,	  /* gid or uid, depending on type */
-	uint		type,	  /* UDQUOT or GDQUOT */
-	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
-{
-	xfs_dquot_t	*dqp;
-	xfs_dqhash_t	*h;
-	uint		version;
-	int		error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-		return (ESRCH);
-	}
-	h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
-	if (xfs_do_dqerror) {
-		if ((xfs_dqerror_dev == mp->m_dev) &&
-		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-			printk("Returning error in dqget\n");
-			return (EIO);
-		}
-	}
-#endif
-
- again:
-
-#ifdef DEBUG
-	ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP);
-	if (ip) {
-		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-		if (type == XFS_DQ_USER)
-			ASSERT(ip->i_udquot == NULL);
-		else
-			ASSERT(ip->i_gdquot == NULL);
-	}
-#endif
-	XFS_DQ_HASH_LOCK(h);
-
-	/*
-	 * Look in the cache (hashtable).
-	 * The chain is kept locked during lookup.
-	 */
-	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
-		XFS_STATS_INC(xfsstats.xs_qm_dqcachehits);
-		/*
-		 * The dquot was found, moved to the front of the chain,
-		 * taken off the freelist if it was on it, and locked
-		 * at this point. Just unlock the hashchain and return.
-		 */
-		ASSERT(*O_dqpp);
-		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-		XFS_DQ_HASH_UNLOCK(h);
-		xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
-		return (0);	/* success */
-	}
-	XFS_STATS_INC(xfsstats.xs_qm_dqcachemisses);
-
-	/*
-	 * Dquot cache miss. We don't want to keep the inode lock across
-	 * a (potential) disk read. Also we don't want to deal with the lock
-	 * ordering between quotainode and this inode. OTOH, dropping the inode
-	 * lock here means dealing with a chown that can happen before
-	 * we re-acquire the lock.
-	 */
-	if (ip)
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * Save the hashchain version stamp, and unlock the chain, so that
-	 * we don't keep the lock across a disk read
-	 */
-	version = h->qh_version;
-	XFS_DQ_HASH_UNLOCK(h);
-
-	/*
-	 * Allocate the dquot on the kernel heap, and read the ondisk
-	 * portion off the disk. Also, do all the necessary initialization
-	 * This can return ENOENT if dquot didn't exist on disk and we didn't
-	 * ask it to allocate; ESRCH if quotas got turned off suddenly.
-	 */
-	if ((error = xfs_qm_idtodq(mp, id, type,
-				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-					   XFS_QMOPT_DOWARN),
-				  &dqp))) {
-		if (ip)
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-		return (error);
-	}
-
-	/*
-	 * See if this is mount code calling to look at the overall quota limits
-	 * which are stored in the id == 0 user or group's dquot.
-	 * Since we may not have done a quotacheck by this point, just return
-	 * the dquot without attaching it to any hashtables, lists, etc, or even
-	 * taking a reference.
-	 * The caller must dqdestroy this once done.
-	 */
-	if (flags & XFS_QMOPT_DQSUSER) {
-		ASSERT(id == 0);
-		ASSERT(! ip);
-		goto dqret;
-	}
-
-	/*
-	 * Dquot lock comes after hashlock in the lock ordering
-	 */
-	ASSERT(! XFS_DQ_IS_LOCKED(dqp));
-	if (ip) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		if (! XFS_IS_DQTYPE_ON(mp, type)) {
-			/* inode stays locked on return */
-			xfs_qm_dqdestroy(dqp);
-			return XFS_ERROR(ESRCH);
-		}
-		/*
-		 * A dquot could be attached to this inode by now, since
-		 * we had dropped the ilock.
-		 */
-		if (type == XFS_DQ_USER) {
-			if (ip->i_udquot) {
-				xfs_qm_dqdestroy(dqp);
-				dqp = ip->i_udquot;
-				xfs_dqlock(dqp);
-				goto dqret;
-			}
-		} else {
-			if (ip->i_gdquot) {
-				xfs_qm_dqdestroy(dqp);
-				dqp = ip->i_gdquot;
-				xfs_dqlock(dqp);
-				goto dqret;
-			}
-		}
-	}
-
-	/*
-	 * Hashlock comes after ilock in lock order
-	 */
-	XFS_DQ_HASH_LOCK(h);
-	if (version != h->qh_version) {
-		xfs_dquot_t *tmpdqp;
-		/*
-		 * Now, see if somebody else put the dquot in the
-		 * hashtable before us. This can happen because we didn't
-		 * keep the hashchain lock. We don't have to worry about
-		 * lock order between the two dquots here since dqp isn't
-		 * on any findable lists yet.
-		 */
-		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
-			/*
-			 * Duplicate found. Just throw away the new dquot
-			 * and start over.
-			 */
-			xfs_qm_dqput(tmpdqp);
-			XFS_DQ_HASH_UNLOCK(h);
-			xfs_qm_dqdestroy(dqp);
-			XFS_STATS_INC(xfsstats.xs_qm_dquot_dups);
-			goto again;
-		}
-	}
-
-	/*
-	 * Put the dquot at the beginning of the hash-chain and mp's list
-	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-	 */
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
-	dqp->q_hash = h;
-	XQM_HASHLIST_INSERT(h, dqp);
-
-	/*
-	 * Attach this dquot to this filesystem's list of all dquots,
-	 * kept inside the mount structure in m_quotainfo field
-	 */
-	xfs_qm_mplist_lock(mp);
-
-	/*
-	 * We return a locked dquot to the caller, with a reference taken
-	 */
-	xfs_dqlock(dqp);
-	dqp->q_nrefs = 1;
-
-	XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
-
-	xfs_qm_mplist_unlock(mp);
-	XFS_DQ_HASH_UNLOCK(h);
- dqret:
-	ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
-	xfs_dqtrace_entry(dqp, "DQGET DONE");
-	*O_dqpp = dqp;
-	return (0);
-}
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
-	xfs_dquot_t	*dqp)
-{
-	xfs_dquot_t	*gdqp;
-
-	ASSERT(dqp->q_nrefs > 0);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	xfs_dqtrace_entry(dqp, "DQPUT");
-
-	if (dqp->q_nrefs != 1) {
-		dqp->q_nrefs--;
-		xfs_dqunlock(dqp);
-		return;
-	}
-
-	/*
-	 * drop the dqlock and acquire the freelist and dqlock
-	 * in the right order; but try to get it out-of-order first
-	 */
-	if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
-		xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
-		xfs_dqunlock(dqp);
-		xfs_qm_freelist_lock(xfs_Gqm);
-		xfs_dqlock(dqp);
-	}
-
-	while (1) {
-		gdqp = NULL;
-
-		/* We can't depend on nrefs being == 1 here */
-		if (--dqp->q_nrefs == 0) {
-			xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
-			/*
-			 * insert at end of the freelist.
-			 */
-			XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
-
-			/*
-			 * If we just added a udquot to the freelist, then
-			 * we want to release the gdquot reference that
-			 * it (probably) has. Otherwise it'll keep the
-			 * gdquot from getting reclaimed.
-			 */
-			if ((gdqp = dqp->q_gdquot)) {
-				/*
-				 * Avoid a recursive dqput call
-				 */
-				xfs_dqlock(gdqp);
-				dqp->q_gdquot = NULL;
-			}
-
-			/* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
-			   "@@@@@++ Free list (after append) @@@@@+");
-			   */
-		}
-		xfs_dqunlock(dqp);
-
-		/*
-		 * If we had a group quota inside the user quota as a hint,
-		 * release it now.
-		 */
-		if (! gdqp)
-			break;
-		dqp = gdqp;
-	}
-	xfs_qm_freelist_unlock(xfs_Gqm);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
-	xfs_dquot_t	*dqp)
-{
-	ASSERT(dqp);
-	xfs_dqtrace_entry(dqp, "DQRELE");
-
-	xfs_dqlock(dqp);
-	/*
-	 * We don't care to flush it if the dquot is dirty here.
-	 * That will create stutters that we want to avoid.
-	 * Instead we do a delayed write when we try to reclaim
-	 * a dirty dquot. Also xfs_sync will take part of the burden...
-	 */
-	xfs_qm_dqput(dqp);
-}
-
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
-	xfs_dquot_t		*dqp,
-	uint			flags)
-{
-	xfs_mount_t		*mp;
-	xfs_buf_t		*bp;
-	xfs_disk_dquot_t	*ddqp;
-	int			error;
-	SPLDECL(s);
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
-	xfs_dqtrace_entry(dqp, "DQFLUSH");
-
-	/*
-	 * If not dirty, nada.
-	 */
-	if (!XFS_DQ_IS_DIRTY(dqp)) {
-		xfs_dqfunlock(dqp);
-		return (0);
-	}
-
-	/*
-	 * Cant flush a pinned dquot. Wait for it.
-	 */
-	xfs_qm_dqunpin_wait(dqp);
-
-	/*
-	 * This may have been unpinned because the filesystem is shutting
-	 * down forcibly. If that's the case we must not write this dquot
-	 * to disk, because the log record didn't make it to disk!
-	 */
-	if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
-		dqp->dq_flags &= ~(XFS_DQ_DIRTY);
-		xfs_dqfunlock(dqp);
-		return XFS_ERROR(EIO);
-	}
-
-	/*
-	 * Get the buffer containing the on-disk dquot
-	 * We don't need a transaction envelope because we know that the
-	 * the ondisk-dquot has already been allocated for.
-	 */
-	if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
-		xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
-		ASSERT(error != ENOENT);
-		/*
-		 * Quotas could have gotten turned off (ESRCH)
-		 */
-		xfs_dqfunlock(dqp);
-		return (error);
-	}
-
-	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN,
-			   "dqflush (incore copy)")) {
-		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
-		return XFS_ERROR(EIO);
-	}
-
-	/* This is the only portion of data that needs to persist */
-	memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
-
-	/*
-	 * Clear the dirty field and remember the flush lsn for later use.
-	 */
-	dqp->dq_flags &= ~(XFS_DQ_DIRTY);
-	mp = dqp->q_mount;
-
-	/* lsn is 64 bits */
-	AIL_LOCK(mp, s);
-	dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
-	AIL_UNLOCK(mp, s);
-
-	/*
-	 * Attach an iodone routine so that we can remove this dquot from the
-	 * AIL and release the flush lock once the dquot is synced to disk.
-	 */
-	xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
-			      xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
-	/*
-	 * If the buffer is pinned then push on the log so we won't
-	 * get stuck waiting in the write for too long.
-	 */
-	if (XFS_BUF_ISPINNED(bp)) {
-		xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
-		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-	}
-
-	if (flags & XFS_QMOPT_DELWRI) {
-		xfs_bdwrite(mp, bp);
-	} else if (flags & XFS_QMOPT_ASYNC) {
-		xfs_bawrite(mp, bp);
-	} else {
-		error = xfs_bwrite(mp, bp);
-	}
-	xfs_dqtrace_entry(dqp, "DQFLUSH END");
-	/*
-	 * dqp is still locked, but caller is free to unlock it now.
-	 */
-	return (error);
-
-}
-
-/*
- * This is the dquot flushing I/O completion routine.  It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk.  It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_dqflush_done(
-	xfs_buf_t		*bp,
-	xfs_dq_logitem_t	*qip)
-{
-	xfs_dquot_t		*dqp;
-	SPLDECL(s);
-
-	dqp = qip->qli_dquot;
-
-	/*
-	 * We only want to pull the item from the AIL if its
-	 * location in the log has not changed since we started the flush.
-	 * Thus, we only bother if the dquot's lsn has
-	 * not changed. First we check the lsn outside the lock
-	 * since it's cheaper, and then we recheck while
-	 * holding the lock before removing the dquot from the AIL.
-	 */
-	if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
-	    qip->qli_item.li_lsn == qip->qli_flush_lsn) {
-
-		AIL_LOCK(dqp->q_mount, s);
-		/*
-		 * xfs_trans_delete_ail() drops the AIL lock.
-		 */
-		if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
-			xfs_trans_delete_ail(dqp->q_mount,
-					     (xfs_log_item_t*)qip, s);
-		else
-			AIL_UNLOCK(dqp->q_mount, s);
-	}
-
-	/*
-	 * Release the dq's flush lock since we're done with it.
-	 */
-	xfs_dqfunlock(dqp);
-}
-
-
-int
-xfs_qm_dqflock_nowait(
-	xfs_dquot_t *dqp)
-{
-	int locked;
-
-	locked = cpsema(&((dqp)->q_flock));
-
-	/* XXX ifdef these out */
-	if (locked)
-		(dqp)->dq_flags |= XFS_DQ_FLOCKED;
-	return (locked);
-}
-
-
-int
-xfs_qm_dqlock_nowait(
-	xfs_dquot_t *dqp)
-{
-	return (mutex_trylock(&((dqp)->q_qlock)));
-}
-
-void
-xfs_dqlock(
-	xfs_dquot_t *dqp)
-{
-	mutex_lock(&(dqp->q_qlock), PINOD);
-}
-
-void
-xfs_dqunlock(
-	xfs_dquot_t *dqp)
-{
-	mutex_unlock(&(dqp->q_qlock));
-	if (dqp->q_logitem.qli_dquot == dqp) {
-		/* Once was dqp->q_mount, but might just have been cleared */
-		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
-					(xfs_log_item_t*)&(dqp->q_logitem));
-	}
-}
-
-
-void
-xfs_dqunlock_nonotify(
-	xfs_dquot_t *dqp)
-{
-	mutex_unlock(&(dqp->q_qlock));
-}
-
-void
-xfs_dqlock2(
-	xfs_dquot_t	*d1,
-	xfs_dquot_t	*d2)
-{
-	if (d1 && d2) {
-		ASSERT(d1 != d2);
-		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
-			xfs_dqlock(d2);
-			xfs_dqlock(d1);
-		} else {
-			xfs_dqlock(d1);
-			xfs_dqlock(d2);
-		}
-	} else {
-		if (d1) {
-			xfs_dqlock(d1);
-		} else if (d2) {
-			xfs_dqlock(d2);
-		}
-	}
-}
-
-
-/*
- * A rarely used accessor. This exists because we don't really want
- * to expose the internals of a dquot to the outside world.
- */
-xfs_dqid_t
-xfs_qm_dqid(
-	xfs_dquot_t	*dqp)
-{
-	return (INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
-}
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
-	xfs_dquot_t	*dqp,
-	uint		flags)
-{
-	xfs_dqhash_t	*thishash;
-	xfs_mount_t	*mp;
-
-	mp = dqp->q_mount;
-
-	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
-	ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
-
-	xfs_dqlock(dqp);
-	/*
-	 * We really can't afford to purge a dquot that is
-	 * referenced, because these are hard refs.
-	 * It shouldn't happen in general because we went thru _all_ inodes in
-	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
-	 * However it is possible that we have dquots with temporary
-	 * references that are not attached to an inode. e.g. see xfs_setattr().
-	 */
-	if (dqp->q_nrefs != 0) {
-		xfs_dqunlock(dqp);
-		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
-		return (1);
-	}
-
-	ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
-
-	/*
-	 * If we're turning off quotas, we have to make sure that, for
-	 * example, we don't delete quota disk blocks while dquots are
-	 * in the process of getting written to those disk blocks.
-	 * This dquot might well be on AIL, and we can't leave it there
-	 * if we're turning off quotas. Basically, we need this flush
-	 * lock, and are willing to block on it.
-	 */
-	if (! xfs_qm_dqflock_nowait(dqp)) {
-		/*
-		 * Block on the flush lock after nudging dquot buffer,
-		 * if it is incore.
-		 */
-		xfs_qm_dqflock_pushbuf_wait(dqp);
-	}
-
-	/*
-	 * XXXIf we're turning this type of quotas off, we don't care
-	 * about the dirty metadata sitting in this dquot. OTOH, if
-	 * we're unmounting, we do care, so we flush it and wait.
-	 */
-	if (XFS_DQ_IS_DIRTY(dqp)) {
-		xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
-		/* dqflush unlocks dqflock */
-		/*
-		 * Given that dqpurge is a very rare occurrence, it is OK
-		 * that we're holding the hashlist and mplist locks
-		 * across the disk write. But, ... XXXsup
-		 *
-		 * We don't care about getting disk errors here. We need
-		 * to purge this dquot anyway, so we go ahead regardless.
-		 */
-		(void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
-		xfs_dqflock(dqp);
-	}
-	ASSERT(dqp->q_pincount == 0);
-	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-	thishash = dqp->q_hash;
-	XQM_HASHLIST_REMOVE(thishash, dqp);
-	XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
-	/*
-	 * XXX Move this to the front of the freelist, if we can get the
-	 * freelist lock.
-	 */
-	ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
-
-	dqp->q_mount = NULL;;
-	dqp->q_hash = NULL;
-	dqp->dq_flags = XFS_DQ_INACTIVE;
-	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-	xfs_dqfunlock(dqp);
-	xfs_dqunlock(dqp);
-	XFS_DQ_HASH_UNLOCK(thishash);
-	return (0);
-}
-
-
-/*
- * Do some primitive error checking on ondisk dquot
- * data structures. Not just for debugging, actually;
- * this can be useful for detecting data corruption mainly due to
- * disk failures.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqcheck(
-	xfs_disk_dquot_t *ddq,
-	xfs_dqid_t	 id,
-	uint		 type,	  /* used only when IO_dorepair is true */
-	uint		 flags,
-	char		 *str)
-{
-	int errs;
-
-	errs = 0;
-	/* ASSERT(flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN)); */
-	/*
-	 * We can encounter an uninitialized dquot buffer for 2 reasons:
-	 * 1. If we crash while deleting the quotainode(s), and those blks get used
-	 *    for some user data. This is because we take the path of regular
-	 *    file deletion; however, the size field of quotainodes is never
-	 *    updated, so all the tricks that we play in itruncate_finish
-	 *    don't quite matter.
-	 *
-	 * 2. We don't play the quota buffers when there's a quotaoff logitem.
-	 *    But the allocation will be replayed so we'll end up with an
-	 *    uninitialized quota block.
-	 *
-	 * This is all fine; things are still consistent, and we haven't lost
-	 * any quota information. Just don't complain about bad dquot blks.
-	 */
-	if (INT_GET(ddq->d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) {
-		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
-			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
-			str, id, INT_GET(ddq->d_magic, ARCH_CONVERT), XFS_DQUOT_MAGIC);
-		errs++;
-	}
-	if (INT_GET(ddq->d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) {
-		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
-			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
-			str, id, INT_GET(ddq->d_magic, ARCH_CONVERT), XFS_DQUOT_VERSION);
-		errs++;
-	}
-
-	if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER && INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) {
-		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
-			"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
-			str, id, INT_GET(ddq->d_flags, ARCH_CONVERT));
-		errs++;
-	}
-
-	if (id != -1 && id != INT_GET(ddq->d_id, ARCH_CONVERT)) {
-		if (flags & XFS_QMOPT_DOWARN)
-			cmn_err(CE_ALERT,
-			"%s : ondisk-dquot 0x%x, ID mismatch: "
-			"0x%x expected, found id 0x%x",
-			str, ddq, id, INT_GET(ddq->d_id, ARCH_CONVERT));
-		errs++;
-	}
-
-	if (! errs) {
-		if (INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT) &&
-		    INT_GET(ddq->d_bcount, ARCH_CONVERT) >= INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT)) {
-			if (INT_ISZERO(ddq->d_btimer, ARCH_CONVERT) && !INT_ISZERO(ddq->d_id, ARCH_CONVERT)) {
-				if (flags & XFS_QMOPT_DOWARN)
-					cmn_err(CE_ALERT,
-					"%s : Dquot ID 0x%x (0x%x) "
-					"BLK TIMER NOT STARTED",
-					str, (int) INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
-				errs++;
-			}
-		}
-		if (INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT) &&
-		    INT_GET(ddq->d_icount, ARCH_CONVERT) >= INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT)) {
-			if (INT_ISZERO(ddq->d_itimer, ARCH_CONVERT) && !INT_ISZERO(ddq->d_id, ARCH_CONVERT)) {
-				if (flags & XFS_QMOPT_DOWARN)
-					cmn_err(CE_ALERT,
-					"%s : Dquot ID 0x%x (0x%x) "
-					"INODE TIMER NOT STARTED",
-					str, (int) INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
-				errs++;
-			}
-		}
-	}
-
-	if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
-		return (errs);
-
-	if (flags & XFS_QMOPT_DOWARN)
-		cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);
-
-	/*
-	 * Typically, a repair is only requested by quotacheck.
-	 */
-	ASSERT(id != -1);
-	ASSERT(flags & XFS_QMOPT_DQREPAIR);
-	memset(ddq, 0, sizeof(xfs_dqblk_t));
-	xfs_qm_dqinit_core(id, type, (xfs_dqblk_t *)ddq);
-	return (errs);
-}
-
-#ifdef QUOTADEBUG
-void
-xfs_qm_dqprint(xfs_dquot_t *dqp)
-{
-	printk( "-----------KERNEL DQUOT----------------\n");
-	printk( "---- dquot ID	=  %d\n", (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
-	printk( "---- type	=  %s\n", XFS_QM_ISUDQ(dqp) ? "USR" : "GRP");
-	printk( "---- fs	=  0x%p\n", dqp->q_mount);
-	printk( "---- blkno	=  0x%x\n", (int) dqp->q_blkno);
-	printk( "---- boffset	=  0x%x\n", (int) dqp->q_bufoffset);
-	printk( "---- blkhlimit =  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
-	       (int) INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT));
-	printk( "---- blkslimit =  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
-	       (int)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT));
-	printk( "---- inohlimit =  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
-	       (int)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT));
-	printk( "---- inoslimit =  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
-	       (int)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT));
-	printk( "---- bcount	=  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
-	       (int)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
-	printk( "---- icount	=  %Lu (0x%x)\n",
-	       INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
-	       (int)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
-	printk( "---- btimer	=  %d\n", (int)INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT));
-	printk( "---- itimer	=  %d\n", (int)INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT));
-
-	printk( "---------------------------\n");
-}
-#endif
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
-	xfs_dquot_t	*dqp)
-{
-	xfs_buf_t	*bp;
-
-	/*
-	 * Check to see if the dquot has been flushed delayed
-	 * write.  If so, grab its buffer and send it
-	 * out immediately.  We'll be able to acquire
-	 * the flush lock when the I/O completes.
-	 */
-	bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
-		    XFS_QI_DQCHUNKLEN(dqp->q_mount),
-		    XFS_INCORE_TRYLOCK);
-	if (bp != NULL) {
-		if (XFS_BUF_ISDELAYWRITE(bp)) {
-			if (XFS_BUF_ISPINNED(bp)) {
-				xfs_log_force(dqp->q_mount,
-					      (xfs_lsn_t)0,
-					      XFS_LOG_FORCE);
-			}
-			xfs_bawrite(dqp->q_mount, bp);
-		} else {
-			xfs_buf_relse(bp);
-		}
-	}
-	xfs_dqflock(dqp);
-}
diff -Nru a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
--- a/fs/xfs/xfs_dquot.h	Mon Mar 31 13:41:08 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both those in its algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-	struct xfs_dquot *qh_next;
-	mutex_t		  qh_lock;
-	uint		  qh_version;	/* ever increasing version */
-	uint		  qh_nelems;	/* number of dquots on the list */
-} xfs_dqhash_t;
-
-typedef struct xfs_dqlink {
-	struct xfs_dquot  *ql_next;	/* forward link */
-	struct xfs_dquot **ql_prevp;	/* pointer to prev ql_next */
-} xfs_dqlink_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * This is the marker which is designed to occupy the first few
- * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
- * must come first.
- * This serves as the marker ("sentinel") when we have to restart list
- * iterations because of locking considerations.
- */
-typedef struct xfs_dqmarker {
-	struct xfs_dquot*dqm_flnext;	/* link to freelist: must be first */
-	struct xfs_dquot*dqm_flprev;
-	xfs_dqlink_t	 dqm_mplist;	/* link to mount's list of dquots */
-	xfs_dqlink_t	 dqm_hashlist;	/* link to the hash chain */
-	uint		 dqm_flags;	/* various flags (XFS_DQ_*) */
-} xfs_dqmarker_t;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
-	xfs_dqmarker_t	 q_lists;	/* list ptrs, q_flags (marker) */
-	xfs_dqhash_t	*q_hash;	/* the hashchain header */
-	struct xfs_mount*q_mount;	/* filesystem this relates to */
-	struct xfs_trans*q_transp;	/* trans this belongs to currently */
-	uint		 q_nrefs;	/* # active refs from inodes */
-	xfs_daddr_t	 q_blkno;	/* blkno of dquot buffer */
-	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
-	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
-
-	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */
-	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
-	xfs_dq_logitem_t q_logitem;	/* dquot log item */
-	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
-	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
-	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
-	mutex_t		 q_qlock;	/* quota lock */
-	sema_t		 q_flock;	/* flush lock */
-	uint		 q_pincount;	/* pin count for this dquot */
-	sv_t		 q_pinwait;	/* sync var for pinning */
-#ifdef DQUOT_TRACING
-	struct ktrace	*q_trace;	/* trace header structure */
-#endif
-} xfs_dquot_t;
-
-
-#define dq_flnext	q_lists.dqm_flnext
-#define dq_flprev	q_lists.dqm_flprev
-#define dq_mplist	q_lists.dqm_mplist
-#define dq_hashlist	q_lists.dqm_hashlist
-#define dq_flags	q_lists.dqm_flags
-
-#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
-
-/*
- * Quota Accounting flags
- */
-#define XFS_ALL_QUOTA_ACCT	(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD)
-#define XFS_ALL_QUOTA_ACTV	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
-#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD)
-
-#define XFS_IS_QUOTA_RUNNING(mp)  ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
-
-/*
- * Quota Limit Enforcement flags
- */
-#define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
-#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
-#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
-
-#ifdef DEBUG
-static inline int
-XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
-{
-	if (mutex_trylock(&dqp->q_qlock)) {
-		mutex_unlock(&dqp->q_qlock);
-		return 0;
-	}
-	return 1;
-}
-#endif
-
-/*
- * The following three routines simply manage the q_flock
- * semaphore embedded in the dquot.  This semaphore synchronizes
- * processes attempting to flush the in-core dquot back to disk.
- */
-#define xfs_dqflock(dqp)	 { psema(&((dqp)->q_flock), PINOD | PRECALC);\
-				   (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp)	 { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \
-				   vsema(&((dqp)->q_flock)); \
-				   (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
-
-#define XFS_DQ_PINLOCK(dqp)	   mutex_spinlock( \
-				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock))
-#define XFS_DQ_PINUNLOCK(dqp, s)   mutex_spinunlock( \
-				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
-
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0)
-#define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
-#define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
-				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
-				 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-				     (XFS_IS_GQUOTA_ON((d)->q_mount))))
-#ifdef DQUOT_TRACING
-/*
- * Dquot Tracing stuff.
- */
-#define DQUOT_TRACE_SIZE	64
-#define DQUOT_KTRACE_ENTRY	1
-
-#define xfs_dqtrace_entry_ino(a,b,ip) \
-xfs_dqtrace_entry__((a), (b), (void*)__return_address, (ip))
-#define xfs_dqtrace_entry(a,b) \
-xfs_dqtrace_entry__((a), (b), (void*)__return_address, NULL)
-extern void		xfs_dqtrace_entry__(xfs_dquot_t *dqp, char *func,
-					    void *, xfs_inode_t *);
-#else
-#define xfs_dqtrace_entry(a,b)
-#define xfs_dqtrace_entry_ino(a,b,ip)
-#endif
-#ifdef QUOTADEBUG
-extern void		xfs_qm_dqprint(xfs_dquot_t *);
-#else
-#define xfs_qm_dqprint(a)
-#endif
-
-extern xfs_dquot_t	*xfs_qm_dqinit(xfs_mount_t *, xfs_dqid_t, uint);
-extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int		xfs_qm_dqpurge(xfs_dquot_t *, uint);
-extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern int		xfs_qm_dqflock_nowait(xfs_dquot_t *);
-extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
-					       xfs_disk_dquot_t *);
-extern int		xfs_qm_dqwarn(xfs_disk_dquot_t *, uint);
-
-#endif /* __XFS_DQUOT_H__ */
diff -Nru a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
--- a/fs/xfs/xfs_dquot_item.c	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,680 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#include <xfs.h>
-#include <xfs_quota_priv.h>
-
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-/* ARGSUSED */
-STATIC uint
-xfs_qm_dquot_logitem_size(
-	xfs_dq_logitem_t	*logitem)
-{
-	/*
-	 * we need only two iovecs, one for the format, one for the real thing
-	 */
-	return (2);
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
-	xfs_dq_logitem_t	*logitem,
-	xfs_log_iovec_t		*logvec)
-{
-	ASSERT(logitem);
-	ASSERT(logitem->qli_dquot);
-
-	logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
-	logvec->i_len  = sizeof(xfs_dq_logformat_t);
-	logvec++;
-	logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
-	logvec->i_len  = sizeof(xfs_disk_dquot_t);
-
-	ASSERT(2 == logitem->qli_item.li_desc->lid_size);
-	logitem->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- * This value is protected by pinlock spinlock in the xQM structure.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
-	xfs_dq_logitem_t *logitem)
-{
-	unsigned long	s;
-	xfs_dquot_t *dqp;
-
-	dqp = logitem->qli_dquot;
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	s = XFS_DQ_PINLOCK(dqp);
-	dqp->q_pincount++;
-	XFS_DQ_PINUNLOCK(dqp, s);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0.	 The
- * dquot must have been previously pinned with a call to xfs_dqpin().
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
-	xfs_dq_logitem_t *logitem,
-	int		  stale)
-{
-	unsigned long	s;
-	xfs_dquot_t *dqp;
-
-	dqp = logitem->qli_dquot;
-	ASSERT(dqp->q_pincount > 0);
-	s = XFS_DQ_PINLOCK(dqp);
-	dqp->q_pincount--;
-	if (dqp->q_pincount == 0) {
-		sv_broadcast(&dqp->q_pinwait);
-	}
-	XFS_DQ_PINUNLOCK(dqp, s);
-}
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_unpin_remove(
-	xfs_dq_logitem_t *logitem,
-	xfs_trans_t	 *tp)
-{
-	xfs_qm_dquot_logitem_unpin(logitem, 0);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-	xfs_dq_logitem_t	*logitem)
-{
-	xfs_dquot_t	*dqp;
-
-	dqp = logitem->qli_dquot;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
-
-	/*
-	 * Since we were able to lock the dquot's flush lock and
-	 * we found it on the AIL, the dquot must be dirty.  This
-	 * is because the dquot is removed from the AIL while still
-	 * holding the flush lock in xfs_dqflush_done().  Thus, if
-	 * we found it in the AIL and were able to obtain the flush
-	 * lock without sleeping, then there must not have been
-	 * anyone in the process of flushing the dquot.
-	 */
-	xfs_qm_dqflush(dqp, XFS_B_DELWRI);
-	xfs_dqunlock(dqp);
-}
-
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-	xfs_dq_logitem_t	*l,
-	xfs_lsn_t		lsn)
-{
-	/*
-	 * We always re-log the entire dquot when it becomes dirty,
-	 * so, the latest copy _is_ the only one that matters.
-	 */
-	return (lsn);
-}
-
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
-	xfs_dquot_t	*dqp)
-{
-	SPLDECL(s);
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (dqp->q_pincount == 0) {
-		return;
-	}
-
-	/*
-	 * Give the log a push so we don't wait here too long.
-	 */
-	xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
-	s = XFS_DQ_PINLOCK(dqp);
-	if (dqp->q_pincount == 0) {
-		XFS_DQ_PINUNLOCK(dqp, s);
-		return;
-	}
-	sv_wait(&(dqp->q_pinwait), PINOD,
-		&(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
-}
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffercache can be a time consuming thing, and AIL_LOCK is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
-	xfs_dq_logitem_t    *qip)
-{
-	xfs_dquot_t	*dqp;
-	xfs_mount_t	*mp;
-	xfs_buf_t	*bp;
-	uint		dopush;
-
-	dqp = qip->qli_dquot;
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	/*
-	 * The qli_pushbuf_flag keeps others from
-	 * trying to duplicate our effort.
-	 */
-	ASSERT(qip->qli_pushbuf_flag != 0);
-	ASSERT(qip->qli_push_owner == get_thread_id());
-
-	/*
-	 * If flushlock isn't locked anymore, chances are that the
-	 * inode flush completed and the inode was taken off the AIL.
-	 * So, just get out.
-	 */
-	if ((valusema(&(dqp->q_flock)) > 0)  ||
-	    ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
-		qip->qli_pushbuf_flag = 0;
-		xfs_dqunlock(dqp);
-		return;
-	}
-	mp = dqp->q_mount;
-	bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
-		    XFS_QI_DQCHUNKLEN(mp),
-		    XFS_INCORE_TRYLOCK);
-	if (bp != NULL) {
-		if (XFS_BUF_ISDELAYWRITE(bp)) {
-			dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
-				  (valusema(&(dqp->q_flock)) <= 0));
-			qip->qli_pushbuf_flag = 0;
-			xfs_dqunlock(dqp);
-
-			if (XFS_BUF_ISPINNED(bp)) {
-				xfs_log_force(mp, (xfs_lsn_t)0,
-					      XFS_LOG_FORCE);
-			}
-			if (dopush) {
-#ifdef XFSRACEDEBUG
-				delay_for_intr();
-				delay(300);
-#endif
-				xfs_bawrite(mp, bp);
-			} else {
-				xfs_buf_relse(bp);
-			}
-		} else {
-			qip->qli_pushbuf_flag = 0;
-			xfs_dqunlock(dqp);
-			xfs_buf_relse(bp);
-		}
-		return;
-	}
-
-	qip->qli_pushbuf_flag = 0;
-	xfs_dqunlock(dqp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item.  Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping.  If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
-	xfs_dq_logitem_t	*qip)
-{
-	xfs_dquot_t		*dqp;
-	uint			retval;
-
-	dqp = qip->qli_dquot;
-	if (dqp->q_pincount > 0)
-		return (XFS_ITEM_PINNED);
-
-	if (! xfs_qm_dqlock_nowait(dqp))
-		return (XFS_ITEM_LOCKED);
-
-	retval = XFS_ITEM_SUCCESS;
-	if (! xfs_qm_dqflock_nowait(dqp)) {
-		/*
-		 * The dquot is already being flushed.	It may have been
-		 * flushed delayed write, however, and we don't want to
-		 * get stuck waiting for that to complete.  So, we want to check
-		 * to see if we can lock the dquot's buffer without sleeping.
-		 * If we can and it is marked for delayed write, then we
-		 * hold it and send it out from the push routine.  We don't
-		 * want to do that now since we might sleep in the device
-		 * strategy routine.  We also don't want to grab the buffer lock
-		 * here because we'd like not to call into the buffer cache
-		 * while holding the AIL_LOCK.
-		 * Make sure to only return PUSHBUF if we set pushbuf_flag
-		 * ourselves.  If someone else is doing it then we don't
-		 * want to go to the push routine and duplicate their efforts.
-		 */
-		if (qip->qli_pushbuf_flag == 0) {
-			qip->qli_pushbuf_flag = 1;
-			ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
-#ifdef DEBUG
-			qip->qli_push_owner = get_thread_id();
-#endif
-			/*
-			 * The dquot is left locked.
-			 */
-			retval = XFS_ITEM_PUSHBUF;
-		} else {
-			retval = XFS_ITEM_FLUSHING;
-			xfs_dqunlock_nonotify(dqp);
-		}
-	}
-
-	ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
-	return (retval);
-}
-
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
-	xfs_dq_logitem_t    *ql)
-{
-	xfs_dquot_t	*dqp;
-
-	ASSERT(ql != NULL);
-	dqp = ql->qli_dquot;
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	/*
-	 * Clear the transaction pointer in the dquot
-	 */
-	dqp->q_transp = NULL;
-
-	/*
-	 * dquots are never 'held' from getting unlocked at the end of
-	 * a transaction.  Their locking and unlocking is hidden inside the
-	 * transaction layer, within trans_commit. Hence, no LI_HOLD flag
-	 * for the logitem.
-	 */
-	xfs_dqunlock(dqp);
-}
-
-
-/*
- * The transaction with the dquot locked has aborted.  The dquot
- * must not be dirty within the transaction.  We simply unlock just
- * as if the transaction had been cancelled.
- */
-STATIC void
-xfs_qm_dquot_logitem_abort(
-	xfs_dq_logitem_t    *ql)
-{
-	xfs_qm_dquot_logitem_unlock(ql);
-}
-
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_committing(
-	xfs_dq_logitem_t	*l,
-	xfs_lsn_t		lsn)
-{
-	return;
-}
-
-
-/*
- * This is the ops vector for dquots
- */
-struct xfs_item_ops xfs_dquot_item_ops = {
-	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
-	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
-					xfs_qm_dquot_logitem_format,
-	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
-	.iop_unpin	= (void(*)(xfs_log_item_t*, int))
-					xfs_qm_dquot_logitem_unpin,
-	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
-					xfs_qm_dquot_logitem_unpin_remove,
-	.iop_trylock	= (uint(*)(xfs_log_item_t*))
-					xfs_qm_dquot_logitem_trylock,
-	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock,
-	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_dquot_logitem_committed,
-	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
-	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
-	.iop_pushbuf	= (void(*)(xfs_log_item_t*))
-					xfs_qm_dquot_logitem_pushbuf,
-	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
-	struct xfs_dquot *dqp)
-{
-	xfs_dq_logitem_t  *lp;
-	lp = &dqp->q_logitem;
-
-	lp->qli_item.li_type = XFS_LI_DQUOT;
-	lp->qli_item.li_ops = &xfs_dquot_item_ops;
-	lp->qli_item.li_mountp = dqp->q_mount;
-	lp->qli_dquot = dqp;
-	lp->qli_format.qlf_type = XFS_LI_DQUOT;
-	lp->qli_format.qlf_id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
-	lp->qli_format.qlf_blkno = dqp->q_blkno;
-	lp->qli_format.qlf_len = 1;
-	/*
-	 * This is just the offset of this dquot within its buffer
-	 * (which is currently 1 FSB and probably won't change).
-	 * Hence 32 bits for this offset should be just fine.
-	 * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
-	 * here, and recompute it at recovery time.
-	 */
-	lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-/*ARGSUSED*/
-STATIC uint
-xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
-{
-	return (1);
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t	*qf,
-			   xfs_log_iovec_t	*log_vector)
-{
-	ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
-	log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
-	log_vector->i_len = sizeof(xfs_qoff_logitem_t);
-	qf->qql_format.qf_size = 1;
-}
-
-
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
-{
-	return;
-}
-
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale)
-{
-	return;
-}
-
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
-{
-	return;
-}
-
-/*
- * Quotaoff items have no locking, so just return success.
- */
-/*ARGSUSED*/
-STATIC uint
-xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
-{
-	return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf)
-{
-	return;
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
-{
-	return (lsn);
-}
-
-/*
- * The transaction of which this QUOTAOFF is a part has been aborted.
- * Just clean up after ourselves.
- * Shouldn't this never happen in the case of qoffend logitems? XXX
- */
-STATIC void
-xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
-{
-	kmem_free(qf, sizeof(xfs_qoff_logitem_t));
-}
-
-/*
- * There isn't much you can do to push on an quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf)
-{
-	return;
-}
-
-
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-	xfs_qoff_logitem_t *qfe,
-	xfs_lsn_t lsn)
-{
-	xfs_qoff_logitem_t	*qfs;
-	SPLDECL(s);
-
-	qfs = qfe->qql_start_lip;
-	AIL_LOCK(qfs->qql_item.li_mountp,s);
-	/*
-	 * Delete the qoff-start logitem from the AIL.
-	 * xfs_trans_delete_ail() drops the AIL lock.
-	 */
-	xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s);
-	kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
-	kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
-	return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
-{
-	return;
-}
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
-{
-	return;
-}
-
-struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
-	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
-					xfs_qm_qoff_logitem_format,
-	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
-	.iop_unpin	= (void(*)(xfs_log_item_t* ,int))
-					xfs_qm_qoff_logitem_unpin,
-	.iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
-					xfs_qm_qoff_logitem_unpin_remove,
-	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
-	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
-	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_qoffend_logitem_committed,
-	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
-	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
-	.iop_pushbuf	= NULL,
-	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_qoffend_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
-	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
-					xfs_qm_qoff_logitem_format,
-	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
-	.iop_unpin	= (void(*)(xfs_log_item_t*, int))
-					xfs_qm_qoff_logitem_unpin,
-	.iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
-					xfs_qm_qoff_logitem_unpin_remove,
-	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
-	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
-	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_qoff_logitem_committed,
-	.iop_push	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
-	.iop_abort	= (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
-	.iop_pushbuf	= NULL,
-	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
-					xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-xfs_qoff_logitem_t *
-xfs_qm_qoff_logitem_init(
-	struct xfs_mount *mp,
-	xfs_qoff_logitem_t *start,
-	uint flags)
-{
-	xfs_qoff_logitem_t	*qf;
-
-	qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
-
-	qf->qql_item.li_type = XFS_LI_QUOTAOFF;
-	if (start)
-		qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
-	else
-		qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
-	qf->qql_item.li_mountp = mp;
-	qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
-	qf->qql_format.qf_flags = flags;
-	qf->qql_start_lip = start;
-	return (qf);
-}
diff -Nru a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
--- a/fs/xfs/xfs_dquot_item.h	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-/*
- * These are the structures used to lay out dquots and quotaoff
- * records on the log. Quite similar to those of inodes.
- */
-
-/*
- * log format struct for dquots.
- * The first two fields must be the type and size fitting into
- * 32 bits : log_recovery code assumes that.
- */
-typedef struct xfs_dq_logformat {
-	__uint16_t		qlf_type;      /* dquot log item type */
-	__uint16_t		qlf_size;      /* size of this item */
-	xfs_dqid_t		qlf_id;	       /* usr/grp id number : 32 bits */
-	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
-	__int32_t		qlf_len;       /* len of dquot buffer */
-	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
-} xfs_dq_logformat_t;
-
-/*
- * log format struct for QUOTAOFF records.
- * The first two fields must be the type and size fitting into
- * 32 bits : log_recovery code assumes that.
- * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
- * to the first and ensures that the first logitem is taken out of the AIL
- * only when the last one is securely committed.
- */
-typedef struct xfs_qoff_logformat {
-	unsigned short		qf_type;	/* quotaoff log item type */
-	unsigned short		qf_size;	/* size of this item */
-	unsigned int		qf_flags;	/* USR and/or GRP */
-	char			qf_pad[12];	/* padding for future */
-} xfs_qoff_logformat_t;
-
-
-#ifdef __KERNEL__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-typedef struct xfs_dq_logitem {
-	xfs_log_item_t		 qli_item;	   /* common portion */
-	struct xfs_dquot	*qli_dquot;	   /* dquot ptr */
-	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */
-	unsigned short		 qli_pushbuf_flag; /* one bit used in push_ail */
-#ifdef DEBUG
-	uint64_t		 qli_push_owner;
-#endif
-	xfs_dq_logformat_t	 qli_format;	   /* logged structure */
-} xfs_dq_logitem_t;
-
-
-typedef struct xfs_qoff_logitem {
-	xfs_log_item_t		 qql_item;	/* common portion */
-	struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
-	xfs_qoff_logformat_t	 qql_format;	/* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void		   xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
-						    xfs_qoff_logitem_t *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
-						   xfs_qoff_logitem_t *, uint);
-extern void		   xfs_trans_log_quotaoff_item(struct xfs_trans *,
-						       xfs_qoff_logitem_t *);
-
-#endif	/* __KERNEL__ */
-
-#endif	/* __XFS_DQUOT_ITEM_H__ */
diff -Nru a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
--- a/fs/xfs/xfs_iget.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_iget.c	Mon Mar 31 13:41:08 2003
@@ -591,9 +591,7 @@
 	 * Release dquots (and their references) if any. An inode may escape
 	 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
 	 */
-	if (ip->i_udquot || ip->i_gdquot) {
-		xfs_qm_dqdettach_inode(ip);
-	}
+	XFS_QM_DQDETACH(ip->i_mount, ip);
 
 	/*
 	 * Pull our behavior descriptor from the vnode chain.
@@ -611,11 +609,8 @@
 
 /*
  * This routine removes an about-to-be-destroyed inode from
- * all of the lists in which it is lcoated with the exception
- * of the behavior chain.  It is used by xfs_ireclaim and
- * by cxfs relocation cocde, in which case, we are removing
- * the xfs_inode but leaving the vnode alone since it has
- * been transformed into a client vnode.
+ * all of the lists in which it is located with the exception
+ * of the behavior chain.
  */
 void
 xfs_iextract(
diff -Nru a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
--- a/fs/xfs/xfs_inode.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_inode.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -1190,25 +1190,6 @@
 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 	ip->i_d.di_anextents = 0;
 
-#if DEBUG
-	{
-		uint	badflags = VNOSWAP |
-			       VISSWAP |
-			       VREPLICABLE |
-			   /*  VNONREPLICABLE | XXX uncomment this */
-			       VDOCMP |
-			       VFRLOCKS;
-
-		/*
-		 * For shared mounts, VNOSWAP is set in xfs_iget
-		 */
-		if (tp->t_mountp->m_cxfstype != XFS_CXFS_NOT)
-			badflags &= ~VNOSWAP;
-
-		ASSERT(!(vp->v_flag & badflags));
-	}
-#endif /* DEBUG */
-
 	/*
 	 * Log the new values stuffed into the inode.
 	 */
@@ -3599,16 +3580,19 @@
 
 	nanotime(&tv);
 	if (flags & XFS_ICHGTIME_MOD) {
-		inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
-		inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+		inode->i_mtime = tv;
+		ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
 	}
 	if (flags & XFS_ICHGTIME_ACC) {
-		inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
-		inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
+		inode->i_atime  = tv;
+		ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
 	}
 	if (flags & XFS_ICHGTIME_CHG) {
-		inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
-		inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
+		inode->i_ctime  = tv;
+		ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
 	}
 
 	/*
diff -Nru a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
--- a/fs/xfs/xfs_inode.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_inode.h	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -154,11 +154,7 @@
  * Flags in the flags field
  */
 
-#define XFS_IOCORE_ISXFS	0x01
-#define XFS_IOCORE_ISCXFS	0x02
-#define XFS_IOCORE_RT		0x04
-
-#define IO_IS_XFS(io)	((io)->io_flags & XFS_IOCORE_ISXFS)
+#define XFS_IOCORE_RT		0x1
 
 /*
  * xfs_iocore prototypes
diff -Nru a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
--- a/fs/xfs/xfs_iocore.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_iocore.c	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -33,15 +33,25 @@
 #include <xfs.h>
 
 
-static xfs_fsize_t
+STATIC xfs_fsize_t
 xfs_size_fn(
-	xfs_inode_t	*ip)
+	xfs_inode_t		*ip)
 {
 	return (ip->i_d.di_size);
 }
 
+STATIC int
+xfs_ioinit(
+	struct vfs		*vfsp,
+	struct xfs_mount_args	*mntargs,
+	int			flags)
+{
+	return xfs_mountfs(vfsp, XFS_VFSTOM(vfsp),
+			   vfsp->vfs_super->s_bdev->bd_dev, flags);
+}
+
 xfs_ioops_t	xfs_iocore_xfs = {
-	.xfs_ioinit		= (xfs_ioinit_t) fs_noerr,
+	.xfs_ioinit		= (xfs_ioinit_t) xfs_ioinit,
 	.xfs_bmapi_func		= (xfs_bmapi_t) xfs_bmapi,
 	.xfs_bmap_eof_func	= (xfs_bmap_eof_t) xfs_bmap_eof,
 	.xfs_iomap_write_direct =
@@ -67,11 +77,9 @@
 {
 	xfs_iocore_t	*io = &ip->i_iocore;
 
-	io->io_flags = XFS_IOCORE_ISXFS;
-	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
+	io->io_flags = 0;
+	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
 		io->io_flags |= XFS_IOCORE_RT;
-	}
-
 	io->io_dmevmask = ip->i_d.di_dmevmask;
 	io->io_dmstate = ip->i_d.di_dmstate;
 }
diff -Nru a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
--- a/fs/xfs/xfs_log_recover.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_log_recover.c	Mon Mar 31 13:41:07 2003
@@ -136,16 +136,15 @@
 /*
  * check log record header for recovery
  */
-
 static void
 xlog_header_check_dump(xfs_mount_t *mp, xlog_rec_header_t *head)
 {
     int b;
 
-    printk("xlog_header_check_dump:\n	 SB : uuid = ");
+    printk("%s:  SB : uuid = ", __FUNCTION__);
     for (b=0;b<16;b++) printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]);
     printk(", fmt = %d\n",XLOG_FMT);
-    printk("    log: uuid = ");
+    printk("    log : uuid = ");
     for (b=0;b<16;b++) printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]);
     printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
 }
@@ -1813,7 +1812,6 @@
 		 */
 		error = 0;
 		if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
-			/* OK, if this returns ENOSYS */
 			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
 					       item->ri_buf[i].i_addr,
 					       -1, 0, XFS_QMOPT_DOWARN,
@@ -1821,9 +1819,9 @@
 		}
 		if (!error)
 			memcpy(xfs_buf_offset(bp,
-					(uint)bit << XFS_BLI_SHIFT),	/* dest */
-				item->ri_buf[i].i_addr,			/* source */
-				nbits<<XFS_BLI_SHIFT);			/* length */
+				(uint)bit << XFS_BLI_SHIFT),	/* dest */
+				item->ri_buf[i].i_addr,		/* source */
+				nbits<<XFS_BLI_SHIFT);		/* length */
 		i++;
 		bit += nbits;
 	}
@@ -1832,6 +1830,120 @@
 	ASSERT(i == item->ri_total);
 }	/* xlog_recover_do_reg_buffer */
 
+/*
+ * Do some primitive error checking on ondisk dquot data structures.
+ * Returns the number of problems found; XFS_QMOPT_DOWARN logs each one,
+ * XFS_QMOPT_DQREPAIR re-initializes a bad dquot from 'id' and 'type'.
+ */
+int
+xfs_qm_dqcheck(
+	xfs_disk_dquot_t *ddq,
+	xfs_dqid_t	 id,	  /* -1 skips the ID match check */
+	uint		 type,	  /* used only with XFS_QMOPT_DQREPAIR */
+	uint		 flags,
+	char		 *str)
+{
+	xfs_dqblk_t	 *d = (xfs_dqblk_t *)ddq;
+	int		errs = 0;
+
+	/*
+	 * We can encounter an uninitialized dquot buffer for 2 reasons:
+	 * 1. If we crash while deleting the quotainode(s), and those blks got
+	 *    used for user data. This is because we take the path of regular
+	 *    file deletion; however, the size field of quotainodes is never
+	 *    updated, so all the tricks that we play in itruncate_finish
+	 *    don't quite matter.
+	 * 2. We don't play the quota buffers when there's a quotaoff logitem.
+	 *    But the allocation will be replayed so we'll end up with an
+	 *    uninitialized quota block.
+	 * This is all fine; things are still consistent, and we haven't lost
+	 * any quota information. Just don't complain about bad dquot blks.
+	 */
+	if (INT_GET(ddq->d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) {
+		if (flags & XFS_QMOPT_DOWARN)
+			cmn_err(CE_ALERT,
+			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
+			str, id,
+			INT_GET(ddq->d_magic, ARCH_CONVERT), XFS_DQUOT_MAGIC);
+		errs++;
+	}
+	if (INT_GET(ddq->d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) {
+		if (flags & XFS_QMOPT_DOWARN)
+			cmn_err(CE_ALERT,
+			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
+			str, id,
+			INT_GET(ddq->d_version, ARCH_CONVERT), XFS_DQUOT_VERSION);
+		errs++;
+	}
+
+	if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER &&
+	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) {
+		if (flags & XFS_QMOPT_DOWARN)
+			cmn_err(CE_ALERT,
+			"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
+			str, id, INT_GET(ddq->d_flags, ARCH_CONVERT));
+		errs++;
+	}
+
+	if (id != -1 && id != INT_GET(ddq->d_id, ARCH_CONVERT)) {
+		if (flags & XFS_QMOPT_DOWARN)
+			cmn_err(CE_ALERT,
+			"%s : ondisk-dquot 0x%p, ID mismatch: "
+			"0x%x expected, found id 0x%x",
+			str, ddq, id, INT_GET(ddq->d_id, ARCH_CONVERT));
+		errs++;
+	}
+
+	if (! errs) {
+		if (INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT) &&
+		    INT_GET(ddq->d_bcount, ARCH_CONVERT) >=
+				INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT)) {
+			if (INT_ISZERO(ddq->d_btimer, ARCH_CONVERT) &&
+			    !INT_ISZERO(ddq->d_id, ARCH_CONVERT)) {
+				if (flags & XFS_QMOPT_DOWARN)
+					cmn_err(CE_ALERT,
+					"%s : Dquot ID 0x%x (0x%p) "
+					"BLK TIMER NOT STARTED",
+					str, (int)
+					INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
+				errs++;
+			}
+		}
+		if (INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT) &&
+		    INT_GET(ddq->d_icount, ARCH_CONVERT) >=
+				INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT)) {
+			if (INT_ISZERO(ddq->d_itimer, ARCH_CONVERT) &&
+			    !INT_ISZERO(ddq->d_id, ARCH_CONVERT)) {
+				if (flags & XFS_QMOPT_DOWARN)
+					cmn_err(CE_ALERT,
+					"%s : Dquot ID 0x%x (0x%p) "
+					"INODE TIMER NOT STARTED",
+					str, (int)
+					INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
+				errs++;
+			}
+		}
+	}
+
+	if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
+		return errs;
+
+	if (flags & XFS_QMOPT_DOWARN)
+		cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);
+
+	/*
+	 * Typically, a repair is only requested by quotacheck.
+	 */
+	ASSERT(id != -1);
+	ASSERT(flags & XFS_QMOPT_DQREPAIR);
+	memset(d, 0, sizeof(xfs_dqblk_t));
+	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
+	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
+	INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
+	INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
+
+	return errs;
+}
 
 /*
  * Perform a dquot buffer recovery.
@@ -2335,8 +2447,6 @@
 			   dq_f->qlf_id,
 			   0, XFS_QMOPT_DOWARN,
 			   "xlog_recover_do_dquot_trans (log copy)"))) {
-		if (error == ENOSYS)
-			return (0);
 		return XFS_ERROR(EIO);
 	}
 	ASSERT(dq_f->qlf_len == 1);
@@ -2923,8 +3033,6 @@
 
 	/*
 	 * Prevent any DMAPI event from being sent while in this function.
-	 * Not a problem for xfs since the file system isn't mounted
-	 * yet.	 It is a problem for cxfs recovery.
 	 */
 	mp_dmevmask = mp->m_dmevmask;
 	mp->m_dmevmask = 0;
@@ -2982,11 +3090,8 @@
 					 * Prevent any DMAPI event from
 					 * being sent when the
 					 * reference on the inode is
-					 * dropped.  Not a problem for
-					 * xfs since the file system
-					 * isn't mounted yet.  It is a
-					 * problem for cxfs recovery.
-					 */
+					 * dropped.
+					 */
 					ip->i_d.di_dmevmask = 0;
 
 					/*
diff -Nru a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c
--- a/fs/xfs/xfs_macros.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs_macros.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -355,6 +355,14 @@
 xfs_bhvtom(bhv_desc_t *bdp)
 {
 	return XFS_BHVTOM(bdp);
+}
+#endif
+
+#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_VFSTOM)
+xfs_mount_t *
+xfs_vfstom(vfs_t *vfs)
+{
+	return XFS_VFSTOM(vfs);
 }
 #endif
 
diff -Nru a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
--- a/fs/xfs/xfs_mount.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs_mount.c	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -32,16 +32,14 @@
 
 #include <xfs.h>
 
-STATIC void	xfs_mount_reset_sbqflags(xfs_mount_t *);
 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
+STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 
 mutex_t		xfs_uuidtabmon;		/* monitor for uuidtab */
 STATIC int	xfs_uuidtab_size;
 STATIC uuid_t	*xfs_uuidtab;
 
-STATIC void	xfs_uuid_unmount(xfs_mount_t *);
-
 void xfs_xlatesb(void *, xfs_sb_t *, int, xfs_arch_t, __int64_t);
 
 static struct {
@@ -120,10 +118,9 @@
 	spinlock_init(&mp->m_freeze_lock, "xfs_freeze");
 	init_sv(&mp->m_wait_unfreeze, SV_DEFAULT, "xfs_freeze", 0);
 	atomic_set(&mp->m_active_trans, 0);
-	mp->m_cxfstype = XFS_CXFS_NOT;
 
 	return mp;
-}	/* xfs_mount_init */
+}
 
 /*
  * Free up the resources associated with a mount structure.  Assume that
@@ -146,34 +143,29 @@
 		for (agno = 0; agno < mp->m_maxagi; agno++)
 			if (mp->m_perag[agno].pagb_list)
 				kmem_free(mp->m_perag[agno].pagb_list,
-				  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
+						sizeof(xfs_perag_busy_t) *
+							XFS_PAGB_NUM_SLOTS);
 		kmem_free(mp->m_perag,
 			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
 	}
 
-#if 0
-	/*
-	 * XXXdpd - Doesn't work now for shutdown case.
-	 * Should at least free the memory.
-	 */
-	ASSERT(mp->m_ail.ail_back == (xfs_log_item_t*)&(mp->m_ail));
-	ASSERT(mp->m_ail.ail_forw == (xfs_log_item_t*)&(mp->m_ail));
-#endif
 	AIL_LOCK_DESTROY(&mp->m_ail_lock);
 	spinlock_destroy(&mp->m_sb_lock);
 	mutex_destroy(&mp->m_ilock);
 	freesema(&mp->m_growlock);
+	if (mp->m_quotainfo)
+		XFS_QM_DONE(mp);
 
-	if (mp->m_fsname != NULL) {
+	if (mp->m_fsname != NULL)
 		kmem_free(mp->m_fsname, mp->m_fsname_len);
-	}
-	if (mp->m_quotainfo != NULL) {
-		xfs_qm_unmount_quotadestroy(mp);
-	}
 
 	if (remove_bhv) {
-		VFS_REMOVEBHV(XFS_MTOVFS(mp), &mp->m_bhv);
+		struct vfs	*vfsp = XFS_MTOVFS(mp);
+
+		bhv_remove_all_vfsops(vfsp, 0);
+		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
+
 	spinlock_destroy(&mp->m_freeze_lock);
 	sv_destroy(&mp->m_wait_unfreeze);
 	kmem_free(mp, sizeof(xfs_mount_t));
@@ -605,21 +597,17 @@
 {
 	xfs_buf_t	*bp;
 	xfs_sb_t	*sbp = &(mp->m_sb);
-	int		error = 0;
 	xfs_inode_t	*rip;
 	vnode_t		*rvp = 0;
-	int		readio_log;
-	int		writeio_log;
+	int		readio_log, writeio_log;
 	vmap_t		vmap;
 	xfs_daddr_t	d;
-	extern xfs_ioops_t xfs_iocore_xfs;	/* from xfs_iocore.c */
 	__uint64_t	ret64;
-	uint		quotaflags, quotaondisk;
-	uint		uquotaondisk = 0, gquotaondisk = 0;
-	boolean_t	needquotamount;
 	__int64_t	update_flags;
+	uint		quotamount, quotaflags;
 	int		agno, noio;
 	int		uuid_mounted = 0;
+	int		error = 0;
 
 	noio = dev == 0 && mp->m_sb_bp != NULL;
 	if (mp->m_sb_bp == NULL) {
@@ -644,7 +632,8 @@
 		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
 		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
 			if (mp->m_flags & XFS_MOUNT_RETERR) {
-				cmn_err(CE_WARN, "XFS: alignment check 1 failed");
+				cmn_err(CE_WARN,
+					"XFS: alignment check 1 failed");
 				error = XFS_ERROR(EINVAL);
 				goto error1;
 			}
@@ -664,7 +653,8 @@
 				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
 			} else {
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
-					cmn_err(CE_WARN, "XFS: alignment check 3 failed");
+					cmn_err(CE_WARN,
+					"XFS: alignment check 3 failed");
 					error = XFS_ERROR(EINVAL);
 					goto error1;
 				}
@@ -718,7 +708,8 @@
 	 * since a single partition filesystem is identical to a single
 	 * partition volume/filesystem.
 	 */
-	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
+	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
 		if (xfs_uuid_mount(mp)) {
 			error = XFS_ERROR(EINVAL);
 			goto error1;
@@ -859,9 +850,6 @@
 		return(0);
 	}
 
-	/* Initialize the I/O function vector with XFS functions */
-	mp->m_io_ops = xfs_iocore_xfs;
-
 	/*
 	 *  Copies the low order bits of the timestamp and the randomly
 	 *  set "sequence" number out of a UUID.
@@ -953,54 +941,22 @@
 
 	ASSERT(rip != NULL);
 	rvp = XFS_ITOV(rip);
+	VMAP(rvp, vmap);
+
 	if (unlikely((rip->i_d.di_mode & IFMT) != IFDIR)) {
 		cmn_err(CE_WARN, "XFS: corrupted root inode");
-		VMAP(rvp, vmap);
 		prdev("Root inode %llu is not a directory",
 		      mp->m_dev, (unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
-		VN_RELE(rvp);
-		vn_purge(rvp, &vmap);
 		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
 				 mp);
 		error = XFS_ERROR(EFSCORRUPTED);
-		goto error3;
+		goto error4;
 	}
 	mp->m_rootip = rip;	/* save it */
 
 	xfs_iunlock(rip, XFS_ILOCK_EXCL);
 
-	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-		mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT);
-
-	if (quotaondisk) {
-		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
-		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
-	}
-
-	/*
-	 * If the device itself is read-only, we can't allow
-	 * the user to change the state of quota on the mount -
-	 * this would generate a transaction on the ro device,
-	 * which would lead to an I/O error and shutdown
-	 */
-
-	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
-	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
-	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-	    (!gquotaondisk &&  XFS_IS_GQUOTA_ON(mp)))  &&
-	    xfs_dev_is_read_only(mp, "changing quota state")) {
-		cmn_err(CE_WARN,
-			"XFS: please mount with%s%s%s.",
-			(!quotaondisk ? "out quota" : ""),
-			(uquotaondisk ? " usrquota" : ""),
-			(gquotaondisk ? " grpquota" : ""));
-		VN_RELE(rvp);
-		vn_remove(rvp);
-		error = XFS_ERROR(EPERM);
-		goto error3;
-	}
-
 	/*
 	 * Initialize realtime inode pointers in the mount structure
 	 */
@@ -1009,10 +965,7 @@
 		 * Free up the root inode.
 		 */
 		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
-		VMAP(rvp, vmap);
-		VN_RELE(rvp);
-		vn_purge(rvp, &vmap);
-		goto error3;
+		goto error4;
 	}
 
 	/*
@@ -1022,41 +975,11 @@
 	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
 		xfs_mount_log_sbunit(mp, update_flags);
 
-	quotaflags = 0;
-	needquotamount = B_FALSE;
-
 	/*
-	 * Figure out if we'll need to do a quotacheck.
+	 * Initialise the XFS quota management subsystem for this mount
 	 */
-	if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
-		/*
-		 * Call mount_quotas at this point only if we won't have to do
-		 * a quotacheck.
-		 */
-		if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
-			/*
-			 * If the xfs quota code isn't installed,
-			 * we have to reset the quotachk'd bit.
-			 * If an error occurred, qm_mount_quotas code
-			 * has already disabled quotas. So, just finish
-			 * mounting, and get on with the boring life
-			 * without disk quotas.
-			 */
-			if (xfs_qm_mount_quotas(mp))
-				xfs_mount_reset_sbqflags(mp);
-		} else {
-			/*
-			 * Clear the quota flags, but remember them. This
-			 * is so that the quota code doesn't get invoked
-			 * before we're ready. This can happen when an
-			 * inode goes inactive and wants to free blocks,
-			 * or via xfs_log_mount_finish.
-			 */
-			quotaflags = mp->m_qflags;
-			mp->m_qflags = 0;
-			needquotamount = B_TRUE;
-		}
-	}
+	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
+		goto error4;
 
 	/*
 	 * Finish recovering the file system.  This part needed to be
@@ -1066,30 +989,23 @@
 	error = xfs_log_mount_finish(mp, mfsi_flags);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: log mount finish failed");
-		goto error3;
-	}
-
-	if (needquotamount) {
-		ASSERT(mp->m_qflags == 0);
-		mp->m_qflags = quotaflags;
-		if (xfs_qm_mount_quotas(mp))
-			xfs_mount_reset_sbqflags(mp);
+		goto error4;
 	}
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-	if (! (XFS_IS_QUOTA_ON(mp)))
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
-	else
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
-	if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
-		cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
+	/*
+	 * Complete the quota initialisation, post-log-replay component.
+	 */
+	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags)))
+		goto error4;
 
-	return (0);
+	return 0;
 
+ error4:
+	/*
+	 * Free up the root inode.
+	 */
+	VN_RELE(rvp);
+	vn_purge(rvp, &vmap);
  error3:
 	xfs_log_unmount_dealloc(mp);
  error2:
@@ -1118,25 +1034,15 @@
 int
 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 {
-	int		ndquots;
+	struct vfs	*vfsp = XFS_MTOVFS(mp);
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	int64_t		fsid;
 #endif
 
 	xfs_iflush_all(mp, XFS_FLUSH_ALL);
 
-	/*
-	 * Purge the dquot cache.
-	 * None of the dquots should really be busy at this point.
-	 */
-	if (mp->m_quotainfo) {
-		while ((ndquots = xfs_qm_dqpurge_all(mp,
-						  XFS_QMOPT_UQUOTA|
-						  XFS_QMOPT_GQUOTA|
-						  XFS_QMOPT_UMOUNTING))) {
-			delay(ndquots * 10);
-		}
-	}
+	XFS_QM_DQPURGEALL(mp,
+		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
 
 	/*
 	 * Flush out the log synchronously so that we know for sure
@@ -1178,14 +1084,14 @@
 	/*
 	 * clear all error tags on this filesystem
 	 */
-	memcpy(&fsid, &(XFS_MTOVFS(mp)->vfs_fsid), sizeof(int64_t));
-	(void) xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
+	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
+	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
 #endif
-
+	XFS_IODONE(vfsp);
 	xfs_mount_free(mp, 1);
 	return 0;
 }
-
+ 
 void
 xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
 {
@@ -1649,47 +1555,6 @@
 	}
 	ASSERT(i < xfs_uuidtab_size);
 	mutex_unlock(&xfs_uuidtabmon);
-}
-
-/*
- * When xfsquotas isn't installed and the superblock had quotas, we need to
- * clear the quotaflags from superblock.
- */
-STATIC void
-xfs_mount_reset_sbqflags(
-	xfs_mount_t	*mp)
-{
-	xfs_trans_t	*tp;
-	unsigned long		s;
-
-	mp->m_qflags = 0;
-	/*
-	 * It is OK to look at sb_qflags here in mount path,
-	 * without SB_LOCK.
-	 */
-	if (mp->m_sb.sb_qflags == 0)
-		return;
-	s = XFS_SB_LOCK(mp);
-	mp->m_sb.sb_qflags = 0;
-	XFS_SB_UNLOCK(mp, s);
-
-	/*
-	 * if the fs is readonly, let the incore superblock run
-	 * with quotas off but don't flush the update out to disk
-	 */
-	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
-		return;
-#ifdef QUOTADEBUG
-	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-				      XFS_DEFAULT_LOG_COUNT)) {
-		xfs_trans_cancel(tp, 0);
-		return;
-	}
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-	(void)xfs_trans_commit(tp, 0, NULL);
 }
 
 /*
diff -Nru a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
--- a/fs/xfs/xfs_mount.h	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs_mount.h	Mon Mar 31 13:41:06 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -75,7 +75,6 @@
 struct xfs_chash;
 struct xfs_inode;
 struct xfs_perag;
-struct xfs_quotainfo;
 struct xfs_iocore;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
@@ -87,15 +86,120 @@
 #define AIL_LOCK(mp,s)		s=mutex_spinlock(&(mp)->m_ail_lock)
 #define AIL_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_ail_lock, s)
 
+
+/*
+ * Prototypes and functions for the Data Migration subsystem.
+ */
+
+typedef int	(*xfs_send_data_t)(int, struct bhv_desc *,
+			xfs_off_t, size_t, int, vrwlock_t *);
+typedef int	(*xfs_send_mmap_t)(struct vm_area_struct *, uint);
+typedef int	(*xfs_send_destroy_t)(struct bhv_desc *, dm_right_t);
+typedef int	(*xfs_send_namesp_t)(dm_eventtype_t, struct bhv_desc *,
+			dm_right_t, struct bhv_desc *, dm_right_t,
+			char *, char *, mode_t, int, int);
+typedef void	(*xfs_send_unmount_t)(struct vfs *, struct vnode *,
+			dm_right_t, mode_t, int, int);
+
+typedef struct xfs_dmops {
+	xfs_send_data_t		xfs_send_data;
+	xfs_send_mmap_t		xfs_send_mmap;
+	xfs_send_destroy_t	xfs_send_destroy;
+	xfs_send_namesp_t	xfs_send_namesp;
+	xfs_send_unmount_t	xfs_send_unmount;
+} xfs_dmops_t;
+
+#define XFS_SEND_DATA(mp, ev,bdp,off,len,fl,lock) \
+	(*(mp)->m_dm_ops.xfs_send_data)(ev,bdp,off,len,fl,lock)
+#define XFS_SEND_MMAP(mp, vma,fl) \
+	(*(mp)->m_dm_ops.xfs_send_mmap)(vma,fl)
+#define XFS_SEND_DESTROY(mp, bdp,right) \
+	(*(mp)->m_dm_ops.xfs_send_destroy)(bdp,right)
+#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
+	(*(mp)->m_dm_ops.xfs_send_namesp)(ev,b1,r1,b2,r2,n1,n2,mode,rval,fl)
+#define XFS_SEND_UNMOUNT(mp, vfsp,vp,right,mode,rval,fl) \
+	(*(mp)->m_dm_ops.xfs_send_unmount)(vfsp,vp,right,mode,rval,fl)
+
+
+/*
+ * Prototypes and functions for the Quota Management subsystem.
+ */
+
+struct xfs_dquot;
+struct xfs_dqtrxops;
+struct xfs_quotainfo;
+
+typedef int	(*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
+typedef int	(*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
+typedef int	(*xfs_qmunmount_t)(struct xfs_mount *);
+typedef void	(*xfs_qmdone_t)(struct xfs_mount *);
+typedef void	(*xfs_dqrele_t)(struct xfs_dquot *);
+typedef int	(*xfs_dqattach_t)(struct xfs_inode *, uint);
+typedef void	(*xfs_dqdetach_t)(struct xfs_inode *);
+typedef int	(*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
+typedef int	(*xfs_dqvopalloc_t)(struct xfs_mount *,
+			struct xfs_inode *, uid_t, gid_t, uint,
+			struct xfs_dquot **, struct xfs_dquot **);
+typedef void	(*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
+			struct xfs_dquot *, struct xfs_dquot *);
+typedef int	(*xfs_dqvoprename_t)(struct xfs_inode **);
+typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
+			struct xfs_trans *, struct xfs_inode *,
+			struct xfs_dquot **, struct xfs_dquot *);
+typedef int	(*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
+			struct xfs_dquot *, struct xfs_dquot *, uint);
+
+typedef struct xfs_qmops {
+	xfs_qminit_t		xfs_qminit;
+	xfs_qmdone_t		xfs_qmdone;
+	xfs_qmmount_t		xfs_qmmount;
+	xfs_qmunmount_t		xfs_qmunmount;
+	xfs_dqrele_t		xfs_dqrele;
+	xfs_dqattach_t		xfs_dqattach;
+	xfs_dqdetach_t		xfs_dqdetach;
+	xfs_dqpurgeall_t	xfs_dqpurgeall;
+	xfs_dqvopalloc_t	xfs_dqvopalloc;
+	xfs_dqvopcreate_t	xfs_dqvopcreate;
+	xfs_dqvoprename_t	xfs_dqvoprename;
+	xfs_dqvopchown_t	xfs_dqvopchown;
+	xfs_dqvopchownresv_t	xfs_dqvopchownresv;
+	struct xfs_dqtrxops	*xfs_dqtrxops;
+} xfs_qmops_t;
+
+#define XFS_QM_INIT(mp, mnt, fl) \
+	(*(mp)->m_qm_ops.xfs_qminit)(mp, mnt, fl)
+#define XFS_QM_MOUNT(mp, mnt, fl) \
+	(*(mp)->m_qm_ops.xfs_qmmount)(mp, mnt, fl)
+#define XFS_QM_UNMOUNT(mp) \
+	(*(mp)->m_qm_ops.xfs_qmunmount)(mp)
+#define XFS_QM_DONE(mp) \
+	(*(mp)->m_qm_ops.xfs_qmdone)(mp)
+#define XFS_QM_DQRELE(mp, dq) \
+	(*(mp)->m_qm_ops.xfs_dqrele)(dq)
+#define XFS_QM_DQATTACH(mp, ip, fl) \
+	(*(mp)->m_qm_ops.xfs_dqattach)(ip, fl)
+#define XFS_QM_DQDETACH(mp, ip) \
+	(*(mp)->m_qm_ops.xfs_dqdetach)(ip)
+#define XFS_QM_DQPURGEALL(mp, fl) \
+	(*(mp)->m_qm_ops.xfs_dqpurgeall)(mp, fl)
+#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, fl, dq1, dq2) \
+	(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, fl, dq1, dq2)
+#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
+	(*(mp)->m_qm_ops.xfs_dqvopcreate)(tp, ip, dq1, dq2)
+#define XFS_QM_DQVOPRENAME(mp, ip) \
+	(*(mp)->m_qm_ops.xfs_dqvoprename)(ip)
+#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
+	(*(mp)->m_qm_ops.xfs_dqvopchown)(tp, ip, dqp, dq)
+#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
+	(*(mp)->m_qm_ops.xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
+
+
 /*
  * Prototypes and functions for I/O core modularization.
  */
  
-struct flid;
-struct buf;
-
 typedef int		(*xfs_ioinit_t)(struct vfs *,
-				struct xfs_mount_args *, int *);
+				struct xfs_mount_args *, int);
 typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
 				xfs_fileoff_t, xfs_filblks_t, int,
 				xfs_fsblock_t *, xfs_extlen_t,
@@ -137,61 +241,42 @@
 	xfs_iodone_t			xfs_iodone;
 } xfs_ioops_t;
 
-
 #define XFS_IOINIT(vfsp, args, flags) \
 	(*(mp)->m_io_ops.xfs_ioinit)(vfsp, args, flags)
-
 #define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist)	\
 	(*(mp)->m_io_ops.xfs_bmapi_func) \
 		(trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist)
-
 #define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
 	(*(mp)->m_io_ops.xfs_bmap_eof_func) \
 		((io)->io_obj, endoff, whichfork, eof)
-
 #define XFS_IOMAP_WRITE_DIRECT(mp, io, offset, count, flags, mval, nmap, found)\
 	(*(mp)->m_io_ops.xfs_iomap_write_direct) \
 		((io)->io_obj, offset, count, flags, mval, nmap, found)
-
 #define XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, flags, mval, nmap) \
 	(*(mp)->m_io_ops.xfs_iomap_write_delay) \
 		((io)->io_obj, offset, count, flags, mval, nmap)
-
 #define XFS_IOMAP_WRITE_ALLOCATE(mp, io, mval, nmap) \
 	(*(mp)->m_io_ops.xfs_iomap_write_allocate) \
 		((io)->io_obj, mval, nmap)
-
 #define XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count) \
 	(*(mp)->m_io_ops.xfs_iomap_write_unwritten) \
 		((io)->io_obj, offset, count)
-
 #define XFS_LCK_MAP_SHARED(mp, io) \
 	(*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj)
-
 #define XFS_ILOCK(mp, io, mode) \
 	(*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode)
-
 #define XFS_ILOCK_NOWAIT(mp, io, mode) \
 	(*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode)
-
 #define XFS_IUNLOCK(mp, io, mode) \
 	(*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)
-
 #define XFS_ILOCK_DEMOTE(mp, io, mode) \
 	(*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode)
-
 #define XFS_SIZE(mp, io) \
 	(*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
-
 #define XFS_IODONE(vfsp) \
 	(*(mp)->m_io_ops.xfs_iodone)(vfsp)
 
 
-/*
- * Prototypes and functions for the XFS realtime subsystem.
- */
-
-
 typedef struct xfs_mount {
 	bhv_desc_t		m_bhv;		/* vfs xfs behavior */
 	xfs_tid_t		m_tid;		/* next unused tid for fs */
@@ -289,13 +374,9 @@
 	int			m_chsize;	/* size of next field */
 	struct xfs_chash	*m_chash;	/* fs private inode per-cluster
 						 * hash table */
+	struct xfs_dmops	m_dm_ops;	/* vector of DMI ops */
+	struct xfs_qmops	m_qm_ops;	/* vector of XQM ops */
 	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
-	struct xfs_expinfo	*m_expinfo;	/* info to export to other
-						   cells. */
-	uint64_t		m_shadow_pinmask;
-						/* which bits matter in rpc
-						   log item pin masks */
-	uint			m_cxfstype;	/* mounted shared, etc. */
 	lock_t			m_freeze_lock;	/* Lock for m_frozen */
 	uint			m_frozen;	/* FS frozen for shutdown or
 						 * snapshot */
@@ -324,8 +405,7 @@
 #define XFS_MOUNT_NOALIGN	0x00000080	/* turn off stripe alignment
 						   allocations */
 			     /* 0x00000100	-- currently unused */
-#define XFS_MOUNT_REGISTERED	0x00000200	/* registered with cxfs master
-						   cell logic */
+			     /* 0x00000200	-- currently unused */
 #define XFS_MOUNT_NORECOVERY	0x00000400	/* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED	0x00000800	/* shared mount */
 #define XFS_MOUNT_DFLT_IOSIZE	0x00001000	/* set default i/o size */
@@ -336,14 +416,6 @@
 						 * 32 bits in size */
 #define XFS_MOUNT_NOLOGFLUSH	0x00010000
 
-/*
- * Flags for m_cxfstype
- */
-#define XFS_CXFS_NOT		0x00000001	/* local mount */
-#define XFS_CXFS_SERVER		0x00000002	/* we're the CXFS server */
-#define XFS_CXFS_CLIENT		0x00000004	/* We're a CXFS client */
-#define XFS_CXFS_REC_ENABLED	0x00000008	/* recovery is enabled */
-
 #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 
 /*
@@ -370,15 +442,17 @@
 #define XFS_WSYNC_READIO_LOG	15	/* 32K */
 #define XFS_WSYNC_WRITEIO_LOG	14	/* 16K */
 
-#define xfs_force_shutdown(m,f)	VFS_FORCE_SHUTDOWN(XFS_MTOVFS(m),f)
+#define xfs_force_shutdown(m,f)	\
+	VFS_FORCE_SHUTDOWN((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
+
 /*
  * Flags sent to xfs_force_shutdown.
  */
 #define XFS_METADATA_IO_ERROR	0x1
 #define XFS_LOG_IO_ERROR	0x2
 #define XFS_FORCE_UMOUNT	0x4
-#define XFS_CORRUPT_INCORE	0x8	/* corrupt in-memory data structures */
-#define XFS_SHUTDOWN_REMOTE_REQ 0x10	/* shutdown came from remote cell */
+#define XFS_CORRUPT_INCORE	0x8	/* Corrupt in-memory data structures */
+#define XFS_SHUTDOWN_REMOTE_REQ 0x10	/* Shutdown came from remote cell */
 
 /*
  * xflags for xfs_syncsub
@@ -388,9 +462,7 @@
 /*
  * Flags for xfs_mountfs
  */
-#define XFS_MFSI_SECOND		0x01	/* Is a cxfs secondary mount -- skip */
-					/* stuff which should only be done */
-					/* once. */
+#define XFS_MFSI_SECOND		0x01	/* Secondary mount: skip one-off work */
 #define XFS_MFSI_CLIENT		0x02	/* Is a client -- skip lots of stuff */
 #define XFS_MFSI_NOUNLINK	0x08	/* Skip unlinked inode processing in */
 					/* log recovery */
@@ -410,6 +482,13 @@
 #else
 #define XFS_BHVTOM(bdp)		((xfs_mount_t *)BHV_PDATA(bdp))
 #endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_VFSTOM)
+xfs_mount_t *xfs_vfstom(vfs_t *vfs);
+#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
+#else
+#define XFS_VFSTOM(vfs)		\
+	(XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)))
+#endif
 
 
 /*
@@ -447,7 +526,7 @@
  */
 typedef struct xfs_mod_sb {
 	xfs_sb_field_t	msb_field;	/* Field to modify, see below */
-	int		msb_delta;	/* change to make to the specified field */
+	int		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
 #define XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock), PINOD)
@@ -455,24 +534,26 @@
 #define XFS_SB_LOCK(mp)		mutex_spinlock(&(mp)->m_sb_lock)
 #define XFS_SB_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_sb_lock,(s))
 
-void		xfs_mod_sb(xfs_trans_t *, __int64_t);
-xfs_mount_t	*xfs_mount_init(void);
-void		xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
-int		xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int);
-
-int		xfs_unmountfs(xfs_mount_t *, struct cred *);
-void		xfs_unmountfs_close(xfs_mount_t *, struct cred *);
-int		xfs_unmountfs_writesb(xfs_mount_t *);
-int		xfs_unmount_flush(xfs_mount_t *, int);
-int		xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
-int		xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int);
-int		xfs_readsb(xfs_mount_t *mp);
-struct xfs_buf	*xfs_getsb(xfs_mount_t *, int);
-void		xfs_freesb(xfs_mount_t *);
-void		xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
-int		xfs_syncsub(xfs_mount_t *, int, int, int *);
-void		xfs_initialize_perag(xfs_mount_t *, int);
-void		xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t);
+extern xfs_mount_t *xfs_mount_init(void);
+extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
+extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
+extern int	xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int);
+
+extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
+extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
+extern int	xfs_unmountfs_writesb(xfs_mount_t *);
+extern int	xfs_unmount_flush(xfs_mount_t *, int);
+extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
+			uint, int);
+extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+extern int	xfs_readsb(xfs_mount_t *mp);
+extern void	xfs_freesb(xfs_mount_t *);
+extern void	xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
+extern int	xfs_syncsub(xfs_mount_t *, int, int, int *);
+extern void	xfs_initialize_perag(xfs_mount_t *, int);
+extern void	xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t,
+			__int64_t);
 
 /*
  * Flags for freeze operations.
@@ -480,11 +561,19 @@
 #define XFS_FREEZE_WRITE	1
 #define XFS_FREEZE_TRANS	2
 
-void		xfs_start_freeze(xfs_mount_t *, int);
-void		xfs_finish_freeze(xfs_mount_t *);
-void		xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int);
+extern void	xfs_start_freeze(xfs_mount_t *, int);
+extern void	xfs_finish_freeze(xfs_mount_t *);
+extern void	xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int);
+
+extern struct vfsops xfs_vfsops;
+extern struct vnodeops xfs_vnodeops;
+
+extern struct xfs_dmops xfs_dmcore_xfs;
+extern struct xfs_qmops xfs_qmcore_xfs;
+extern struct xfs_ioops xfs_iocore_xfs;
 
-extern	struct vfsops xfs_vfsops;
+extern int 	xfs_init(void);
+extern void	xfs_cleanup(void);
 
 #endif	/* __KERNEL__ */
 
diff -Nru a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
--- a/fs/xfs/xfs_qm.c	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,2810 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#include <xfs.h>
-#include <xfs_quota_priv.h>
-
-
-kmem_zone_t	*qm_dqzone;
-kmem_zone_t	*qm_dqtrxzone;
-
-STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
-STATIC int	xfs_qm_quotacheck(xfs_mount_t *);
-
-STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC void	xfs_qm_shake(void);
-
-#ifdef DEBUG
-extern mutex_t	qcheck_lock;
-#endif
-
-#ifdef QUOTADEBUG
-#define XQM_LIST_PRINT(l, NXT, title) \
-{ \
-	  xfs_dquot_t	*dqp; int i = 0;\
-	  printk("%s (#%d)\n", title, (int) (l)->qh_nelems); \
-	  for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
-	    printk("\t%d.\t\"%d (%s)\"\t bcnt = %d, icnt = %d refs = %d\n", \
-			 ++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
-			 DQFLAGTO_TYPESTR(dqp),	     \
-			 (int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
-			 (int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
-			 (int) dqp->q_nrefs);  } \
-}
-#endif
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-struct xfs_qm *
-xfs_qm_init(void)
-{
-	xfs_qm_t		*xqm;
-	int			hsize, i;
-
-	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-	ASSERT(xqm);
-
-	/*
-	 * Initialize the dquot hash tables.
-	 */
-	hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
-		XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
-	xqm->qm_dqhashmask = hsize - 1;
-
-	/*
-	 * XXXsup We could keep reference counts on usr and grp quotas
-	 * inside XQM separately, and avoid having two hashtables even
-	 * when only one 'type' is active in the system.
-	 */
-	xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
-						      sizeof(xfs_dqhash_t),
-						      KM_SLEEP);
-	xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
-						      sizeof(xfs_dqhash_t),
-						      KM_SLEEP);
-	ASSERT(xqm->qm_usr_dqhtable != NULL);
-	ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-	}
-
-	/*
-	 * Freelist of all dquots of all file systems
-	 */
-	xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
-
-	/*
-	 * dquot zone. we register our own low-memory callback.
-	 */
-	if (!qm_dqzone) {
-		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-						"xfs_dquots");
-		qm_dqzone = xqm->qm_dqzone;
-	} else
-		xqm->qm_dqzone = qm_dqzone;
-
-	kmem_shake_register(xfs_qm_shake);
-
-	/*
-	 * The t_dqinfo portion of transactions.
-	 */
-	if (!qm_dqtrxzone) {
-		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-						   "xfs_dqtrx");
-		qm_dqtrxzone = xqm->qm_dqtrxzone;
-	} else
-		xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-	atomic_set(&xqm->qm_totaldquots, 0);
-	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
-	xqm->qm_nrefs = 0;
-#ifdef DEBUG
-	mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
-#endif
-	return (xqm);
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-void
-xfs_qm_destroy(
-	struct xfs_qm *xqm)
-{
-	int	hsize, i;
-
-	ASSERT(xqm != NULL);
-	ASSERT(xqm->qm_nrefs == 0);
-	kmem_shake_deregister(xfs_qm_shake);
-	hsize = xqm->qm_dqhashmask + 1;
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-	}
-	kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
-	kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
-	xqm->qm_usr_dqhtable = NULL;
-	xqm->qm_grp_dqhtable = NULL;
-	xqm->qm_dqhashmask = 0;
-	xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
-#ifdef DEBUG
-	mutex_destroy(&qcheck_lock);
-#endif
-	kmem_free(xqm, sizeof(xfs_qm_t));
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	/*
-	 * Need to lock the xfs_Gqm structure for things like this. For example,
-	 * the structure could disappear between the entry to this routine and
-	 * a HOLD operation if not locked.
-	 */
-	XFS_QM_LOCK(xfs_Gqm);
-
-	if (xfs_Gqm == NULL) {
-		if ((xfs_Gqm = xfs_qm_init()) == NULL) {
-			return (XFS_ERROR(EINVAL));
-		}
-	}
-	/*
-	 * We can keep a list of all filesystems with quotas mounted for
-	 * debugging and statistical purposes, but ...
-	 * Just take a reference and get out.
-	 */
-	XFS_QM_HOLD(xfs_Gqm);
-	XFS_QM_UNLOCK(xfs_Gqm);
-
-	return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	xfs_dquot_t	*dqp, *nextdqp;
-
-	ASSERT(xfs_Gqm);
-	ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-	/*
-	 * Go thru the freelist and destroy all inactive dquots.
-	 */
-	xfs_qm_freelist_lock(xfs_Gqm);
-
-	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
-	     dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
-		xfs_dqlock(dqp);
-		nextdqp = dqp->dq_flnext;
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(dqp->q_mount == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
-			ASSERT(dqp->MPL_PREVP == NULL);
-			XQM_FREELIST_REMOVE(dqp);
-			xfs_dqunlock(dqp);
-			xfs_qm_dqdestroy(dqp);
-		} else {
-			xfs_dqunlock(dqp);
-		}
-		dqp = nextdqp;
-	}
-	xfs_qm_freelist_unlock(xfs_Gqm);
-
-	/*
-	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-	 * be restarted.
-	 */
-	XFS_QM_LOCK(xfs_Gqm);
-	XFS_QM_RELE(xfs_Gqm);
-	if (xfs_Gqm->qm_nrefs == 0) {
-		xfs_qm_destroy(xfs_Gqm);
-		xfs_Gqm = NULL;
-	}
-	XFS_QM_UNLOCK(xfs_Gqm);
-}
-
-/*
- * This is called at mount time from xfs_mountfs to initialize the quotainfo
- * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
- * so already.	Note that the superblock has not been read in yet.
- */
-void
-xfs_qm_mount_quotainit(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	/*
-	 * User or group quotas has to be on.
-	 */
-	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA));
-
-	/*
-	 * Initialize the flags in the mount structure. From this point
-	 * onwards we look at m_qflags to figure out if quotas's ON/OFF, etc.
-	 * Note that we enforce nothing if accounting is off.
-	 * ie.	XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
-	 * It isn't necessary to take the quotaoff lock to do this; this is
-	 * called from mount.
-	 */
-	if (flags & XFSMNT_UQUOTA) {
-		mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-		if (flags & XFSMNT_UQUOTAENF)
-			mp->m_qflags |= XFS_UQUOTA_ENFD;
-	}
-	if (flags & XFSMNT_GQUOTA) {
-		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-		if (flags & XFSMNT_GQUOTAENF)
-			mp->m_qflags |= XFS_GQUOTA_ENFD;
-	}
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount_quotadestroy(
-	xfs_mount_t	*mp)
-{
-	xfs_qm_destroy_quotainfo(mp);
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo.  This is also responsible for
- * running a quotacheck as necessary.  We are guaranteed that the superblock
- * is consistently read in at this point.
- */
-int
-xfs_qm_mount_quotas(
-	xfs_mount_t	*mp)
-{
-	unsigned long	s;
-	int		error = 0;
-	uint		sbf;
-
-	/*
-	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o quota/uquota/gquota options, revoke the
-	 * quotachecked license, and bail out.
-	 */
-	if (! XFS_IS_QUOTA_ON(mp) &&
-	    (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) {
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
-
-	/*
-	 * If quotas on realtime volumes is not supported, we disable
-	 * quotas immediately.
-	 */
-	if (mp->m_sb.sb_rextents) {
-		cmn_err(CE_NOTE,
-			"Cannot turn on quotas for realtime filesystem %s",
-			mp->m_fsname);
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-	cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
-#endif
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-	/*
-	 * Allocate the quotainfo structure inside the mount struct, and
-	 * create quotainode(s), and change/rev superblock if necessary.
-	 */
-	if ((error = xfs_qm_init_quotainfo(mp))) {
-		/*
-		 * We must turn off quotas.
-		 */
-		ASSERT(mp->m_quotainfo == NULL);
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
-	/*
-	 * If any of the quotas are not consistent, do a quotacheck.
-	 */
-	if (XFS_QM_NEED_QUOTACHECK(mp)) {
-#ifdef DEBUG
-		cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
-#endif
-		if ((error = xfs_qm_quotacheck(mp))) {
-			cmn_err(CE_WARN, "Quotacheck unsuccessful (Error %d): "
-				"Disabling quotas.",
-				error);
-			/*
-			 * We must turn off quotas.
-			 */
-			ASSERT(mp->m_quotainfo != NULL);
-			ASSERT(xfs_Gqm != NULL);
-			xfs_qm_destroy_quotainfo(mp);
-			mp->m_qflags = 0;
-			goto write_changes;
-		}
-#ifdef DEBUG
-		cmn_err(CE_NOTE, "Done quotacheck.");
-#endif
-	}
- write_changes:
-	/*
-	 * We actually don't have to acquire the SB_LOCK at all.
-	 * This can only be called from mount, and that's single threaded. XXX
-	 */
-	s = XFS_SB_LOCK(mp);
-	sbf = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-	XFS_SB_UNLOCK(mp, s);
-
-	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
-		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
-			/*
-			 * We could only have been turning quotas off.
-			 * We aren't in very good shape actually because
-			 * the incore structures are convinced that quotas are
-			 * off, but the on disk superblock doesn't know that !
-			 */
-			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
-			xfs_fs_cmn_err(CE_ALERT, mp,
-				"XFS mount_quotas: Superblock update failed!");
-		}
-	}
-
-	if (error) {
-		xfs_fs_cmn_err(CE_WARN, mp,
-			"Failed to initialize disk quotas.");
-	}
-	return XFS_ERROR(error);
-}
-
-/*
- * Called from the vfsops layer.
- */
-int
-xfs_qm_unmount_quotas(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*uqp, *gqp;
-	int		error;
-
-	error = 0;
-
-	/*
-	 * Release the dquots that root inode, et al might be holding,
-	 * before we flush quotas and blow away the quotainfo structure.
-	 */
-	ASSERT(mp->m_rootip);
-	if (mp->m_rootip->i_udquot || mp->m_rootip->i_gdquot)
-		xfs_qm_dqdettach_inode(mp->m_rootip);
-	if (mp->m_rbmip &&
-	    (mp->m_rbmip->i_udquot || mp->m_rbmip->i_gdquot))
-		xfs_qm_dqdettach_inode(mp->m_rbmip);
-	if (mp->m_rsumip &&
-	    (mp->m_rsumip->i_udquot || mp->m_rsumip->i_gdquot))
-		xfs_qm_dqdettach_inode(mp->m_rsumip);
-
-	/*
-	 * Flush out the quota inodes.
-	 */
-	uqp = gqp = NULL;
-	if (mp->m_quotainfo) {
-		if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
-			xfs_ilock(uqp, XFS_ILOCK_EXCL);
-			xfs_iflock(uqp);
-			error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
-			xfs_iunlock(uqp, XFS_ILOCK_EXCL);
-			if (unlikely(error == EFSCORRUPTED)) {
-				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
-						 XFS_ERRLEVEL_LOW, mp);
-				goto out;
-			}
-		}
-		if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
-			xfs_ilock(gqp, XFS_ILOCK_EXCL);
-			xfs_iflock(gqp);
-			error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
-			xfs_iunlock(gqp, XFS_ILOCK_EXCL);
-			if (unlikely(error == EFSCORRUPTED)) {
-				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
-						 XFS_ERRLEVEL_LOW, mp);
-				goto out;
-			}
-		}
-	}
-	if (uqp) {
-		 XFS_PURGE_INODE(uqp);
-		 mp->m_quotainfo->qi_uquotaip = NULL;
-	}
-	if (gqp) {
-		XFS_PURGE_INODE(gqp);
-		mp->m_quotainfo->qi_gquotaip = NULL;
-	}
-out:
-	return XFS_ERROR(error);
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-int
-xfs_qm_dqflush_all(
-	xfs_mount_t	*mp,
-	int		flags)
-{
-	int		recl;
-	xfs_dquot_t	*dqp;
-	int		niters;
-	int		error;
-
-	if (mp->m_quotainfo == NULL)
-		return (0);
-	niters = 0;
-again:
-	xfs_qm_mplist_lock(mp);
-	FOREACH_DQUOT_IN_MP(dqp, mp) {
-		xfs_dqlock(dqp);
-		if (! XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-		xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
-		/* XXX a sentinel would be better */
-		recl = XFS_QI_MPLRECLAIMS(mp);
-		if (! xfs_qm_dqflock_nowait(dqp)) {
-			/*
-			 * If we can't grab the flush lock then check
-			 * to see if the dquot has been flushed delayed
-			 * write.  If so, grab its buffer and send it
-			 * out immediately.  We'll be able to acquire
-			 * the flush lock when the I/O completes.
-			 */
-			xfs_qm_dqflock_pushbuf_wait(dqp);
-		}
-		/*
-		 * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write.
-		 */
-		xfs_qm_mplist_unlock(mp);
-		error = xfs_qm_dqflush(dqp, flags);
-		xfs_dqunlock(dqp);
-		if (error)
-			return (error);
-
-		xfs_qm_mplist_lock(mp);
-		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
-			xfs_qm_mplist_unlock(mp);
-			/* XXX restart limit */
-			goto again;
-		}
-	}
-
-	xfs_qm_mplist_unlock(mp);
-	/* return ! busy */
-	return (0);
-}
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-	xfs_mount_t	*mp)
-{
-	xfs_dquot_t	*dqp, *gdqp;
-	int		nrecl;
-
- again:
-	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
-	dqp = XFS_QI_MPLNEXT(mp);
-	while (dqp) {
-		xfs_dqlock(dqp);
-		if ((gdqp = dqp->q_gdquot)) {
-			xfs_dqlock(gdqp);
-			dqp->q_gdquot = NULL;
-		}
-		xfs_dqunlock(dqp);
-
-		if (gdqp) {
-			/*
-			 * Can't hold the mplist lock across a dqput.
-			 * XXXmust convert to marker based iterations here.
-			 */
-			nrecl = XFS_QI_MPLRECLAIMS(mp);
-			xfs_qm_mplist_unlock(mp);
-			xfs_qm_dqput(gdqp);
-
-			xfs_qm_mplist_lock(mp);
-			if (nrecl != XFS_QI_MPLRECLAIMS(mp))
-				goto again;
-		}
-		dqp = dqp->MPL_NEXT;
-	}
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-int
-xfs_qm_dqpurge_all(
-		   xfs_mount_t	*mp,
-		   uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */
-{
-	xfs_dquot_t	*dqp;
-	uint		dqtype;
-	int		nrecl;
-	xfs_dquot_t	*nextdqp;
-	int		nmisses;
-
-	if (mp->m_quotainfo == NULL)
-		return (0);
-
-	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-	xfs_qm_mplist_lock(mp);
-
-	/*
-	 * In the first pass through all incore dquots of this filesystem,
-	 * we release the group dquot pointers the user dquots may be
-	 * carrying around as a hint. We need to do this irrespective of
-	 * what's being turned off.
-	 */
-	xfs_qm_detach_gdquots(mp);
-
-      again:
-	nmisses = 0;
-	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
-	/*
-	 * Try to get rid of all of the unwanted dquots. The idea is to
-	 * get them off mplist and hashlist, but leave them on freelist.
-	 */
-	dqp = XFS_QI_MPLNEXT(mp);
-	while (dqp) {
-		/*
-		 * It's OK to look at the type without taking dqlock here.
-		 * We're holding the mplist lock here, and that's needed for
-		 * a dqreclaim.
-		 */
-		if ((dqp->dq_flags & dqtype) == 0) {
-			dqp = dqp->MPL_NEXT;
-			continue;
-		}
-
-		if (! xfs_qm_dqhashlock_nowait(dqp)) {
-			nrecl = XFS_QI_MPLRECLAIMS(mp);
-			xfs_qm_mplist_unlock(mp);
-			XFS_DQ_HASH_LOCK(dqp->q_hash);
-			xfs_qm_mplist_lock(mp);
-
-			/*
-			 * XXXTheoretically, we can get into a very long
-			 * ping pong game here.
-			 * No one can be adding dquots to the mplist at
-			 * this point, but somebody might be taking things off.
-			 */
-			if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
-				XFS_DQ_HASH_UNLOCK(dqp->q_hash);
-				goto again;
-			}
-		}
-
-		/*
-		 * Take the dquot off the mplist and hashlist. It may remain on
-		 * freelist in INACTIVE state.
-		 */
-		nextdqp = dqp->MPL_NEXT;
-		nmisses += xfs_qm_dqpurge(dqp, flags);
-		dqp = nextdqp;
-	}
-	xfs_qm_mplist_unlock(mp);
-	return (nmisses);
-}
-
-STATIC int
-xfs_qm_dqattach_one(
-	xfs_inode_t	*ip,
-	xfs_dqid_t	id,
-	uint		type,
-	uint		doalloc,
-	uint		dolock,
-	xfs_dquot_t	*udqhint, /* hint */
-	xfs_dquot_t	**IO_idqpp)
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	error = 0;
-	/*
-	 * See if we already have it in the inode itself. IO_idqpp is
-	 * &i_udquot or &i_gdquot. This made the code look weird, but
-	 * made the logic a lot simpler.
-	 */
-	if ((dqp = *IO_idqpp)) {
-		if (dolock)
-			xfs_dqlock(dqp);
-		xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
-		goto done;
-	}
-
-	/*
-	 * udqhint is the i_udquot field in inode, and is non-NULL only
-	 * when the type arg is XFS_DQ_GROUP. Its purpose is to save a
-	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-	 * the user dquot.
-	 */
-	ASSERT(!udqhint || type == XFS_DQ_GROUP);
-	if (udqhint && !dolock)
-		xfs_dqlock(udqhint);
-
-	/*
-	 * No need to take dqlock to look at the id.
-	 * The ID can't change until it gets reclaimed, and it won't
-	 * be reclaimed as long as we have a ref from inode and we hold
-	 * the ilock.
-	 */
-	if (udqhint &&
-	    (dqp = udqhint->q_gdquot) &&
-	    (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
-		ASSERT(XFS_DQ_IS_LOCKED(udqhint));
-		xfs_dqlock(dqp);
-		XFS_DQHOLD(dqp);
-		ASSERT(*IO_idqpp == NULL);
-		*IO_idqpp = dqp;
-		if (!dolock) {
-			xfs_dqunlock(dqp);
-			xfs_dqunlock(udqhint);
-		}
-		/* XXX XFS_STATS */
-		goto done;
-	}
-	/*
-	 * We can't hold a dquot lock when we call the dqget code.
-	 * We'll deadlock in no time, because of (not conforming to)
-	 * lock ordering - the inodelock comes before any dquot lock,
-	 * and we may drop and reacquire the ilock in xfs_qm_dqget().
-	 */
-	if (udqhint)
-		xfs_dqunlock(udqhint);
-	/*
-	 * Find the dquot from somewhere. This bumps the
-	 * reference count of dquot and returns it locked.
-	 * This can return ENOENT if dquot didn't exist on
-	 * disk and we didn't ask it to allocate;
-	 * ESRCH if quotas got turned off suddenly.
-	 */
-	if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
-				 doalloc|XFS_QMOPT_DOWARN, &dqp))) {
-		if (udqhint && dolock)
-			xfs_dqlock(udqhint);
-		goto done;
-	}
-
-	xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
-	/*
-	 * dqget may have dropped and re-acquired the ilock, but it guarantees
-	 * that the dquot returned is the one that should go in the inode.
-	 */
-	*IO_idqpp = dqp;
-	ASSERT(dqp);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (! dolock) {
-		xfs_dqunlock(dqp);
-		ASSERT(!udqhint || !XFS_DQ_IS_LOCKED(udqhint));
-		goto done;
-	}
-	if (! udqhint)
-		goto done;
-
-	ASSERT(udqhint);
-	ASSERT(dolock);
-	ASSERT(! XFS_DQ_IS_LOCKED(udqhint));
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (! xfs_qm_dqlock_nowait(udqhint)) {
-		xfs_dqunlock(dqp);
-		xfs_dqlock(udqhint);
-		xfs_dqlock(dqp);
-	}
-      done:
-#ifdef QUOTADEBUG
-	if (udqhint) {
-		if (dolock)
-			ASSERT(XFS_DQ_IS_LOCKED(udqhint));
-		else
-			ASSERT(! XFS_DQ_IS_LOCKED(udqhint));
-	}
-	if (! error) {
-		if (dolock)
-			ASSERT(XFS_DQ_IS_LOCKED(dqp));
-		else
-			ASSERT(! XFS_DQ_IS_LOCKED(dqp));
-	}
-#endif
-	return (error);
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering contraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
-	xfs_dquot_t	*udq,
-	xfs_dquot_t	*gdq,
-	uint		locked)
-{
-	xfs_dquot_t	*tmp;
-
-#ifdef QUOTADEBUG
-	if (locked) {
-		ASSERT(XFS_DQ_IS_LOCKED(udq));
-		ASSERT(XFS_DQ_IS_LOCKED(gdq));
-	} else {
-		ASSERT(! XFS_DQ_IS_LOCKED(udq));
-		ASSERT(! XFS_DQ_IS_LOCKED(gdq));
-	}
-#endif
-	if (! locked)
-		xfs_dqlock(udq);
-
-	if ((tmp = udq->q_gdquot)) {
-		if (tmp == gdq) {
-			if (! locked)
-				xfs_dqunlock(udq);
-			return;
-		}
-
-		udq->q_gdquot = NULL;
-		/*
-		 * We can't keep any dqlocks when calling dqrele,
-		 * because the freelist lock comes before dqlocks.
-		 */
-		xfs_dqunlock(udq);
-		if (locked)
-			xfs_dqunlock(gdq);
-		/*
-		 * we took a hard reference once upon a time in dqget,
-		 * so give it back when the udquot no longer points at it
-		 * dqput() does the unlocking of the dquot.
-		 */
-		xfs_qm_dqrele(tmp);
-
-		ASSERT(! XFS_DQ_IS_LOCKED(udq));
-		ASSERT(! XFS_DQ_IS_LOCKED(gdq));
-		xfs_dqlock(udq);
-		xfs_dqlock(gdq);
-
-	} else {
-		ASSERT(XFS_DQ_IS_LOCKED(udq));
-		if (! locked) {
-			ASSERT(! XFS_DQ_IS_LOCKED(gdq));
-			xfs_dqlock(gdq);
-		}
-	}
-
-	ASSERT(XFS_DQ_IS_LOCKED(udq));
-	ASSERT(XFS_DQ_IS_LOCKED(gdq));
-	/*
-	 * Somebody could have attached a gdquot here,
-	 * when we dropped the uqlock. If so, just do nothing.
-	 */
-	if (udq->q_gdquot == NULL) {
-		XFS_DQHOLD(gdq);
-		udq->q_gdquot = gdq;
-	}
-	if (! locked) {
-		xfs_dqunlock(gdq);
-		xfs_dqunlock(udq);
-	}
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON
- * in to account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
- * much made this code a complete mess, but it has been pretty useful.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach(
-		xfs_inode_t	*ip,
-		uint		flags)
-{
-	int		error;
-	xfs_mount_t	*mp;
-	uint		nquotas;
-
-	mp = ip->i_mount;
-	ASSERT(ip->i_ino != mp->m_sb.sb_uquotino &&
-	       ip->i_ino != mp->m_sb.sb_gquotino);
-
-	ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
-	       XFS_ISLOCKED_INODE_EXCL(ip));
-
-	nquotas = 0;
-	error = 0;
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		if ((error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-						flags & XFS_QMOPT_DQALLOC,
-						flags & XFS_QMOPT_DQLOCK,
-						NULL, &ip->i_udquot)))
-			goto done;
-		nquotas++;
-	}
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		if ((error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-						flags & XFS_QMOPT_DQALLOC,
-						flags & XFS_QMOPT_DQLOCK,
-						ip->i_udquot, &ip->i_gdquot)))
-			/*
-			 * Don't worry about the udquot that we may have
-			 * attached above. It'll get dettached, if not already.
-			 */
-			goto done;
-		nquotas++;
-	}
-
-	/*
-	 * Attach this group quota to the user quota as a hint.
-	 * This WON'T, in general, result in a thrash.
-	 */
-	if (nquotas == 2) {
-		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-		ASSERT(ip->i_udquot);
-		ASSERT(ip->i_gdquot);
-
-		/*
-		 * We may or may not have the i_udquot locked at this point,
-		 * but this check is OK since we don't depend on the i_gdquot to
-		 * be accurate 100% all the time. It is just a hint, and this
-		 * will succeed in general.
-		 */
-		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-			goto done;
-		/*
-		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
-		 */
-		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
-					 flags & XFS_QMOPT_DQLOCK);
-	}
-
-      done:
-
-#ifdef QUOTADEBUG
-	if (! error) {
-		if (ip->i_udquot) {
-			if (flags & XFS_QMOPT_DQLOCK)
-				ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
-			else
-				ASSERT(! XFS_DQ_IS_LOCKED(ip->i_udquot));
-		}
-		if (ip->i_gdquot) {
-			if (flags & XFS_QMOPT_DQLOCK)
-				ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
-			else
-				ASSERT(! XFS_DQ_IS_LOCKED(ip->i_gdquot));
-		}
-		if (XFS_IS_UQUOTA_ON(mp))
-			ASSERT(ip->i_udquot);
-		if (XFS_IS_GQUOTA_ON(mp))
-			ASSERT(ip->i_gdquot);
-	}
-#endif
-
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-#ifdef QUOTADEBUG
-	else
-		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-#endif
-	return (error);
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this's called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdettach_inode(
-	xfs_inode_t	*ip)
-{
-	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
-	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-	if (ip->i_udquot)
-		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
-	if (ip->i_udquot) {
-		xfs_qm_dqrele(ip->i_udquot);
-		ip->i_udquot = NULL;
-	}
-	if (ip->i_gdquot) {
-		xfs_qm_dqrele(ip->i_gdquot);
-		ip->i_gdquot = NULL;
-	}
-}
-
-int
-xfs_qm_unmount(
-	xfs_mount_t	*mp)
-{
-	vnode_t		*vp;
-
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		vp = XFS_ITOV(XFS_QI_UQIP(mp));
-		VN_RELE(vp);
-		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "UQUOTA busy vp=0x%x count=%d",
-				vp, vn_count(vp));
-	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		vp = XFS_ITOV(XFS_QI_GQIP(mp));
-		VN_RELE(vp);
-		if (vn_count(vp) > 1)
-			cmn_err(CE_WARN, "GQUOTA busy vp=0x%x count=%d",
-				vp, vn_count(vp));
-	}
-
-	return (0);
-}
-
-
-/*
- * This is called by xfs_sync and flags arg determines the caller,
- * and its motives, as done in xfs_sync.
- *
- * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
- * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
- * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
- */
-
-int
-xfs_qm_sync(
-	xfs_mount_t	*mp,
-	short		flags)
-{
-	int		recl, restarts;
-	xfs_dquot_t	*dqp;
-	uint		flush_flags;
-	boolean_t	nowait;
-	int		error;
-
-	restarts = 0;
-	/*
-	 * We won't block unless we are asked to.
-	 */
-	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
-
-  again:
-	xfs_qm_mplist_lock(mp);
-	/*
-	 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
-	 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-	 * when we have the mplist lock, we know that dquots will be consistent
-	 * as long as we have it locked.
-	 */
-	if (! XFS_IS_QUOTA_ON(mp)) {
-		xfs_qm_mplist_unlock(mp);
-		return (0);
-	}
-	FOREACH_DQUOT_IN_MP(dqp, mp) {
-		/*
-		 * If this is vfs_sync calling, then skip the dquots that
-		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
-		 * This is very similar to what xfs_sync does with inodes.
-		 */
-		if (flags & SYNC_BDFLUSH) {
-			if (! XFS_DQ_IS_DIRTY(dqp))
-				continue;
-		}
-
-		if (nowait) {
-			/*
-			 * Try to acquire the dquot lock. We are NOT out of
-			 * lock order, but we just don't want to wait for this
-			 * lock, unless somebody wanted us to.
-			 */
-			if (! xfs_qm_dqlock_nowait(dqp))
-				continue;
-		} else {
-			xfs_dqlock(dqp);
-		}
-
-		/*
-		 * Now, find out for sure if this dquot is dirty or not.
-		 */
-		if (! XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		/* XXX a sentinel would be better */
-		recl = XFS_QI_MPLRECLAIMS(mp);
-		if (! xfs_qm_dqflock_nowait(dqp)) {
-			if (nowait) {
-				xfs_dqunlock(dqp);
-				continue;
-			}
-			/*
-			 * If we can't grab the flush lock then if the caller
-			 * really wanted us to give this our best shot,
-			 * see if we can give a push to the buffer before we wait
-			 * on the flush lock. At this point, we know that
-			 * eventhough the dquot is being flushed,
-			 * it has (new) dirty data.
-			 */
-			xfs_qm_dqflock_pushbuf_wait(dqp);
-		}
-		/*
-		 * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write
-		 */
-		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
-		xfs_qm_mplist_unlock(mp);
-		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
-		error = xfs_qm_dqflush(dqp, flush_flags);
-		xfs_dqunlock(dqp);
-		if (error && XFS_FORCED_SHUTDOWN(mp))
-			return(0);	/* Need to prevent umount failure */
-		else if (error)
-			return (error);
-
-		xfs_qm_mplist_lock(mp);
-		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
-			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-				break;
-
-			xfs_qm_mplist_unlock(mp);
-			goto again;
-		}
-	}
-
-	xfs_qm_mplist_unlock(mp);
-	return (0);
-}
-
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-int
-xfs_qm_init_quotainfo(
-	xfs_mount_t	*mp)
-{
-	xfs_quotainfo_t *qinf;
-	int		error;
-	xfs_dquot_t	*dqp;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * Tell XQM that we exist as soon as possible.
-	 */
-	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-		return (error);
-	}
-
-	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
-	/*
-	 * See if quotainodes are setup, and if not, allocate them,
-	 * and change the superblock accordingly.
-	 */
-	if ((error = xfs_qm_init_quotainos(mp))) {
-		kmem_free(qinf, sizeof(xfs_quotainfo_t));
-		mp->m_quotainfo = NULL;
-		return (error);
-	}
-
-	spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
-	xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
-	qinf->qi_dqreclaims = 0;
-
-	/* mutex used to serialize quotaoffs */
-	mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
-
-	/* Precalc some constants */
-	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	ASSERT(qinf->qi_dqchunklen);
-	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
-	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
-	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
-	/*
-	 * We try to get the limits from the superuser's limits fields.
-	 * This is quite hacky, but it is standard quota practice.
-	 * We look at the USR dquot with id == 0 first, but if user quotas
-	 * are not enabled we goto the GRP dquot with id == 0.
-	 * We don't really care to keep separate default limits for user
-	 * and group quotas, at least not at this point.
-	 */
-	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-			     (XFS_IS_UQUOTA_RUNNING(mp)) ?
-			     XFS_DQ_USER : XFS_DQ_GROUP,
-			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
-			     &dqp);
-	if (! error) {
-		/*
-		 * The warnings and timers set the grace period given to
-		 * a user or group before he or she can not perform any
-		 * more writing. If it is zero, a default is used.
-		 */
-		qinf->qi_btimelimit = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT) ?
-			INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT) : XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT) ?
-			INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT) : XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT) ?
-			INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT) : XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) ?
-			INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) : XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) ?
-			INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) : XFS_QM_IWARNLIMIT;
-
-		/*
-		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-		 * we don't want this dquot cached. We haven't done a
-		 * quotacheck yet, and quotacheck doesn't like incore dquots.
-		 */
-		xfs_qm_dqdestroy(dqp);
-	} else {
-		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-	}
-
-	return (0);
-}
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
-	xfs_mount_t	*mp)
-{
-	xfs_quotainfo_t *qi;
-
-	qi = mp->m_quotainfo;
-	ASSERT(qi != NULL);
-	ASSERT(xfs_Gqm != NULL);
-
-	/*
-	 * Release the reference that XQM kept, so that we know
-	 * when the XQM structure should be freed. We cannot assume
-	 * that xfs_Gqm is non-null after this point.
-	 */
-	xfs_qm_rele_quotafs_ref(mp);
-
-	spinlock_destroy(&qi->qi_pinlock);
-	xfs_qm_list_destroy(&qi->qi_dqlist);
-
-	if (qi->qi_uquotaip) {
-		XFS_PURGE_INODE(qi->qi_uquotaip);
-		qi->qi_uquotaip = NULL; /* paranoia */
-	}
-	if (qi->qi_gquotaip) {
-		XFS_PURGE_INODE(qi->qi_gquotaip);
-		qi->qi_gquotaip = NULL;
-	}
-	mutex_destroy(&qi->qi_quotaofflock);
-	kmem_free(qi, sizeof(xfs_quotainfo_t));
-	mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-	xfs_dqlist_t	*list,
-	char		*str,
-	int		n)
-{
-	mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
-	list->qh_next = NULL;
-	list->qh_version = 0;
-	list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-	xfs_dqlist_t	*list)
-{
-	mutex_destroy(&(list->qh_lock));
-}
-
-
-/*
- * Stripped down version of dqattach. This doesn't attach, or even look at the
- * dquots attached to the inode. The rationale is that there won't be any
- * attached at the time this is called from quotacheck.
- */
-STATIC int
-xfs_qm_dqget_noattach(
-	xfs_inode_t	*ip,
-	xfs_dquot_t	**O_udqpp,
-	xfs_dquot_t	**O_gdqpp)
-{
-	int		error;
-	xfs_mount_t	*mp;
-	xfs_dquot_t	*udqp, *gdqp;
-
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	mp = ip->i_mount;
-	udqp = NULL;
-	gdqp = NULL;
-
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		ASSERT(ip->i_udquot == NULL);
-		/*
-		 * We want the dquot allocated if it doesn't exist.
-		 */
-		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
-					 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
-					 &udqp))) {
-			/*
-			 * Shouldn't be able to turn off quotas here.
-			 */
-			ASSERT(error != ESRCH);
-			ASSERT(error != ENOENT);
-			return (error);
-		}
-		ASSERT(udqp);
-	}
-
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		ASSERT(ip->i_gdquot == NULL);
-		if (udqp)
-			xfs_dqunlock(udqp);
-		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-					 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-					 &gdqp))) {
-			if (udqp)
-				xfs_qm_dqrele(udqp);
-			ASSERT(error != ESRCH);
-			ASSERT(error != ENOENT);
-			return (error);
-		}
-		ASSERT(gdqp);
-
-		/* Reacquire the locks in the right order */
-		if (udqp) {
-			if (! xfs_qm_dqlock_nowait(udqp)) {
-				xfs_dqunlock(gdqp);
-				xfs_dqlock(udqp);
-				xfs_dqlock(gdqp);
-			}
-		}
-	}
-
-	*O_udqpp = udqp;
-	*O_gdqpp = gdqp;
-
-#ifdef QUOTADEBUG
-	if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
-	if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
-#endif
-	return (0);
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	**ip,
-	__int64_t	sbfields,
-	uint		flags)
-{
-	xfs_trans_t	*tp;
-	int		error;
-	unsigned long s;
-	cred_t		zerocr;
-	int		committed;
-
-	tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
-	if ((error = xfs_trans_reserve(tp,
-				      XFS_QM_QINOCREATE_SPACE_RES(mp),
-				      XFS_CREATE_LOG_RES(mp), 0,
-				      XFS_TRANS_PERM_LOG_RES,
-				      XFS_CREATE_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-	memset(&zerocr, 0, sizeof(zerocr));
-
-	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, IFREG, 1, 0,
-				   &zerocr, 0, 1, ip, &committed))) {
-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-				 XFS_TRANS_ABORT);
-		return (error);
-	}
-
-	/*
-	 * Keep an extra reference to this quota inode. This inode is
-	 * locked exclusively and joined to the transaction already.
-	 */
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
-	VN_HOLD(XFS_ITOV((*ip)));
-
-	/*
-	 * Make the changes in the superblock, and log those too.
-	 * sbfields arg may contain fields other than *QUOTINO;
-	 * VERSIONNUM for example.
-	 */
-	s = XFS_SB_LOCK(mp);
-	if (flags & XFS_QMOPT_SBVERSION) {
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-		unsigned oldv = mp->m_sb.sb_versionnum;
-#endif
-		ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
-		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
-		XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
-		mp->m_sb.sb_uquotino = NULLFSINO;
-		mp->m_sb.sb_gquotino = NULLFSINO;
-
-		/* qflags will get updated _after_ quotacheck */
-		mp->m_sb.sb_qflags = 0;
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-		cmn_err(CE_NOTE,
-			"Old superblock version %x, converting to %x.",
-			oldv, mp->m_sb.sb_versionnum);
-#endif
-	}
-	if (flags & XFS_QMOPT_UQUOTA)
-		mp->m_sb.sb_uquotino = (*ip)->i_ino;
-	else
-		mp->m_sb.sb_gquotino = (*ip)->i_ino;
-	XFS_SB_UNLOCK(mp, s);
-	xfs_mod_sb(tp, sbfields);
-
-	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
-				     NULL))) {
-		xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
-		return (error);
-	}
-	return (0);
-}
-
-
-STATIC int
-xfs_qm_reset_dqcounts(
-	xfs_mount_t	*mp,
-	xfs_buf_t	*bp,
-	xfs_dqid_t	id,
-	uint		type)
-{
-	xfs_disk_dquot_t	*ddq;
-	int			j;
-
-	xfs_buftrace("RESET DQUOTS", bp);
-	/*
-	 * Reset all counters and timers. They'll be
-	 * started afresh by xfs_qm_quotacheck.
-	 */
-#ifdef DEBUG
-	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	do_div(j, sizeof(xfs_dqblk_t));
-	ASSERT(XFS_QM_DQPERBLK(mp) == j);
-#endif
-	ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
-	for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
-		/*
-		 * Do a sanity check, and if needed, repair the dqblk. Don't
-		 * output any warnings because it's perfectly possible to
-		 * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
-		 */
-		(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-				      "xfs_quotacheck");
-		INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
-		INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
-		INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
-		INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
-		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
-	}
-
-	return (0);
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	firstid,
-	xfs_fsblock_t	bno,
-	xfs_filblks_t	blkcnt,
-	uint		flags)
-{
-	xfs_buf_t	*bp;
-	int		error;
-	int		notcommitted;
-	int		incr;
-
-	ASSERT(blkcnt > 0);
-	notcommitted = 0;
-	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
-		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
-	error = 0;
-
-	/*
-	 * Blkcnt arg can be a very big number, and might even be
-	 * larger than the log itself. So, we have to break it up into
-	 * manageable-sized transactions.
-	 * Note that we don't start a permanent transaction here; we might
-	 * not be able to get a log reservation for the whole thing up front,
-	 * and we don't really care to either, because we just discard
-	 * everything if we were to crash in the middle of this loop.
-	 */
-	while (blkcnt--) {
-		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-			      XFS_FSB_TO_DADDR(mp, bno),
-			      (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
-		if (error)
-			break;
-
-		(void) xfs_qm_reset_dqcounts(mp, bp, firstid,
-					     flags & XFS_QMOPT_UQUOTA ?
-					     XFS_DQ_USER : XFS_DQ_GROUP);
-		xfs_bdwrite(mp, bp);
-		/*
-		 * goto the next block.
-		 */
-		bno++;
-		firstid += XFS_QM_DQPERBLK(mp);
-	}
-	return (error);
-}
-
-/*
- * Iterate over all allocated USR/GRP dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*qip,
-	uint		flags)
-{
-	xfs_bmbt_irec_t		*map;
-	int			i, nmaps;	/* number of map entries */
-	int			error;		/* return value */
-	xfs_fileoff_t		lblkno;
-	xfs_filblks_t		maxlblkcnt;
-	xfs_dqid_t		firstid;
-	xfs_fsblock_t		rablkno;
-	xfs_filblks_t		rablkcnt;
-
-	error = 0;
-	/*
-	 * This looks racey, but we can't keep an inode lock across a
-	 * trans_reserve. But, this gets called during quotacheck, and that
-	 * happens only at mount time which is single threaded.
-	 */
-	if (qip->i_d.di_nblocks == 0)
-		return (0);
-
-	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
-	lblkno = 0;
-	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAX_FILE_OFFSET);
-	do {
-		nmaps = XFS_DQITER_MAP_SIZE;
-		/*
-		 * We aren't changing the inode itself. Just changing
-		 * some of its data. No new blocks are added here, and
-		 * the inode is never added to the transaction.
-		 */
-		xfs_ilock(qip, XFS_ILOCK_SHARED);
-		error = xfs_bmapi(NULL, qip, lblkno,
-				  maxlblkcnt - lblkno,
-				  XFS_BMAPI_METADATA,
-				  NULL,
-				  0, map, &nmaps, NULL);
-		xfs_iunlock(qip, XFS_ILOCK_SHARED);
-		if (error)
-			break;
-
-		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
-		for (i = 0; i < nmaps; i++) {
-			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-			ASSERT(map[i].br_blockcount);
-
-
-			lblkno += map[i].br_blockcount;
-
-			if (map[i].br_startblock == HOLESTARTBLOCK)
-				continue;
-
-			firstid = (xfs_dqid_t) map[i].br_startoff *
-				XFS_QM_DQPERBLK(mp);
-			/*
-			 * Do a read-ahead on the next extent.
-			 */
-			if ((i+1 < nmaps) &&
-			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
-				rablkcnt =  map[i+1].br_blockcount;
-				rablkno = map[i+1].br_startblock;
-				while (rablkcnt--) {
-					xfs_baread(mp->m_ddev_targp,
-					       XFS_FSB_TO_DADDR(mp, rablkno),
-					       (int)XFS_QI_DQCHUNKLEN(mp));
-					rablkno++;
-				}
-			}
-			/*
-			 * Iterate thru all the blks in the extent and
-			 * reset the counters of all the dquots inside them.
-			 */
-			if ((error = xfs_qm_dqiter_bufs(mp,
-						       firstid,
-						       map[i].br_startblock,
-						       map[i].br_blockcount,
-						       flags))) {
-				break;
-			}
-		}
-
-		if (error)
-			break;
-	} while (nmaps > 0);
-
-	kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
-
-	return (error);
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- * Given the inode, and a dquot (either USR or GRP, doesn't matter),
- * this updates its incore copy as well as the buffer copy. This is
- * so that once the quotacheck is done, we can just log all the buffers,
- * as opposed to logging numerous updates to individual dquots.
- */
-STATIC void
-xfs_qm_quotacheck_dqadjust(
-	xfs_dquot_t		*dqp,
-	xfs_qcnt_t		nblks,
-	xfs_qcnt_t		rtblks)
-{
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
-	/*
-	 * Adjust the inode count and the block count to reflect this inode's
-	 * resource usage.
-	 */
-	INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
-	dqp->q_res_icount++;
-	if (nblks) {
-		INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
-		dqp->q_res_bcount += nblks;
-	}
-	if (rtblks) {
-		INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
-		dqp->q_res_rtbcount += rtblks;
-	}
-
-	/*
-	 * Adjust the timers since we just changed usages
-	 */
-	if (! XFS_IS_SUSER_DQUOT(dqp))
-		xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
-
-	dqp->dq_flags |= XFS_DQ_DIRTY;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
-	xfs_inode_t	*ip,
-	xfs_qcnt_t	*O_rtblks)
-{
-	xfs_filblks_t	rtblks;			/* total rt blks */
-	xfs_ifork_t	*ifp;			/* inode fork pointer */
-	xfs_extnum_t	nextents;		/* number of extent entries */
-	xfs_bmbt_rec_t	*base;			/* base of extent array */
-	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
-	int		error;
-
-	ASSERT(XFS_IS_REALTIME_INODE(ip));
-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-			return (error);
-	}
-	rtblks = 0;
-	nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-	base = &ifp->if_u1.if_extents[0];
-	for (ep = base; ep < &base[nextents]; ep++)
-		rtblks += xfs_bmbt_get_blockcount(ep);
-	*O_rtblks = (xfs_qcnt_t)rtblks;
-	return (0);
-}
-
-/*
- * callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_trans_t	*tp,		/* transaction pointer - NULL */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	void		*buffer,	/* not used */
-	xfs_daddr_t	bno,		/* starting block of inode cluster */
-	void		*dip,		/* on-disk inode pointer (not used) */
-	int		*res)		/* result code value */
-{
-	xfs_inode_t	*ip;
-	xfs_dquot_t	*udqp, *gdqp;
-	xfs_qcnt_t	nblks, rtblks;
-	int		error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * rootino must have its resources accounted for, not so with the quota
-	 * inodes.
-	 */
-	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-		*res = BULKSTAT_RV_NOTHING;
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-	 * interface expects the inode to be exclusively locked because that's
-	 * the case in all other instances. It's OK that we do this because
-	 * quotacheck is done only at mount time.
-	 */
-	if ((error = xfs_iget(mp, tp, ino, XFS_ILOCK_EXCL, &ip, bno))) {
-		*res = BULKSTAT_RV_NOTHING;
-		return (error);
-	}
-
-	if (ip->i_d.di_mode == 0) {
-		xfs_iput_new(ip, XFS_ILOCK_EXCL);
-		*res = BULKSTAT_RV_NOTHING;
-		return XFS_ERROR(ENOENT);
-	}
-
-	/*
-	 * Obtain the locked dquots. In case of an error (eg. allocation
-	 * fails for ENOSPC), we return the negative of the error number
-	 * to bulkstat, so that it can get propagated to quotacheck() and
-	 * making us disable quotas for the file system.
-	 */
-	if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
-		xfs_iput(ip, XFS_ILOCK_EXCL);
-		*res = BULKSTAT_RV_GIVEUP;
-		return (error);
-	}
-
-	rtblks = 0;
-	if (! XFS_IS_REALTIME_INODE(ip)) {
-		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
-	} else {
-		/*
-		 * Walk thru the extent list and count the realtime blocks.
-		 */
-		if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
-			xfs_iput(ip, XFS_ILOCK_EXCL);
-			if (udqp)
-				xfs_qm_dqput(udqp);
-			if (gdqp)
-				xfs_qm_dqput(gdqp);
-			*res = BULKSTAT_RV_GIVEUP;
-			return (error);
-		}
-		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-	}
-	ASSERT(ip->i_delayed_blks == 0);
-
-	/*
-	 * We can't release the inode while holding its dquot locks.
-	 * The inode can go into inactive and might try to acquire the dquotlocks.
-	 * So, just unlock here and do a vn_rele at the end.
-	 */
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Add the (disk blocks and inode) resources occupied by this
-	 * inode to its dquots. We do this adjustment in the incore dquot,
-	 * and also copy the changes to its buffer.
-	 * We don't care about putting these changes in a transaction
-	 * envelope because if we crash in the middle of a 'quotacheck'
-	 * we have to start from the beginning anyway.
-	 * Once we're done, we'll log all the dquot bufs.
-	 *
-	 * The *QUOTA_ON checks below may look pretty racey, but quotachecks
-	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
-	 */
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		ASSERT(udqp);
-		xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
-		xfs_qm_dqput(udqp);
-	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		ASSERT(gdqp);
-		xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
-		xfs_qm_dqput(gdqp);
-	}
-	/*
-	 * Now release the inode. This will send it to 'inactive', and
-	 * possibly even free blocks.
-	 */
-	VN_RELE(XFS_ITOV(ip));
-
-	/*
-	 * Goto next inode.
-	 */
-	*res = BULKSTAT_RV_DIDONE;
-	return (0);
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world.
- */
-STATIC int
-xfs_qm_quotacheck(
-	xfs_mount_t	*mp)
-{
-	int		done, count, error;
-	xfs_ino_t	lastino;
-	size_t		structsz;
-	xfs_inode_t	*uip, *gip;
-	uint		flags;
-
-	count = INT_MAX;
-	structsz = 1;
-	lastino = 0;
-	flags = 0;
-
-	ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * There should be no cached dquots. The (simplistic) quotacheck
-	 * algorithm doesn't like that.
-	 */
-	ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
-
-	cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
-
-	/*
-	 * First we go thru all the dquots on disk, USR and GRP, and reset
-	 * their counters to zero. We need a clean slate.
-	 * We don't log our changes till later.
-	 */
-	if ((uip = XFS_QI_UQIP(mp))) {
-		if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
-			goto error_return;
-		flags |= XFS_UQUOTA_CHKD;
-	}
-
-	if ((gip = XFS_QI_GQIP(mp))) {
-		if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA)))
-			goto error_return;
-		flags |= XFS_GQUOTA_CHKD;
-	}
-
-	do {
-		/*
-		 * Iterate thru all the inodes in the file system,
-		 * adjusting the corresponding dquot counters in core.
-		 */
-		if ((error = xfs_bulkstat(mp, NULL, &lastino, &count,
-				     xfs_qm_dqusage_adjust,
-				     structsz, NULL,
-				     BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
-				     &done)))
-			break;
-
-	} while (! done);
-
-	/*
-	 * We can get this error if we couldn't do a dquot allocation inside
-	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-	 * dirty dquots that might be cached, we just want to get rid of them
-	 * and turn quotaoff. The dquots won't be attached to any of the inodes
-	 * at this point (because we intentionally didn't in dqget_noattach).
-	 */
-	if (error) {
-		xfs_qm_dqpurge_all(mp,
-				   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-				   XFS_QMOPT_QUOTAOFF);
-		goto error_return;
-	}
-	/*
-	 * We've made all the changes that we need to make incore.
-	 * Now flush_them down to disk buffers.
-	 */
-	xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
-
-	/*
-	 * We didn't log anything, because if we crashed, we'll have to
-	 * start the quotacheck from scratch anyway. However, we must make
-	 * sure that our dquot changes are secure before we put the
-	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
-	 * flush.
-	 */
-	XFS_bflush(mp->m_ddev_targp);
-
-	/*
-	 * If one type of quotas is off, then it will lose its
-	 * quotachecked status, since we won't be doing accounting for
-	 * that type anymore.
-	 */
-	mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD);
-	mp->m_qflags |= flags;
-
-#ifdef QUOTADEBUG
-	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
-#endif
-
- error_return:
-	cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
-	return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*uip, *gip;
-	int		error;
-	__int64_t	sbflags;
-	uint		flags;
-
-	ASSERT(mp->m_quotainfo);
-	uip = gip = NULL;
-	sbflags = 0;
-	flags = 0;
-
-	/*
-	 * Get the uquota and gquota inodes
-	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
-		if (XFS_IS_UQUOTA_ON(mp) &&
-		    mp->m_sb.sb_uquotino != NULLFSINO) {
-			ASSERT(mp->m_sb.sb_uquotino > 0);
-			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-					     0, &uip, 0)))
-				return XFS_ERROR(error);
-		}
-		if (XFS_IS_GQUOTA_ON(mp) &&
-		    mp->m_sb.sb_gquotino != NULLFSINO) {
-			ASSERT(mp->m_sb.sb_gquotino > 0);
-			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-					     0, &gip, 0))) {
-				if (uip)
-					VN_RELE(XFS_ITOV(uip));
-				return XFS_ERROR(error);
-			}
-		}
-	} else {
-		flags |= XFS_QMOPT_SBVERSION;
-		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
-	}
-
-	/*
-	 * Create the two inodes, if they don't exist already. The changes
-	 * made above will get added to a transaction and logged in one of
-	 * the qino_alloc calls below.  If the device is readonly,
-	 * temporarily switch to read-write to do this.
-	 */
-	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-		if ((error = xfs_qm_qino_alloc(mp, &uip,
-					      sbflags | XFS_SB_UQUOTINO,
-					      flags | XFS_QMOPT_UQUOTA)))
-			return XFS_ERROR(error);
-
-		flags &= ~XFS_QMOPT_SBVERSION;
-	}
-	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
-		if ((error = xfs_qm_qino_alloc(mp, &gip,
-					      sbflags | XFS_SB_GQUOTINO,
-					      flags | XFS_QMOPT_GQUOTA))) {
-			if (uip)
-				VN_RELE(XFS_ITOV(uip));
-
-			return XFS_ERROR(error);
-		}
-	}
-
-	XFS_QI_UQIP(mp) = uip;
-	XFS_QI_GQIP(mp) = gip;
-
-	return (0);
-}
-
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- * XXXsup merge this with qm_reclaim_one().
- */
-STATIC int
-xfs_qm_shake_freelist(
-	int howmany)
-{
-	int		nreclaimed;
-	xfs_dqhash_t	*hash;
-	xfs_dquot_t	*dqp, *nextdqp;
-	int		restarts;
-	int		nflushes;
-
-	if (howmany <= 0)
-		return (0);
-
-	nreclaimed = 0;
-	restarts = 0;
-	nflushes = 0;
-
-#ifdef QUOTADEBUG
-	printk("Shake free 0x%x\n", howmany);
-#endif
-	/* lock order is : hashchainlock, freelistlock, mplistlock */
- tryagain:
-	xfs_qm_freelist_lock(xfs_Gqm);
-
-	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
-	     ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
-	      nreclaimed < howmany); ) {
-		xfs_dqlock(dqp);
-
-		/*
-		 * We are racing with dqlookup here. Naturally we don't
-		 * want to reclaim a dquot that lookup wants.
-		 */
-		if (dqp->dq_flags & XFS_DQ_WANT) {
-			xfs_dqunlock(dqp);
-			xfs_qm_freelist_unlock(xfs_Gqm);
-			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return (nreclaimed != howmany);
-			XFS_STATS_INC(xfsstats.xs_qm_dqwants);
-			goto tryagain;
-		}
-
-		/*
-		 * If the dquot is inactive, we are assured that it is
-		 * not on the mplist or the hashlist, and that makes our
-		 * life easier.
-		 */
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(dqp->q_mount == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
-			ASSERT(dqp->MPL_PREVP == NULL);
-			XFS_STATS_INC(xfsstats.xs_qm_dqinact_reclaims);
-			nextdqp = dqp->dq_flnext;
-			goto off_freelist;
-		}
-
-		ASSERT(dqp->MPL_PREVP);
-		/*
-		 * Try to grab the flush lock. If this dquot is in the process of
-		 * getting flushed to disk, we don't want to reclaim it.
-		 */
-		if (! xfs_qm_dqflock_nowait(dqp)) {
-			xfs_dqunlock(dqp);
-			dqp = dqp->dq_flnext;
-			continue;
-		}
-
-		/*
-		 * We have the flush lock so we know that this is not in the
-		 * process of being flushed. So, if this is dirty, flush it
-		 * DELWRI so that we don't get a freelist infested with
-		 * dirty dquots.
-		 */
-		if (XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
-			/*
-			 * We flush it delayed write, so don't bother
-			 * releasing the mplock.
-			 */
-			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
-			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-			dqp = dqp->dq_flnext;
-			continue;
-		}
-		/*
-		 * We're trying to get the hashlock out of order. This races
-		 * with dqlookup; so, we giveup and goto the next dquot if
-		 * we couldn't get the hashlock. This way, we won't starve
-		 * a dqlookup process that holds the hashlock that is
-		 * waiting for the freelist lock.
-		 */
-		if (! xfs_qm_dqhashlock_nowait(dqp)) {
-			xfs_dqfunlock(dqp);
-			xfs_dqunlock(dqp);
-			dqp = dqp->dq_flnext;
-			continue;
-		}
-		/*
-		 * This races with dquot allocation code as well as dqflush_all
-		 * and reclaim code. So, if we failed to grab the mplist lock,
-		 * giveup everything and start over.
-		 */
-		hash = dqp->q_hash;
-		ASSERT(hash);
-		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
-			/* XXX put a sentinel so that we can come back here */
-			xfs_dqfunlock(dqp);
-			xfs_dqunlock(dqp);
-			XFS_DQ_HASH_UNLOCK(hash);
-			xfs_qm_freelist_unlock(xfs_Gqm);
-			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return (nreclaimed != howmany);
-			goto tryagain;
-		}
-		xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
-#ifdef QUOTADEBUG
-		printk("Shake 0x%p, ID 0x%x\n", dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
-#endif
-		ASSERT(dqp->q_nrefs == 0);
-		nextdqp = dqp->dq_flnext;
-		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
-		XQM_HASHLIST_REMOVE(hash, dqp);
-		xfs_dqfunlock(dqp);
-		xfs_qm_mplist_unlock(dqp->q_mount);
-		XFS_DQ_HASH_UNLOCK(hash);
-
- off_freelist:
-		XQM_FREELIST_REMOVE(dqp);
-		xfs_dqunlock(dqp);
-		nreclaimed++;
-		XFS_STATS_INC(xfsstats.xs_qm_dqshake_reclaims);
-		xfs_qm_dqdestroy(dqp);
-		dqp = nextdqp;
-	}
-	xfs_qm_freelist_unlock(xfs_Gqm);
-	return (nreclaimed != howmany);
-}
-
-
-/*
- * The shake manager routine called by shaked() when memory is
- * running low.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_shake(void)
-{
-	int	ndqused, nfree, n;
-
-	if (!xfs_Gqm)
-		return;
-
-	nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
-	/* incore dquots in all f/s's */
-	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-	ASSERT(ndqused >= 0);
-
-	if (nfree <= ndqused && nfree < ndquot)
-		return;
-
-	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
-	n = nfree - ndqused - ndquot;		/* # over target */
-
-	(void) xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-	xfs_dquot_t	*dqpout;
-	xfs_dquot_t	*dqp;
-	int		restarts;
-	int		nflushes;
-
-	restarts = 0;
-	dqpout = NULL;
-	nflushes = 0;
-
-	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
- startagain:
-	xfs_qm_freelist_lock(xfs_Gqm);
-
-	FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
-		xfs_dqlock(dqp);
-
-		/*
-		 * We are racing with dqlookup here. Naturally we don't
-		 * want to reclaim a dquot that lookup wants. We release the
-		 * freelist lock and start over, so that lookup will grab
-		 * both the dquot and the freelistlock.
-		 */
-		if (dqp->dq_flags & XFS_DQ_WANT) {
-			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
-			xfs_dqunlock(dqp);
-			xfs_qm_freelist_unlock(xfs_Gqm);
-			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return (NULL);
-			XFS_STATS_INC(xfsstats.xs_qm_dqwants);
-			goto startagain;
-		}
-
-		/*
-		 * If the dquot is inactive, we are assured that it is
-		 * not on the mplist or the hashlist, and that makes our
-		 * life easier.
-		 */
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(dqp->q_mount == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
-			ASSERT(dqp->MPL_PREVP == NULL);
-			XQM_FREELIST_REMOVE(dqp);
-			xfs_dqunlock(dqp);
-			dqpout = dqp;
-			XFS_STATS_INC(xfsstats.xs_qm_dqinact_reclaims);
-			break;
-		}
-
-		ASSERT(dqp->q_hash);
-		ASSERT(dqp->MPL_PREVP);
-
-		/*
-		 * Try to grab the flush lock. If this dquot is in the process of
-		 * getting flushed to disk, we don't want to reclaim it.
-		 */
-		if (! xfs_qm_dqflock_nowait(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		/*
-		 * We have the flush lock so we know that this is not in the
-		 * process of being flushed. So, if this is dirty, flush it
-		 * DELWRI so that we don't get a freelist infested with
-		 * dirty dquots.
-		 */
-		if (XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
-			/*
-			 * We flush it delayed write, so don't bother
-			 * releasing the freelist lock.
-			 */
-			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
-			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-			continue;
-		}
-
-		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
-			xfs_dqfunlock(dqp);
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		if (! xfs_qm_dqhashlock_nowait(dqp))
-			goto mplistunlock;
-
-		ASSERT(dqp->q_nrefs == 0);
-		xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
-		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
-		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
-		XQM_FREELIST_REMOVE(dqp);
-		dqpout = dqp;
-		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
- mplistunlock:
-		xfs_qm_mplist_unlock(dqp->q_mount);
-		xfs_dqfunlock(dqp);
-		xfs_dqunlock(dqp);
-		if (dqpout)
-			break;
-	}
-
-	xfs_qm_freelist_unlock(xfs_Gqm);
-	return (dqpout);
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-	xfs_dquot_t **O_dqpp)
-{
-	xfs_dquot_t	*dqp;
-
-	/*
-	 * Check against high water mark to see if we want to pop
-	 * a nincompoop dquot off the freelist.
-	 */
-	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-		/*
-		 * Try to recycle a dquot from the freelist.
-		 */
-		if ((dqp = xfs_qm_dqreclaim_one())) {
-			XFS_STATS_INC(xfsstats.xs_qm_dqreclaims);
-			/*
-			 * Just zero the core here. The rest will get
-			 * reinitialized by caller. XXX we shouldn't even
-			 * do this zero ...
-			 */
-			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-			*O_dqpp = dqp;
-			return (B_FALSE);
-		}
-		XFS_STATS_INC(xfsstats.xs_qm_dqreclaim_misses);
-	}
-
-	/*
-	 * Allocate a brand new dquot on the kernel heap and return it
-	 * to the caller to initialize.
-	 */
-	ASSERT(xfs_Gqm->qm_dqzone != NULL);
-	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-	atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-	return (B_TRUE);
-}
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
-	xfs_mount_t	*mp,
-	__int64_t	flags)
-{
-	xfs_trans_t	*tp;
-	int		error;
-
-#ifdef QUOTADEBUG
-	cmn_err(CE_NOTE,
-		"Writing superblock quota changes :%s",
-		mp->m_fsname);
-#endif
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if ((error = xfs_trans_reserve(tp, 0,
-				      mp->m_sb.sb_sectsize + 128, 0,
-				      0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-
-	xfs_mod_sb(tp, flags);
-	(void) xfs_trans_commit(tp, 0, NULL);
-
-	return (0);
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid and gid (from cred_t) make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in	: inode (unlocked)
- * out	: udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	uid_t		uid,
-	gid_t		gid,
-	uint		flags,
-	xfs_dquot_t	**O_udqpp,
-	xfs_dquot_t	**O_gdqpp)
-{
-	int		error;
-	xfs_dquot_t	*uq, *gq;
-	uint		lockflags;
-
-	lockflags = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lockflags);
-
-	if ((flags & XFS_QMOPT_INHERIT) &&
-	    XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
-		gid = ip->i_d.di_gid;
-
-	/*
-	 * Attach the dquot(s) to this inode, doing a dquot allocation
-	 * if necessary. The dquot(s) will not be locked.
-	 */
-	if (XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
-					    XFS_QMOPT_ILOCKED))) {
-			xfs_iunlock(ip, lockflags);
-			return (error);
-		}
-	}
-
-	uq = gq = NULL;
-	if ((flags & XFS_QMOPT_UQUOTA) &&
-	    XFS_IS_UQUOTA_ON(mp)) {
-		if (ip->i_d.di_uid != uid) {
-			/*
-			 * What we need is the dquot that has this uid, and
-			 * if we send the inode to dqget, the uid of the inode
-			 * takes priority over what's sent in the uid argument.
-			 * We must unlock inode here before calling dqget if
-			 * we're not sending the inode, because otherwise
-			 * we'll deadlock by doing trans_reserve while
-			 * holding ilock.
-			 */
-			xfs_iunlock(ip, lockflags);
-			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
-						 XFS_DQ_USER,
-						 XFS_QMOPT_DQALLOC |
-						 XFS_QMOPT_DOWARN,
-						 &uq))) {
-				ASSERT(error != ENOENT);
-				return (error);
-			}
-			/*
-			 * Get the ilock in the right order.
-			 */
-			xfs_dqunlock(uq);
-			lockflags = XFS_ILOCK_SHARED;
-			xfs_ilock(ip, lockflags);
-		} else {
-			/*
-			 * Take an extra reference, because we'll return
-			 * this to caller
-			 */
-			ASSERT(ip->i_udquot);
-			uq = ip->i_udquot;
-			xfs_dqlock(uq);
-			XFS_DQHOLD(uq);
-			xfs_dqunlock(uq);
-		}
-	}
-	if ((flags & XFS_QMOPT_GQUOTA) &&
-	    XFS_IS_GQUOTA_ON(mp)) {
-		if (ip->i_d.di_gid != gid) {
-			xfs_iunlock(ip, lockflags);
-			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
-						 XFS_DQ_GROUP,
-						 XFS_QMOPT_DQALLOC |
-						 XFS_QMOPT_DOWARN,
-						 &gq))) {
-				if (uq)
-					xfs_qm_dqrele(uq);
-				ASSERT(error != ENOENT);
-				return (error);
-			}
-			xfs_dqunlock(gq);
-			lockflags = XFS_ILOCK_SHARED;
-			xfs_ilock(ip, lockflags);
-		} else {
-			ASSERT(ip->i_gdquot);
-			gq = ip->i_gdquot;
-			xfs_dqlock(gq);
-			XFS_DQHOLD(gq);
-			xfs_dqunlock(gq);
-		}
-	}
-	if (uq)
-		xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
-
-	xfs_iunlock(ip, lockflags);
-	if (O_udqpp)
-		*O_udqpp = uq;
-	else if (uq)
-		xfs_qm_dqrele(uq);
-	if (O_gdqpp)
-		*O_gdqpp = gq;
-	else if (gq)
-		xfs_qm_dqrele(gq);
-	return (0);
-}
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	**IO_olddq,
-	xfs_dquot_t	*newdq)
-{
-	xfs_dquot_t	*prevdq;
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
-	/* old dquot */
-	prevdq = *IO_olddq;
-	ASSERT(prevdq);
-	ASSERT(prevdq != newdq);
-
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    -(ip->i_d.di_nblocks));
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    -1);
-
-	/* the sparkling new dquot */
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    ip->i_d.di_nblocks);
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    1);
-
-	/*
-	 * Take an extra reference, because the inode
-	 * is going to keep this dquot pointer even
-	 * after the trans_commit.
-	 */
-	xfs_dqlock(newdq);
-	XFS_DQHOLD(newdq);
-	xfs_dqunlock(newdq);
-	*IO_olddq = newdq;
-
-	return (prevdq);
-}
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID).
- */
-int
-xfs_qm_vop_chown_reserve(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp,
-	uint		privileged)
-{
-	int		error;
-	xfs_mount_t	*mp;
-	uint		delblks;
-	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
-
-	ASSERT(XFS_ISLOCKED_INODE(ip));
-	mp = ip->i_mount;
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	delblks = ip->i_delayed_blks;
-	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
-
-	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-	    ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
-		delblksudq = udqp;
-		/*
-		 * If there are delayed allocation blocks, then we have to
-		 * unreserve those from the old dquot, and add them to the
-		 * new dquot.
-		 */
-		if (delblks) {
-			ASSERT(ip->i_udquot);
-			unresudq = ip->i_udquot;
-		}
-	}
-	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
-	    ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) {
-		delblksgdq = gdqp;
-		if (delblks) {
-			ASSERT(ip->i_gdquot);
-			unresgdq = ip->i_gdquot;
-		}
-	}
-
-	if ((error = xfs_trans_reserve_quota(tp, delblksudq,
-					    delblksgdq,
-					    ip->i_d.di_nblocks, 1,
-					    privileged)))
-		return (error);
-
-
-	/*
-	 * Do the delayed blks reservations/unreservations now. Since, these
-	 * are done without the help of a transaction, if a reservation fails
-	 * its previous reservations won't be automatically undone by trans
-	 * code. So, we have to do it manually here.
-	 */
-	if (delblks) {
-		/*
-		 * Do the reservations first. Unreservation can't fail.
-		 */
-		ASSERT(delblksudq || delblksgdq);
-		ASSERT(unresudq || unresgdq);
-		if ((error = xfs_trans_reserve_quota(NULL,
-						    delblksudq, delblksgdq,
-						    (xfs_qcnt_t)delblks, 0,
-						    privileged)))
-			return (error);
-		(void) xfs_trans_unreserve_quota(NULL,
-						 unresudq, unresgdq,
-						 (xfs_qcnt_t)delblks, 0,
-						 0);
-	}
-
-	return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
-	xfs_inode_t	**i_tab)
-{
-	xfs_inode_t	*ip;
-	int		i;
-	int		error;
-
-	ip = i_tab[0];
-
-	if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-		error = xfs_qm_dqattach(ip, 0);
-		if (error)
-			return (error);
-	}
-	for (i = 1; (i < 4 && i_tab[i]); i++) {
-		/*
-		 * Watch out for duplicate entries in the table.
-		 */
-		if ((ip = i_tab[i]) != i_tab[i-1]) {
-			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-				error = xfs_qm_dqattach(ip, 0);
-				if (error)
-					return (error);
-			}
-		}
-	}
-	return (0);
-}
-
-void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp)
-{
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-
-	if (udqp) {
-		xfs_dqlock(udqp);
-		XFS_DQHOLD(udqp);
-		xfs_dqunlock(udqp);
-		ASSERT(ip->i_udquot == NULL);
-		ip->i_udquot = udqp;
-		ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
-		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
-	}
-	if (gdqp) {
-		xfs_dqlock(gdqp);
-		XFS_DQHOLD(gdqp);
-		xfs_dqunlock(gdqp);
-		ASSERT(ip->i_gdquot == NULL);
-		ip->i_gdquot = gdqp;
-		ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
-		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
-	}
-}
-
-/* ------------- list stuff -----------------*/
-void
-xfs_qm_freelist_init(xfs_frlist_t *ql)
-{
-	ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
-	mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
-	ql->qh_version = 0;
-	ql->qh_nelems = 0;
-}
-
-void
-xfs_qm_freelist_destroy(xfs_frlist_t *ql)
-{
-	xfs_dquot_t	*dqp, *nextdqp;
-
-	mutex_lock(&ql->qh_lock, PINOD);
-	for (dqp = ql->qh_next;
-	     dqp != (xfs_dquot_t *)ql; ) {
-		xfs_dqlock(dqp);
-		nextdqp = dqp->dq_flnext;
-#ifdef QUOTADEBUG
-		printk("FREELIST destroy 0x%p\n", dqp);
-#endif
-		XQM_FREELIST_REMOVE(dqp);
-		xfs_dqunlock(dqp);
-		xfs_qm_dqdestroy(dqp);
-		dqp = nextdqp;
-	}
-	/*
-	 * Don't bother about unlocking.
-	 */
-	mutex_destroy(&ql->qh_lock);
-
-	ASSERT(ql->qh_nelems == 0);
-}
-
-void
-xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
-	dq->dq_flnext = ql->qh_next;
-	dq->dq_flprev = (xfs_dquot_t *)ql;
-	ql->qh_next = dq;
-	dq->dq_flnext->dq_flprev = dq;
-	xfs_Gqm->qm_dqfreelist.qh_nelems++;
-	xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-void
-xfs_qm_freelist_unlink(xfs_dquot_t *dq)
-{
-	xfs_dquot_t *next = dq->dq_flnext;
-	xfs_dquot_t *prev = dq->dq_flprev;
-
-	next->dq_flprev = prev;
-	prev->dq_flnext = next;
-	dq->dq_flnext = dq->dq_flprev = dq;
-	xfs_Gqm->qm_dqfreelist.qh_nelems--;
-	xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-#ifdef QUOTADEBUG
-void
-xfs_qm_freelist_print(xfs_frlist_t *qlist, char *title)
-{
-	xfs_dquot_t *dq;
-	int i = 0;
-	printk("%s (#%d)\n", title, (int) qlist->qh_nelems);
-	FOREACH_DQUOT_IN_FREELIST(dq, qlist) {
-		printk("\t%d.\t\"%d (%s:0x%p)\"\t bcnt = %d, icnt = %d "
-		       "refs = %d\n",
-		       ++i, INT_GET(dq->q_core.d_id, ARCH_CONVERT),
-		       DQFLAGTO_TYPESTR(dq), dq,
-		       (int) INT_GET(dq->q_core.d_bcount, ARCH_CONVERT),
-		       (int) INT_GET(dq->q_core.d_icount, ARCH_CONVERT),
-		       (int) dq->q_nrefs);
-	}
-}
-#endif
-
-void
-xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
-	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
-}
-
-int
-xfs_qm_dqhashlock_nowait(
-	xfs_dquot_t *dqp)
-{
-	int locked;
-
-	locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
-	return (locked);
-}
-
-int
-xfs_qm_freelist_lock_nowait(
-	xfs_qm_t *xqm)
-{
-	int locked;
-
-	locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
-	return (locked);
-}
-
-int
-xfs_qm_mplist_nowait(
-	xfs_mount_t	*mp)
-{
-	int locked;
-
-	ASSERT(mp->m_quotainfo);
-	locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
-	return (locked);
-}
diff -Nru a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
--- a/fs/xfs/xfs_qm.h	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-struct	xfs_dqhash;
-struct	xfs_inode;
-struct	xfs_dquot;
-
-extern kmem_zone_t	*qm_dqzone;
-extern kmem_zone_t	*qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS	7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS	4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO		2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_NCSIZE_THRESHOLD		5000
-#define XFS_QM_HASHSIZE_LOW		32
-#define XFS_QM_HASHSIZE_HIGH		64
-
-/*
- * We output a cmn_err when quotachecking a quota file with more than
- * this many fsbs.
- */
-#define XFS_QM_BIG_QCHECK_NBLKS		500
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
-/*
- * When doing a quotacheck, we log dquot clusters of this many FSBs at most
- * in a single transaction. We don't want to ask for too huge a log reservation.
- */
-#define XFS_QM_MAX_DQCLUSTER_LOGSZ	3
-
-typedef xfs_dqhash_t	xfs_dqlist_t;
-/*
- * The freelist head. The first two fields match the first two in the
- * xfs_dquot_t structure (in xfs_dqmarker_t)
- */
-typedef struct xfs_frlist {
-       struct xfs_dquot *qh_next;
-       struct xfs_dquot *qh_prev;
-       mutex_t		 qh_lock;
-       uint		 qh_version;
-       uint		 qh_nelems;
-} xfs_frlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
-	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
-	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
-	xfs_frlist_t	 qm_dqfreelist;	 /* freelist of dquots */
-	atomic_t	 qm_totaldquots; /* total incore dquots */
-	uint		 qm_nrefs;	 /* file systems with quota on */
-	int		 qm_dqfree_ratio;/* ratio of free to inuse dquots */
-	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
-	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
-	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
-	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
-	lock_t		 qi_pinlock;	 /* dquot pinning mutex */
-	xfs_dqlist_t	 qi_dqlist;	 /* all dquots in filesys */
-	int		 qi_dqreclaims;	 /* a change here indicates
-					    a removal in the dqlist */
-	time_t		 qi_btimelimit;	 /* limit for blks timer */
-	time_t		 qi_itimelimit;	 /* limit for inodes timer */
-	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
-	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for num warnings */
-	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for num warnings */
-	mutex_t		 qi_quotaofflock;/* to serialize quotaoff */
-	/* Some useful precalculated constants */
-	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
-	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
-} xfs_quotainfo_t;
-
-
-/*
- * The structure kept inside the xfs_trans_t keep track of dquot changes
- * within a transaction and apply them later.
- */
-typedef struct xfs_dqtrx {
-	struct xfs_dquot *qt_dquot;	  /* the dquot this refers to */
-	ulong		qt_blk_res;	  /* blks reserved on a dquot */
-	ulong		qt_blk_res_used;  /* blks used from the reservation */
-	ulong		qt_ino_res;	  /* inode reserved on a dquot */
-	ulong		qt_ino_res_used;  /* inodes used from the reservation */
-	long		qt_bcount_delta;  /* dquot blk count changes */
-	long		qt_delbcnt_delta; /* delayed dquot blk count changes */
-	long		qt_icount_delta;  /* dquot inode count changes */
-	ulong		qt_rtblk_res;	  /* # blks reserved on a dquot */
-	ulong		qt_rtblk_res_used;/* # blks used from reservation */
-	long		qt_rtbcount_delta;/* dquot realtime blk changes */
-	long		qt_delrtb_delta;  /* delayed RT blk count changes */
-} xfs_dqtrx_t;
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS		2
-typedef struct xfs_dquot_acct {
-	xfs_dqtrx_t	dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
-	xfs_dqtrx_t	dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT	DQ_BTIMELIMIT
-#define XFS_QM_RTBTIMELIMIT	DQ_BTIMELIMIT
-#define XFS_QM_ITIMELIMIT	DQ_FTIMELIMIT
-
-#define XFS_QM_BWARNLIMIT	5
-#define XFS_QM_IWARNLIMIT	5
-
-#define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock, PINOD))
-#define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock))
-#define XFS_QM_HOLD(xqm)	((xqm)->qm_nrefs++)
-#define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)
-
-extern int		xfs_qm_init_quotainfo(xfs_mount_t *);
-extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void		xfs_qm_dqunlink(xfs_dquot_t *);
-extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* list stuff */
-extern void		xfs_qm_freelist_init(xfs_frlist_t *);
-extern void		xfs_qm_freelist_destroy(xfs_frlist_t *);
-extern void		xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *);
-extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
-extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
-extern int		xfs_qm_freelist_lock_nowait(xfs_qm_t *);
-extern int		xfs_qm_mplist_nowait(xfs_mount_t *);
-extern int		xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
-
-/* system call interface */
-extern int linvfs_getxstate(struct super_block *, struct fs_quota_stat *);
-extern int linvfs_setxstate(struct super_block *, unsigned int, int);
-extern int linvfs_getxquota(struct super_block *, int, qid_t, struct fs_disk_quota *);
-extern int linvfs_setxquota(struct super_block *, int, qid_t, struct fs_disk_quota *);
-
-#ifdef DEBUG
-extern int		xfs_qm_internalqcheck(xfs_mount_t *);
-#else
-#define xfs_qm_internalqcheck(mp)	(0)
-#endif
-
-#ifdef QUOTADEBUG
-extern void		xfs_qm_freelist_print(xfs_frlist_t *, char *);
-#else
-#define xfs_qm_freelist_print(a, b)	do { } while (0)
-#endif
-
-#endif /* __XFS_QM_H__ */
diff -Nru a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
--- a/fs/xfs/xfs_qm_syscalls.c	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,1374 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#include <xfs.h>
-#include <xfs_quota_priv.h>
-
-#ifdef DEBUG
-# define qdprintk(s, args...)		printk(s, ## args)
-#else
-# define qdprintk(s, args...)		do { } while (0)
-#endif
-
-STATIC int	xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-STATIC int	xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-STATIC int	xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-STATIC int	xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-STATIC int	xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-STATIC int	xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
-STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
-					uint);
-STATIC uint	xfs_qm_import_flags(uint);
-STATIC uint	xfs_qm_export_flags(uint);
-STATIC uint	xfs_qm_import_qtype_flags(uint);
-STATIC uint	xfs_qm_export_qtype_flags(uint);
-STATIC void	xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-					fs_disk_quota_t *);
-
-
-int
-linvfs_getxstate(
-	struct super_block	*sb,
-	struct fs_quota_stat	*fqs)
-{
-	xfs_mount_t		*mp;
-	vfs_t			*vfsp;
-
-	vfsp = LINVFS_GET_VFS(sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-	return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-int
-linvfs_setxstate(
-	struct super_block	*sb,
-	unsigned int		flags,
-	int			op)
-{
-	xfs_mount_t		*mp;
-	vfs_t			*vfsp;
-	uint			qflags;
-
-	vfsp = LINVFS_GET_VFS(sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-	if (vfsp->vfs_flag & VFS_RDONLY)
-		return -EROFS;
-
-	switch (op) {
-	case Q_XQUOTARM:
-		if (XFS_IS_QUOTA_ON(mp)) {
-			qdprintk("cannot remove, quota on: flags=%x\n", flags);
-			return -EINVAL;
-		}
-		qflags = xfs_qm_import_qtype_flags(flags);
-		return -xfs_qm_scall_trunc_qfiles(mp, qflags);
-	case Q_XQUOTAON:
-		qflags = xfs_qm_import_flags(flags);
-		return -xfs_qm_scall_quotaon(mp, qflags);
-	case Q_XQUOTAOFF:
-		qflags = xfs_qm_import_flags(flags);
-		if (!XFS_IS_QUOTA_ON(mp))
-			return -ESRCH;
-		return -xfs_qm_scall_quotaoff(mp, qflags, B_FALSE);
-	}
-	qdprintk("cannot set state, invalid op: op=%x flags=%x\n", op, flags);
-	return -EINVAL;
-}
-
-int
-linvfs_getxquota(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	xfs_mount_t		*mp;
-	vfs_t			*vfsp;
-	int			qtype;
-
-	vfsp = LINVFS_GET_VFS(sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
-	qtype = (type == GRPQUOTA)? XFS_DQ_GROUP : XFS_DQ_USER;
-	return -xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, qtype, fdq);
-}
-
-int
-linvfs_setxquota(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	xfs_mount_t		*mp;
-	vfs_t			*vfsp;
-	int			qtype;
-
-	vfsp = LINVFS_GET_VFS(sb);
-	mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
-	if (vfsp->vfs_flag & VFS_RDONLY)
-		return -EROFS;
-	qtype = (type == GRPQUOTA)? XFS_DQ_GROUP : XFS_DQ_USER;
-	return xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, qtype, fdq);
-}
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-STATIC int
-xfs_qm_scall_quotaoff(
-	xfs_mount_t		*mp,
-	uint			flags,
-	boolean_t		force)
-{
-	uint			dqtype;
-	unsigned long	s;
-	int			error;
-	uint			inactivate_flags;
-	xfs_qoff_logitem_t	*qoffstart;
-	int			nculprits;
-
-	if (!force && !capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-	/*
-	 * No file system can have quotas enabled on disk but not in core.
-	 * Note that quota utilities (like quotaoff) _expect_
-	 * errno == EEXIST here.
-	 */
-	if ((mp->m_qflags & flags) == 0)
-		return XFS_ERROR(EEXIST);
-	error = 0;
-
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-	/*
-	 * We don't want to deal with two quotaoffs messing up each other,
-	 * so we're going to serialize it. quotaoff isn't exactly a performance
-	 * critical thing.
-	 * If quotaoff, then we must be dealing with the root filesystem.
-	 */
-	ASSERT(mp->m_quotainfo);
-	if (mp->m_quotainfo)
-		mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
-
-	ASSERT(mp->m_quotainfo);
-
-	/*
-	 * If we're just turning off quota enforcement, change mp and go.
-	 */
-	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-		mp->m_qflags &= ~(flags);
-
-		s = XFS_SB_LOCK(mp);
-		mp->m_sb.sb_qflags = mp->m_qflags;
-		XFS_SB_UNLOCK(mp, s);
-		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
-		/* XXX what to do if error ? Revert back to old vals incore ? */
-		error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
-		return (error);
-	}
-
-	dqtype = 0;
-	inactivate_flags = 0;
-	/*
-	 * If accounting is off, we must turn enforcement off, clear the
-	 * quota 'CHKD' certificate to make it known that we have to
-	 * do a quotacheck the next time this quota is turned on.
-	 */
-	if (flags & XFS_UQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_UQUOTA;
-		flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-		inactivate_flags |= XFS_UQUOTA_ACTIVE;
-	}
-	if (flags & XFS_GQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_GQUOTA;
-		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
-		inactivate_flags |= XFS_GQUOTA_ACTIVE;
-	}
-
-	/*
-	 * Nothing to do?  Don't complain. This happens when we're just
-	 * turning off quota enforcement.
-	 */
-	if ((mp->m_qflags & flags) == 0) {
-		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-		return (0);
-	}
-
-	/*
-	 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-	 * and synchronously.
-	 */
-	xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-
-	/*
-	 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-	 * to take care of the race between dqget and quotaoff. We don't take
-	 * any special locks to reset these bits. All processes need to check
-	 * these bits *after* taking inode lock(s) to see if the particular
-	 * quota type is in the process of being turned off. If *ACTIVE, it is
-	 * guaranteed that all dquot structures and all quotainode ptrs will all
-	 * stay valid as long as that inode is kept locked.
-	 *
-	 * There is no turning back after this.
-	 */
-	mp->m_qflags &= ~inactivate_flags;
-
-	/*
-	 * Give back all the dquot reference(s) held by inodes.
-	 * Here we go thru every single incore inode in this file system, and
-	 * do a dqrele on the i_udquot/i_gdquot that it may have.
-	 * Essentially, as long as somebody has an inode locked, this guarantees
-	 * that quotas will not be turned off. This is handy because in a
-	 * transaction once we lock the inode(s) and check for quotaon, we can
-	 * depend on the quota inodes (and other things) being valid as long as
-	 * we keep the lock(s).
-	 */
-	xfs_qm_dqrele_all_inodes(mp, flags);
-
-	/*
-	 * Next we make the changes in the quota flag in the mount struct.
-	 * This isn't protected by a particular lock directly, because we
-	 * don't want to take a mrlock everytime we depend on quotas being on.
-	 */
-	mp->m_qflags &= ~(flags);
-
-	/*
-	 * Go through all the dquots of this file system and purge them,
-	 * according to what was turned off. We may not be able to get rid
-	 * of all dquots, because dquots can have temporary references that
-	 * are not attached to inodes. eg. xfs_setattr, xfs_create.
-	 * So, if we couldn't purge all the dquots from the filesystem,
-	 * we can't get rid of the incore data structures.
-	 */
-	while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF)))
-		delay(10 * nculprits);
-
-	/*
-	 * Transactions that had started before ACTIVE state bit was cleared
-	 * could have logged many dquots, so they'd have higher LSNs than
-	 * the first QUOTAOFF log record does. If we happen to crash when
-	 * the tail of the log has gone past the QUOTAOFF record, but
-	 * before the last dquot modification, those dquots __will__
-	 * recover, and that's not good.
-	 *
-	 * So, we have QUOTAOFF start and end logitems; the start
-	 * logitem won't get overwritten until the end logitem appears...
-	 */
-	xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-
-	/*
-	 * If quotas is completely disabled, close shop.
-	 */
-	if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) {
-		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-		xfs_qm_destroy_quotainfo(mp);
-		return (0);
-	}
-
-	/*
-	 * Release our quotainode references, and vn_purge them,
-	 * if we don't need them anymore.
-	 */
-	if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
-		XFS_PURGE_INODE(XFS_QI_UQIP(mp));
-		XFS_QI_UQIP(mp) = NULL;
-	}
-	if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) {
-		XFS_PURGE_INODE(XFS_QI_GQIP(mp));
-		XFS_QI_GQIP(mp) = NULL;
-	}
-	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
-	return (error);
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfiles(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		error;
-	xfs_inode_t	*qip;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-	error = 0;
-	if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
-		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-
-	if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
-		error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, &qip, 0);
-		if (! error) {
-			(void) xfs_truncate_file(mp, qip);
-			VN_RELE(XFS_ITOV(qip));
-		}
-	}
-
-	if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) {
-		error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, &qip, 0);
-		if (! error) {
-			(void) xfs_truncate_file(mp, qip);
-			VN_RELE(XFS_ITOV(qip));
-		}
-	}
-
-	return (error);
-}
-
-
-/*
- * Switch on (a given) quota enforcement for a filesystem.  This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-STATIC int
-xfs_qm_scall_quotaon(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		error;
-	unsigned long s;
-	uint		qf;
-	uint		accflags;
-	__int64_t	sbflags;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-	/*
-	 * Switching on quota accounting must be done at mount time.
-	 */
-	accflags = flags & XFS_ALL_QUOTA_ACCT;
-	flags &= ~(XFS_ALL_QUOTA_ACCT);
-
-	sbflags = 0;
-
-	if (flags == 0) {
-		qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-
-	/* No fs can turn on quotas with a delayed effect */
-	ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
-	/*
-	 * Can't enforce without accounting. We check the superblock
-	 * qflags here instead of m_qflags because rootfs can have
-	 * quota acct on ondisk without m_qflags' knowing.
-	 */
-	if (((flags & XFS_UQUOTA_ACCT) == 0 &&
-	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
-	    (flags & XFS_UQUOTA_ENFD))
-	    ||
-	    ((flags & XFS_GQUOTA_ACCT) == 0 &&
-	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-	    (flags & XFS_GQUOTA_ENFD))) {
-		qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
-			flags, mp->m_sb.sb_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-	/*
-	 * If everything's upto-date incore, then don't waste time.
-	 */
-	if ((mp->m_qflags & flags) == flags)
-		return XFS_ERROR(EEXIST);
-
-	/*
-	 * Change sb_qflags on disk but not incore mp->qflags
-	 * if this is the root filesystem.
-	 */
-	s = XFS_SB_LOCK(mp);
-	qf = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = qf | flags;
-	XFS_SB_UNLOCK(mp, s);
-
-	/*
-	 * There's nothing to change if it's the same.
-	 */
-	if ((qf & flags) == flags && sbflags == 0)
-		return XFS_ERROR(EEXIST);
-	sbflags |= XFS_SB_QFLAGS;
-
-	if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
-		return (error);
-	/*
-	 * If we aren't trying to switch on quota enforcement, we are done.
-	 */
-	if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
-	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
-	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
-		return (0);
-
-	if (! XFS_IS_QUOTA_RUNNING(mp))
-		return XFS_ERROR(ESRCH);
-
-	/*
-	 * Switch on quota enforcement in core.
-	 */
-	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
-	mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
-	return (0);
-}
-
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-STATIC int
-xfs_qm_scall_getqstat(
-	xfs_mount_t	*mp,
-	fs_quota_stat_t *out)
-{
-	xfs_inode_t	*uip, *gip;
-	boolean_t	tempuqip, tempgqip;
-
-	uip = gip = NULL;
-	tempuqip = tempgqip = B_FALSE;
-	memset(out, 0, sizeof(fs_quota_stat_t));
-
-	out->qs_version = FS_QSTAT_VERSION;
-	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
-		out->qs_uquota.qfs_ino = NULLFSINO;
-		out->qs_gquota.qfs_ino = NULLFSINO;
-		return (0);
-	}
-	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
-							(XFS_ALL_QUOTA_ACCT|
-							 XFS_ALL_QUOTA_ENFD));
-	out->qs_pad = 0;
-	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-	if (mp->m_quotainfo) {
-		uip = mp->m_quotainfo->qi_uquotaip;
-		gip = mp->m_quotainfo->qi_gquotaip;
-	}
-	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
-		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, &uip, 0) == 0)
-			tempuqip = B_TRUE;
-	}
-	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
-		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, &gip, 0) == 0)
-			tempgqip = B_TRUE;
-	}
-	if (uip) {
-		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
-		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
-		if (tempuqip)
-			VN_RELE(XFS_ITOV(uip));
-	}
-	if (gip) {
-		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
-		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
-		if (tempgqip)
-			VN_RELE(XFS_ITOV(gip));
-	}
-	if (mp->m_quotainfo) {
-		out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
-		out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
-		out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
-		out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
-		out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
-		out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
-	}
-	return (0);
-}
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-STATIC int
-xfs_qm_scall_setqlim(
-	xfs_mount_t		*mp,
-	xfs_dqid_t		id,
-	uint			type,
-	fs_disk_quota_t		*newlim)
-{
-	xfs_disk_dquot_t	*ddq;
-	xfs_dquot_t		*dqp;
-	xfs_trans_t		*tp;
-	int			error;
-	xfs_qcnt_t		hard, soft;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-
-	if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0)
-		return (0);
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
-				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-
-	/*
-	 * We don't want to race with a quotaoff so take the quotaoff lock.
-	 * (We don't hold an inode lock, so there's nothing else to stop
-	 * a quotaoff from happening). (XXXThis doesn't currently happen
-	 * because we take the vfslock before calling xfs_qm_sysent).
-	 */
-	mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
-
-	/*
-	 * Get the dquot (locked), and join it to the transaction.
-	 * Allocate the dquot if this doesn't exist.
-	 */
-	if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
-		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-		ASSERT(error != ENOENT);
-		return (error);
-	}
-	xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
-	xfs_trans_dqjoin(tp, dqp);
-	ddq = &dqp->q_core;
-
-	/*
-	 * Make sure that hardlimits are >= soft limits before changing.
-	 */
-	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-			INT_GET(ddq->d_blk_hardlimit, ARCH_CONVERT);
-	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-			INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT);
-	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_blk_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_blk_softlimit, ARCH_CONVERT, soft);
-	}
-	else {
-		qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
-	}
-	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-			INT_GET(ddq->d_rtb_hardlimit, ARCH_CONVERT);
-	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-			INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT);
-	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_rtb_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_rtb_softlimit, ARCH_CONVERT, soft);
-	}
-	else
-		qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
-
-	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
-		(xfs_qcnt_t) newlim->d_ino_hardlimit :
-		INT_GET(ddq->d_ino_hardlimit, ARCH_CONVERT);
-	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
-		(xfs_qcnt_t) newlim->d_ino_softlimit :
-		INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT);
-	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_ino_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_ino_softlimit, ARCH_CONVERT, soft);
-	}
-	else
-		qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
-
-	if (id == 0) {
-		/*
-		 * Timelimits for the super user set the relative time
-		 * the other users can be over quota for this file system.
-		 * If it is zero a default is used.
-		 */
-		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-			mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
-			INT_SET(dqp->q_core.d_btimer, ARCH_CONVERT, newlim->d_btimer);
-		}
-		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-			mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
-			INT_SET(dqp->q_core.d_itimer, ARCH_CONVERT, newlim->d_itimer);
-		}
-		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-			mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
-			INT_SET(dqp->q_core.d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
-		}
-	} else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ {
-		/*
-		 * If the user is now over quota, start the timelimit.
-		 * The user will not be 'warned'.
-		 * Note that we keep the timers ticking, whether enforcement
-		 * is on or off. We don't really want to bother with iterating
-		 * over all ondisk dquots and turning the timers on/off.
-		 */
-		xfs_qm_adjust_dqtimers(mp, ddq);
-	}
-	dqp->dq_flags |= XFS_DQ_DIRTY;
-	xfs_trans_log_dquot(tp, dqp);
-
-	xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
-	xfs_trans_commit(tp, 0, NULL);
-	xfs_qm_dqprint(dqp);
-	xfs_qm_dqrele(dqp);
-	mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
-	return (0);
-}
-
-STATIC int
-xfs_qm_scall_getquota(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,
-	uint		type,
-	fs_disk_quota_t *out)
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-
-	/*
-	 * Try to get the dquot. We don't want it allocated on disk, so
-	 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-	 * exist, we'll get ENOENT back.
-	 */
-	if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-		return (error);
-	}
-
-	xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
-	/*
-	 * If everything's NULL, this dquot doesn't quite exist as far as
-	 * our utility programs are concerned.
-	 */
-	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-		xfs_qm_dqput(dqp);
-		return XFS_ERROR(ENOENT);
-	}
-	/* xfs_qm_dqprint(dqp); */
-	/*
-	 * Convert the disk dquot to the exportable format
-	 */
-	xfs_qm_export_dquot(mp, &dqp->q_core, out);
-	xfs_qm_dqput(dqp);
-	return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-	xfs_mount_t		*mp,
-	xfs_qoff_logitem_t	*startqoff,
-	uint			flags)
-{
-	xfs_trans_t	       *tp;
-	int			error;
-	xfs_qoff_logitem_t     *qoffi;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
-				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-
-	qoffi = xfs_trans_get_qoff_item(tp, startqoff,
-					flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0, NULL);
-	return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
-	xfs_mount_t	       *mp,
-	xfs_qoff_logitem_t     **qoffstartp,
-	uint		       flags)
-{
-	xfs_trans_t	       *tp;
-	int			error;
-	unsigned long	s;
-	xfs_qoff_logitem_t     *qoffi=NULL;
-	uint			oldsbqflag=0;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-	if ((error = xfs_trans_reserve(tp, 0,
-				      sizeof(xfs_qoff_logitem_t) * 2 +
-				      mp->m_sb.sb_sectsize + 128,
-				      0,
-				      0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		goto error0;
-	}
-
-	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-
-	s = XFS_SB_LOCK(mp);
-	oldsbqflag = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-	XFS_SB_UNLOCK(mp, s);
-
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0, NULL);
-
-error0:
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		/*
-		 * No one else is modifying sb_qflags, so this is OK.
-		 * We still hold the quotaofflock.
-		 */
-		s = XFS_SB_LOCK(mp);
-		mp->m_sb.sb_qflags = oldsbqflag;
-		XFS_SB_UNLOCK(mp, s);
-	}
-	*qoffstartp = qoffi;
-	return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-	xfs_mount_t		*mp,
-	xfs_disk_dquot_t	*src,
-	struct fs_disk_quota	*dst)
-{
-	memset(dst, 0, sizeof(*dst));
-	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-	dst->d_flags =
-		xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
-	dst->d_id = INT_GET(src->d_id, ARCH_CONVERT);
-	dst->d_blk_hardlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_hardlimit, ARCH_CONVERT));
-	dst->d_blk_softlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_softlimit, ARCH_CONVERT));
-	dst->d_ino_hardlimit = (__uint64_t)
-		INT_GET(src->d_ino_hardlimit, ARCH_CONVERT);
-	dst->d_ino_softlimit = (__uint64_t)
-		INT_GET(src->d_ino_softlimit, ARCH_CONVERT);
-	dst->d_bcount = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_bcount, ARCH_CONVERT));
-	dst->d_icount = (__uint64_t) INT_GET(src->d_icount, ARCH_CONVERT);
-	dst->d_btimer = (__uint32_t) INT_GET(src->d_btimer, ARCH_CONVERT);
-	dst->d_itimer = (__uint32_t) INT_GET(src->d_itimer, ARCH_CONVERT);
-	dst->d_iwarns = INT_GET(src->d_iwarns, ARCH_CONVERT);
-	dst->d_bwarns = INT_GET(src->d_bwarns, ARCH_CONVERT);
-
-	dst->d_rtb_hardlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_hardlimit, ARCH_CONVERT));
-	dst->d_rtb_softlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_softlimit, ARCH_CONVERT));
-	dst->d_rtbcount = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtbcount, ARCH_CONVERT));
-	dst->d_rtbtimer = (__uint32_t) INT_GET(src->d_rtbtimer, ARCH_CONVERT);
-	dst->d_rtbwarns = INT_GET(src->d_rtbwarns, ARCH_CONVERT);
-
-	/*
-	 * Internally, we don't reset all the timers when quota enforcement
-	 * gets turned off. No need to confuse the userlevel code,
-	 * so return zeroes in that case.
-	 */
-	if (! XFS_IS_QUOTA_ENFORCED(mp)) {
-		dst->d_btimer = 0;
-		dst->d_itimer = 0;
-		dst->d_rtbtimer = 0;
-	}
-
-#ifdef DEBUG
-	if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) {
-		if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
-		    (dst->d_blk_softlimit > 0)) {
-			ASSERT(dst->d_btimer != 0);
-		}
-		if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
-		    (dst->d_ino_softlimit > 0)) {
-			ASSERT(dst->d_itimer != 0);
-		}
-	}
-#endif
-}
-
-STATIC uint
-xfs_qm_import_qtype_flags(
-	uint uflags)
-{
-	/*
-	 * Can't be both at the same time.
-	 */
-	if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
-	     (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
-	    ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0))
-		return (0);
-
-	return (uflags & XFS_USER_QUOTA) ?
-		XFS_DQ_USER : XFS_DQ_GROUP;
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
-	uint flags)
-{
-	/*
-	 * Can't be both at the same time.
-	 */
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) !=
-		(XFS_GROUP_QUOTA | XFS_USER_QUOTA));
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0);
-
-	return (flags & XFS_DQ_USER) ?
-		XFS_USER_QUOTA : XFS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_import_flags(
-	uint uflags)
-{
-	uint flags = 0;
-
-	if (uflags & XFS_QUOTA_UDQ_ACCT)
-		flags |= XFS_UQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_GDQ_ACCT)
-		flags |= XFS_GQUOTA_ACCT;
-	if (uflags & XFS_QUOTA_UDQ_ENFD)
-		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & XFS_QUOTA_GDQ_ENFD)
-		flags |= XFS_GQUOTA_ENFD;
-	return (flags);
-}
-
-
-STATIC uint
-xfs_qm_export_flags(
-	uint flags)
-{
-	uint uflags;
-
-	uflags = 0;
-	if (flags & XFS_UQUOTA_ACCT)
-		uflags |= XFS_QUOTA_UDQ_ACCT;
-	if (flags & XFS_GQUOTA_ACCT)
-		uflags |= XFS_QUOTA_GDQ_ACCT;
-	if (flags & XFS_UQUOTA_ENFD)
-		uflags |= XFS_QUOTA_UDQ_ENFD;
-	if (flags & XFS_GQUOTA_ENFD)
-		uflags |= XFS_QUOTA_GDQ_ENFD;
-	return (uflags);
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
- */
-void
-xfs_qm_dqrele_all_inodes(
-	struct xfs_mount *mp,
-	uint		 flags)
-{
-	vmap_t		vmap;
-	xfs_inode_t	*ip, *topino;
-	uint		ireclaims;
-	vnode_t		*vp;
-	boolean_t	vnode_refd;
-
-	ASSERT(mp->m_quotainfo);
-
-again:
-	XFS_MOUNT_ILOCK(mp);
-	ip = mp->m_inodes;
-	if (ip == NULL) {
-		XFS_MOUNT_IUNLOCK(mp);
-		return;
-	}
-	do {
-		/* Skip markers inserted by xfs_sync */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
-		}
-		/* Root inode, rbmip and rsumip have associated blocks */
-		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
-			ASSERT(ip->i_udquot == NULL);
-			ASSERT(ip->i_gdquot == NULL);
-			ip = ip->i_mnext;
-			continue;
-		}
-		vp = XFS_ITOV_NULL(ip);
-		if (!vp) {
-			ASSERT(ip->i_udquot == NULL);
-			ASSERT(ip->i_gdquot == NULL);
-			ip = ip->i_mnext;
-			continue;
-		}
-		vnode_refd = B_FALSE;
-		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-			/*
-			 * Sample vp mapping while holding the mplock, lest
-			 * we come across a non-existent vnode.
-			 */
-			VMAP(vp, vmap);
-			ireclaims = mp->m_ireclaims;
-			topino = mp->m_inodes;
-			XFS_MOUNT_IUNLOCK(mp);
-
-			/* XXX restart limit ? */
-			if ( ! (vp = vn_get(vp, &vmap)))
-				goto again;
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-			vnode_refd = B_TRUE;
-		} else {
-			ireclaims = mp->m_ireclaims;
-			topino = mp->m_inodes;
-			XFS_MOUNT_IUNLOCK(mp);
-		}
-
-		/*
-		 * We don't keep the mountlock across the dqrele() call,
-		 * since it can take a while..
-		 */
-		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-			xfs_qm_dqrele(ip->i_udquot);
-			ip->i_udquot = NULL;
-		}
-		if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
-			xfs_qm_dqrele(ip->i_gdquot);
-			ip->i_gdquot = NULL;
-		}
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		/*
-		 * Wait until we've dropped the ilock and mountlock to
-		 * do the vn_rele. Or be condemned to an eternity in the
-		 * inactive code in hell.
-		 */
-		if (vnode_refd)
-			VN_RELE(vp);
-		XFS_MOUNT_ILOCK(mp);
-		/*
-		 * If an inode was inserted or removed, we gotta
-		 * start over again.
-		 */
-		if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
-			/* XXX use a sentinel */
-			XFS_MOUNT_IUNLOCK(mp);
-			goto again;
-		}
-		ip = ip->i_mnext;
-	} while (ip != mp->m_inodes);
-
-	XFS_MOUNT_IUNLOCK(mp);
-}
-
-/*------------------------------------------------------------------------*/
-#ifdef DEBUG
-/*
- * This contains all the test functions for XFS disk quotas.
- * Currently it does a quota accounting check. ie. it walks through
- * all inodes in the file system, calculating the dquot accounting fields,
- * and prints out any inconsistencies.
- */
-xfs_dqhash_t *qmtest_udqtab;
-xfs_dqhash_t *qmtest_gdqtab;
-int	      qmtest_hashmask;
-int	      qmtest_nfails;
-mutex_t	      qcheck_lock;
-
-#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-				 (__psunsigned_t)(id)) & \
-				(qmtest_hashmask - 1))
-
-#define DQTEST_HASH(mp, id, type)   ((type & XFS_DQ_USER) ? \
-				     (qmtest_udqtab + \
-				      DQTEST_HASHVAL(mp, id)) : \
-				     (qmtest_gdqtab + \
-				      DQTEST_HASHVAL(mp, id)))
-
-#define DQTEST_LIST_PRINT(l, NXT, title) \
-{ \
-	  xfs_dqtest_t	*dqp; int i = 0;\
-	  printk("%s (#%d)\n", title, (int) (l)->qh_nelems); \
-	  for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
-	       dqp = (xfs_dqtest_t *)dqp->NXT) { \
-	    printk("\t%d\.\t\"%d (%s)\"\t bcnt = %d, icnt = %d\n", \
-			 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp),	     \
-			 dqp->d_bcount, dqp->d_icount); } \
-}
-
-typedef struct dqtest {
-	xfs_dqmarker_t	q_lists;
-	xfs_dqhash_t	*q_hash;	/* the hashchain header */
-	xfs_mount_t	*q_mount;	/* filesystem this relates to */
-	xfs_dqid_t	d_id;		/* user id or group id */
-	xfs_qcnt_t	d_bcount;	/* # disk blocks owned by the user */
-	xfs_qcnt_t	d_icount;	/* # inodes owned by the user */
-} xfs_dqtest_t;
-
-STATIC void
-xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
-{
-	xfs_dquot_t *d;
-	if (((d) = (h)->qh_next))
-		(d)->HL_PREVP = &((dqp)->HL_NEXT);
-	(dqp)->HL_NEXT = d;
-	(dqp)->HL_PREVP = &((h)->qh_next);
-	(h)->qh_next = (xfs_dquot_t *)dqp;
-	(h)->qh_version++;
-	(h)->qh_nelems++;
-}
-STATIC void
-xfs_qm_dqtest_print(
-	xfs_dqtest_t	*d)
-{
-	printk("-----------DQTEST DQUOT----------------\n");
-	printk("---- dquot ID =	 %d\n", d->d_id);
-	printk("---- type     =	 %s\n", XFS_QM_ISUDQ(d) ? "USR" : "GRP");
-	printk("---- fs	      =	 0x%p\n", d->q_mount);
-	printk("---- bcount   =	 %Lu (0x%x)\n", d->d_bcount, (int)d->d_bcount);
-	printk("---- icount   =	 %Lu (0x%x)\n", d->d_icount, (int)d->d_icount);
-	printk("---------------------------\n");
-}
-
-STATIC void
-xfs_qm_dqtest_failed(
-	xfs_dqtest_t	*d,
-	xfs_dquot_t	*dqp,
-	char		*reason,
-	xfs_qcnt_t	a,
-	xfs_qcnt_t	b,
-	int		error)
-{
-	qmtest_nfails++;
-	if (error)
-		printk("quotacheck failed for %d, error = %d\nreason = %s\n",
-		       INT_GET(d->d_id, ARCH_CONVERT), error, reason);
-	else
-		printk("quotacheck failed for %d (%s) [%d != %d]\n",
-		       INT_GET(d->d_id, ARCH_CONVERT), reason, (int)a, (int)b);
-	xfs_qm_dqtest_print(d);
-	if (dqp)
-		xfs_qm_dqprint(dqp);
-}
-
-STATIC int
-xfs_dqtest_cmp2(
-	xfs_dqtest_t	*d,
-	xfs_dquot_t	*dqp)
-{
-	int err = 0;
-	if (INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) != d->d_icount) {
-		xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
-				     INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), d->d_icount, 0);
-		err++;
-	}
-	if (INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) != d->d_bcount) {
-		xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
-				     INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), d->d_bcount, 0);
-		err++;
-	}
-	if (INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) &&
-	    INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) >= INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT)) {
-		if (INT_ISZERO(dqp->q_core.d_btimer, ARCH_CONVERT) &&
-		    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT)) {
-			printk("%d [%s] [0x%p] BLK TIMER NOT STARTED\n",
-			       d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
-			err++;
-		}
-	}
-	if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) &&
-	    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT)) {
-		if (INT_ISZERO(dqp->q_core.d_itimer, ARCH_CONVERT) &&
-		    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT)) {
-			printk("%d [%s] [0x%p] INO TIMER NOT STARTED\n",
-			       d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
-			err++;
-		}
-	}
-#if 0
-	if (!err) {
-		printk("%d [%s] [0x%p] qchecked\n",
-		       d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount);
-	}
-#endif
-	return (err);
-}
-
-STATIC void
-xfs_dqtest_cmp(
-	xfs_dqtest_t	*d)
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-
-	/* xfs_qm_dqtest_print(d); */
-	if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
-				 &dqp))) {
-		xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
-		return;
-	}
-	xfs_dqtest_cmp2(d, dqp);
-	xfs_qm_dqput(dqp);
-}
-
-STATIC int
-xfs_qm_internalqcheck_dqget(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,
-	uint		type,
-	xfs_dqtest_t	**O_dq)
-{
-	xfs_dqtest_t	*d;
-	xfs_dqhash_t	*h;
-
-	h = DQTEST_HASH(mp, id, type);
-	for (d = (xfs_dqtest_t *) h->qh_next; d != NULL;
-	     d = (xfs_dqtest_t *) d->HL_NEXT) {
-		/* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
-		if (d->d_id == id && mp == d->q_mount) {
-			*O_dq = d;
-			return (0);
-		}
-	}
-	d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
-	d->dq_flags = type;
-	d->d_id = id;
-	d->q_mount = mp;
-	d->q_hash = h;
-	xfs_qm_hashinsert(h, d);
-	*O_dq = d;
-	return (0);
-}
-
-STATIC void
-xfs_qm_internalqcheck_get_dquots(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	uid,
-	xfs_dqid_t	gid,
-	xfs_dqtest_t	**ud,
-	xfs_dqtest_t	**gd)
-{
-	if (XFS_IS_UQUOTA_ON(mp))
-		xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
-	if (XFS_IS_GQUOTA_ON(mp))
-		xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
-}
-
-
-STATIC void
-xfs_qm_internalqcheck_dqadjust(
-	xfs_inode_t		*ip,
-	xfs_dqtest_t		*d)
-{
-	d->d_icount++;
-	d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
-}
-
-STATIC int
-xfs_qm_internalqcheck_adjust(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	void		*buffer,	/* not used */
-	xfs_daddr_t	bno,		/* starting block of inode cluster */
-	void		*dip,		/* not used */
-	int		*res)		/* bulkstat result code */
-{
-	xfs_inode_t		*ip;
-	xfs_dqtest_t		*ud, *gd;
-	uint			lock_flags;
-	boolean_t		ipreleased;
-	int			error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-		*res = BULKSTAT_RV_NOTHING;
-		qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n",
-			(unsigned long long) ino,
-			(unsigned long long) mp->m_sb.sb_uquotino,
-			(unsigned long long) mp->m_sb.sb_gquotino);
-		return XFS_ERROR(EINVAL);
-	}
-	ipreleased = B_FALSE;
- again:
-	lock_flags = XFS_ILOCK_SHARED;
-	if ((error = xfs_iget(mp, tp, ino, lock_flags, &ip, bno))) {
-		*res = BULKSTAT_RV_NOTHING;
-		return (error);
-	}
-
-	if (ip->i_d.di_mode == 0) {
-		xfs_iput_new(ip, lock_flags);
-		*res = BULKSTAT_RV_NOTHING;
-		return XFS_ERROR(ENOENT);
-	}
-
-	/*
-	 * This inode can have blocks after eof which can get released
-	 * when we send it to inactive. Since we don't check the dquot
-	 * until the after all our calculations are done, we must get rid
-	 * of those now.
-	 */
-	if (! ipreleased) {
-		xfs_iput(ip, lock_flags);
-		ipreleased = B_TRUE;
-		goto again;
-	}
-	xfs_qm_internalqcheck_get_dquots(mp,
-					(xfs_dqid_t) ip->i_d.di_uid,
-					(xfs_dqid_t) ip->i_d.di_gid,
-					&ud, &gd);
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		ASSERT(ud);
-		xfs_qm_internalqcheck_dqadjust(ip, ud);
-	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		ASSERT(gd);
-		xfs_qm_internalqcheck_dqadjust(ip, gd);
-	}
-	xfs_iput(ip, lock_flags);
-	*res = BULKSTAT_RV_DIDONE;
-	return (0);
-}
-
-
-/* PRIVATE, debugging */
-int
-xfs_qm_internalqcheck(
-	xfs_mount_t	*mp)
-{
-	xfs_ino_t	lastino;
-	int		done, count;
-	int		i;
-	xfs_dqtest_t	*d, *e;
-	xfs_dqhash_t	*h1;
-	int		error;
-
-	lastino = 0;
-	qmtest_hashmask = 32;
-	count = 5;
-	done = 0;
-	qmtest_nfails = 0;
-
-	if (! XFS_IS_QUOTA_ON(mp))
-		return XFS_ERROR(ESRCH);
-
-	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	XFS_bflush(mp->m_ddev_targp);
-	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	XFS_bflush(mp->m_ddev_targp);
-
-	mutex_lock(&qcheck_lock, PINOD);
-	/* There should be absolutely no quota activity while this
-	   is going on. */
-	qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
-				    sizeof(xfs_dqhash_t), KM_SLEEP);
-	qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
-				    sizeof(xfs_dqhash_t), KM_SLEEP);
-	do {
-		/*
-		 * Iterate thru all the inodes in the file system,
-		 * adjusting the corresponding dquot counters
-		 */
-		if ((error = xfs_bulkstat(mp, NULL, &lastino, &count,
-				 xfs_qm_internalqcheck_adjust,
-				 0, NULL, BULKSTAT_FG_IGET, &done))) {
-			break;
-		}
-	} while (! done);
-	if (error) {
-		printk("Bulkstat returned error 0x%x\n",
-		       error);
-	}
-	printk("Checking results against system dquots\n");
-	for (i = 0; i < qmtest_hashmask; i++) {
-		h1 = &qmtest_udqtab[i];
-		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
-			xfs_dqtest_cmp(d);
-			e = (xfs_dqtest_t *) d->HL_NEXT;
-			kmem_free(d, sizeof(xfs_dqtest_t));
-			d = e;
-		}
-		h1 = &qmtest_gdqtab[i];
-		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
-			xfs_dqtest_cmp(d);
-			e = (xfs_dqtest_t *) d->HL_NEXT;
-			kmem_free(d, sizeof(xfs_dqtest_t));
-			d = e;
-		}
-	}
-
-	if (qmtest_nfails) {
-		printk("**************	quotacheck failed  **************\n");
-		printk("failures = %d\n", qmtest_nfails);
-	} else {
-		printk("**************	quotacheck successful! **************\n");
-	}
-	kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
-	kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
-	mutex_unlock(&qcheck_lock);
-	return (qmtest_nfails);
-}
-
-#endif /* DEBUG */
diff -Nru a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/xfs/xfs_qmops.c	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.	 Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#include <xfs.h>
+
+#ifndef CONFIG_XFS_QUOTA
+STATIC struct xfs_dquot *
+xfs_dqvopchown_default(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_dquot	**dqp,
+	struct xfs_dquot	*dq)
+{
+	return NULL;
+}
+
+xfs_qmops_t	xfs_qmcore_xfs = {
+	.xfs_qminit		= (xfs_qminit_t) fs_noerr,
+	.xfs_qmdone		= (xfs_qmdone_t) fs_noerr,
+	.xfs_qmmount		= (xfs_qmmount_t) fs_noerr,
+	.xfs_qmunmount		= (xfs_qmunmount_t) fs_noerr,
+	.xfs_dqrele		= (xfs_dqrele_t) fs_noerr,
+	.xfs_dqattach		= (xfs_dqattach_t) fs_noerr,
+	.xfs_dqdetach		= (xfs_dqdetach_t) fs_noerr,
+	.xfs_dqpurgeall		= (xfs_dqpurgeall_t) fs_noerr,
+	.xfs_dqvopalloc		= (xfs_dqvopalloc_t) fs_noerr,
+	.xfs_dqvopcreate	= (xfs_dqvopcreate_t) fs_noerr,
+	.xfs_dqvoprename	= (xfs_dqvoprename_t) fs_noerr,
+	.xfs_dqvopchown		= xfs_dqvopchown_default,
+	.xfs_dqvopchownresv	= (xfs_dqvopchownresv_t) fs_noerr,
+};
+#endif /* CONFIG_XFS_QUOTA */
diff -Nru a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
--- a/fs/xfs/xfs_quota.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_quota.h	Mon Mar 31 13:41:08 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -33,6 +33,12 @@
 #define __XFS_QUOTA_H__
 
 /*
+ * The ondisk form of a dquot structure.
+ */
+#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
+#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
+
+/*
  * uid_t and gid_t are hard-coded to 32 bits in the inode.
  * Hence, an 'id' in a dquot is 32 bits..
  */
@@ -47,6 +53,100 @@
 typedef __uint16_t	xfs_qwarncnt_t;
 
 /*
+ * This is the main portion of the on-disk representation of quota
+ * information for a user. This is the q_core of the xfs_dquot_t that
+ * is kept in kernel memory. We pad this with some more expansion room
+ * to construct the on disk structure.
+ */
+typedef struct	xfs_disk_dquot {
+/*16*/	u_int16_t	d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
+/*8 */	u_int8_t	d_version;	/* dquot version */
+/*8 */	u_int8_t	d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
+/*32*/	xfs_dqid_t	d_id;		/* user,project,group id */
+/*64*/	xfs_qcnt_t	d_blk_hardlimit;/* absolute limit on disk blks */
+/*64*/	xfs_qcnt_t	d_blk_softlimit;/* preferred limit on disk blks */
+/*64*/	xfs_qcnt_t	d_ino_hardlimit;/* maximum # allocated inodes */
+/*64*/	xfs_qcnt_t	d_ino_softlimit;/* preferred inode limit */
+/*64*/	xfs_qcnt_t	d_bcount;	/* disk blocks owned by the user */
+/*64*/	xfs_qcnt_t	d_icount;	/* inodes owned by the user */
+/*32*/	__int32_t	d_itimer;	/* zero if within inode limits; if not,
+					   this is when we refuse service */
+/*32*/	__int32_t	d_btimer;	/* similar to above; for disk blocks */
+/*16*/	xfs_qwarncnt_t	d_iwarns;	/* warnings issued wrt num inodes */
+/*16*/	xfs_qwarncnt_t	d_bwarns;	/* warnings issued wrt disk blocks */
+/*32*/	__int32_t	d_pad0;		/* 64 bit align */
+/*64*/	xfs_qcnt_t	d_rtb_hardlimit;/* absolute limit on realtime blks */
+/*64*/	xfs_qcnt_t	d_rtb_softlimit;/* preferred limit on RT disk blks */
+/*64*/	xfs_qcnt_t	d_rtbcount;	/* realtime blocks owned */
+/*32*/	__int32_t	d_rtbtimer;	/* similar to above; for RT disk blocks */
+/*16*/	xfs_qwarncnt_t	d_rtbwarns;	/* warnings issued wrt RT disk blocks */
+/*16*/	__uint16_t	d_pad;
+} xfs_disk_dquot_t;
+
+/*
+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
+ * carrying the unnecessary padding would be a waste of memory.
+ */
+typedef struct xfs_dqblk {
+	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
+	char		  dd_fill[32];	/* filling for posterity */
+} xfs_dqblk_t;
+
+/*
+ * flags for the dq_flags field in the dquot.
+ */
+#define XFS_DQ_USER		0x0001		/* a user quota */
+/* #define XFS_DQ_PROJ		0x0002		-- project quota (IRIX) */
+#define XFS_DQ_GROUP		0x0004		/* a group quota */
+#define XFS_DQ_FLOCKED		0x0008		/* flush lock taken */
+#define XFS_DQ_DIRTY		0x0010		/* dquot is dirty */
+#define XFS_DQ_WANT		0x0020		/* for lookup/reclaim race */
+#define XFS_DQ_INACTIVE		0x0040		/* dq off mplist & hashlist */
+#define XFS_DQ_MARKER		0x0080		/* sentinel */
+
+/*
+ * In the worst case, when both user and group quotas are on,
+ * we can have a max of three dquots changing in a single transaction.
+ */
+#define XFS_DQUOT_LOGRES(mp)	(sizeof(xfs_disk_dquot_t) * 3)
+
+
+/*
+ * These are the structures used to lay out dquots and quotaoff
+ * records on the log. Quite similar to those of inodes.
+ */
+
+/*
+ * log format struct for dquots.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ */
+typedef struct xfs_dq_logformat {
+	__uint16_t		qlf_type;      /* dquot log item type */
+	__uint16_t		qlf_size;      /* size of this item */
+	xfs_dqid_t		qlf_id;	       /* usr/grp id number : 32 bits */
+	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
+	__int32_t		qlf_len;       /* len of dquot buffer */
+	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
+} xfs_dq_logformat_t;
+
+/*
+ * log format struct for QUOTAOFF records.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
+ * to the first and ensures that the first logitem is taken out of the AIL
+ * only when the last one is securely committed.
+ */
+typedef struct xfs_qoff_logformat {
+	unsigned short		qf_type;	/* quotaoff log item type */
+	unsigned short		qf_size;	/* size of this item */
+	unsigned int		qf_flags;	/* USR and/or GRP */
+	char			qf_pad[12];	/* padding for future */
+} xfs_qoff_logformat_t;
+
+
+/*
  * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
  */
 #define XFS_UQUOTA_ACCT 0x0001	/* user quota accounting ON */
@@ -134,13 +234,14 @@
 #define XFS_QMOPT_QUOTALL	(XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA)
 #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
 
+#ifdef __KERNEL__
 /*
  * This check is done typically without holding the inode lock;
  * that may seem racey, but it is harmless in the context that it is used.
  * The inode cannot go inactive as long a reference is kept, and
  * therefore if dquot(s) were attached, they'll stay consistent.
  * If, for example, the ownership of the inode changes while
- * we didnt have the inode locked, the appropriate dquot(s) will be
+ * we didn't have the inode locked, the appropriate dquot(s) will be
  * attached atomically.
  */
 #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
@@ -161,190 +262,93 @@
 #define XFS_MOUNT_QUOTA_MASK	(XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
 				 XFS_GQUOTA_ACTIVE)
 
-#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
-
 
-#ifdef __KERNEL__
-
-#ifdef CONFIG_XFS_QUOTA
 /*
- * External Interface to the XFS disk quota subsystem.
+ * The structure kept inside the xfs_trans_t to keep track of dquot changes
+ * within a transaction and apply them later.
  */
-struct	xfs_disk_dquot;
-struct	xfs_dqhash;
-struct	xfs_dquot;
-struct	xfs_inode;
-struct	xfs_mount;
-struct	xfs_trans;
-
-/*
- * Quota Manager Interface.
- */
-extern struct xfs_qm   *xfs_qm_init(void);
-extern void		xfs_qm_destroy(struct xfs_qm *);
-extern int		xfs_qm_dqflush_all(struct xfs_mount *, int);
-extern int		xfs_qm_dqattach(struct xfs_inode *, uint);
-extern int		xfs_qm_dqpurge_all(struct xfs_mount *, uint);
-extern void		xfs_qm_mount_quotainit(struct xfs_mount *, uint);
-extern void		xfs_qm_unmount_quotadestroy(struct xfs_mount *);
-extern int		xfs_qm_mount_quotas(struct xfs_mount *);
-extern int		xfs_qm_unmount_quotas(struct xfs_mount *);
-extern void		xfs_qm_dqdettach_inode(struct xfs_inode *);
-extern int		xfs_qm_sync(struct xfs_mount *, short);
-
-/*
- * Dquot interface.
- */
-extern void		xfs_dqlock(struct xfs_dquot *);
-extern void		xfs_dqunlock(struct xfs_dquot *);
-extern void		xfs_dqunlock_nonotify(struct xfs_dquot *);
-extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
-extern void		xfs_qm_dqput(struct xfs_dquot *);
-extern void		xfs_qm_dqrele(struct xfs_dquot *);
-extern xfs_dqid_t	xfs_qm_dqid(struct xfs_dquot *);
-extern int		xfs_qm_dqget(struct xfs_mount *,
-				     struct xfs_inode *, xfs_dqid_t,
-				      uint, uint, struct xfs_dquot **);
-extern int		xfs_qm_dqcheck(struct xfs_disk_dquot *,
-				       xfs_dqid_t, uint, uint, char *);
-
-/*
- * Vnodeops specific code that should actually be _in_ xfs_vnodeops.c, but
- * is here because it's nicer to keep vnodeops (therefore, XFS) lean
- * and clean.
- */
-extern struct xfs_dquot *	xfs_qm_vop_chown(struct xfs_trans *,
-						 struct xfs_inode *,
-						 struct xfs_dquot **,
-						 struct xfs_dquot *);
-extern int		xfs_qm_vop_dqalloc(struct xfs_mount *,
-					   struct xfs_inode *,
-					   uid_t, gid_t, uint,
-					   struct xfs_dquot	**,
-					   struct xfs_dquot	**);
-
-extern int		xfs_qm_vop_chown_reserve(struct xfs_trans *,
-						 struct xfs_inode *,
-						 struct xfs_dquot *,
-						 struct xfs_dquot *,
-						 uint);
-
-extern int		xfs_qm_vop_rename_dqattach(struct xfs_inode **);
-extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
-						struct xfs_trans *,
-						struct xfs_inode *,
-						struct xfs_dquot *,
-						struct xfs_dquot *);
-
-
-/*
- * Dquot Transaction interface
- */
-extern void		xfs_trans_alloc_dqinfo(struct xfs_trans *);
-extern void		xfs_trans_free_dqinfo(struct xfs_trans *);
-extern void		xfs_trans_dup_dqinfo(struct xfs_trans *,
-					     struct xfs_trans *);
-extern void		xfs_trans_mod_dquot(struct xfs_trans *,
-					    struct xfs_dquot *,
-					    uint, long);
-extern void		xfs_trans_mod_dquot_byino(struct xfs_trans *,
-						  struct xfs_inode *,
-						  uint, long);
-extern void		xfs_trans_apply_dquot_deltas(struct xfs_trans *);
-extern void		xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
-
-extern int		xfs_trans_reserve_quota_nblks(struct xfs_trans *,
-						      struct xfs_inode *,
-						      long, long, uint);
-
-
-extern int		xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
-							 struct xfs_dquot *,
-							 struct xfs_dquot *,
-							 long, long, uint);
-extern void		xfs_trans_log_dquot(struct xfs_trans *,
-					    struct xfs_dquot *);
-extern void		xfs_trans_dqjoin(struct xfs_trans *,
-					 struct xfs_dquot *);
-extern void		xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
-
-# define _XQM_ZONE_DESTROY(z)	((z)? kmem_cache_destroy(z) : (void)0)
-
-#else
-# define xfs_qm_init()					(NULL)
-# define xfs_qm_destroy(xqm)				do { } while (0)
-# define xfs_qm_dqflush_all(m,t)			(ENOSYS)
-# define xfs_qm_dqattach(i,t)				(ENOSYS)
-# define xfs_qm_dqpurge_all(m,t)			(ENOSYS)
-# define xfs_qm_mount_quotainit(m,t)			do { } while (0)
-# define xfs_qm_unmount_quotadestroy(m)			do { } while (0)
-# define xfs_qm_mount_quotas(m)				(ENOSYS)
-# define xfs_qm_unmount_quotas(m)			(ENOSYS)
-# define xfs_qm_dqdettach_inode(i)			do { } while (0)
-# define xfs_qm_sync(m,t)				(ENOSYS)
-# define xfs_dqlock(d)					do { } while (0)
-# define xfs_dqunlock(d)				do { } while (0)
-# define xfs_dqunlock_nonotify(d)			do { } while (0)
-# define xfs_dqlock2(d1,d2)				do { } while (0)
-# define xfs_qm_dqput(d)				do { } while (0)
-# define xfs_qm_dqrele(d)				do { } while (0)
-# define xfs_qm_dqid(d)					(-1)
-# define xfs_qm_dqget(m,i,di,t,f,d)			(ENOSYS)
-# define xfs_qm_dqcheck(dd,di,t,f,s)			(ENOSYS)
-# define xfs_trans_alloc_dqinfo(t)			do { } while (0)
-# define xfs_trans_free_dqinfo(t)			do { } while (0)
-# define xfs_trans_dup_dqinfo(t1,t2)			do { } while (0)
-# define xfs_trans_mod_dquot(t,d,f,x)			do { } while (0)
-# define xfs_trans_mod_dquot_byino(t,i,f,x)		do { } while (0)
-# define xfs_trans_apply_dquot_deltas(t)		do { } while (0)
-# define xfs_trans_unreserve_and_mod_dquots(t)		do { } while (0)
-# define xfs_trans_reserve_quota_nblks(t,i,nb,ni,f)	(ENOSYS)
-# define xfs_trans_reserve_quota_bydquots(t,x,y,b,i,f)	(ENOSYS)
-# define xfs_trans_log_dquot(t,d)			do { } while (0)
-# define xfs_trans_dqjoin(t,d)				do { } while (0)
-# define xfs_qm_dqrele_all_inodes(m,t)			do { } while (0)
-# define xfs_qm_vop_chown(t,i,d1,d2)			(NULL)
-# define xfs_qm_vop_dqalloc(m,i,u,g,f,d1,d2)		(ENOSYS)
-# define xfs_qm_vop_chown_reserve(t,i,d1,d2,f)		(ENOSYS)
-# define xfs_qm_vop_rename_dqattach(i)			(ENOSYS)
-# define xfs_qm_vop_dqattach_and_dqmod_newinode(t,i,x,y) do { } while (0)
-# define _XQM_ZONE_DESTROY(z)				do { } while (0)
-#endif	/* CONFIG_XFS_QUOTA */
-
-/*
- * Regular disk block quota reservations
- */
-#define		xfs_trans_reserve_blkquota(tp, ip, nblks) \
-xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_REGBLKS)
-
-#define		xfs_trans_reserve_blkquota_force(tp, ip, nblks) \
-xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, \
-		XFS_QMOPT_RES_REGBLKS|XFS_QMOPT_FORCE_RES)
-
-#define		xfs_trans_unreserve_blkquota(tp, ip, nblks) \
-(void)xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_REGBLKS)
-
-#define		xfs_trans_reserve_quota(tp, udq, gdq, nb, ni, f) \
-xfs_trans_reserve_quota_bydquots(tp, udq, gdq, nb, ni, f|XFS_QMOPT_RES_REGBLKS)
-
-#define		xfs_trans_unreserve_quota(tp, ud, gd, b, i, f) \
-xfs_trans_reserve_quota_bydquots(tp, ud, gd, -(b), -(i), f|XFS_QMOPT_RES_REGBLKS)
-
-/*
- * Realtime disk block quota reservations
- */
-#define		xfs_trans_reserve_rtblkquota(mp, tp, ip, nblks) \
-xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_RTBLKS)
-
-#define		xfs_trans_unreserve_rtblkquota(tp, ip, nblks) \
-(void)xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_RTBLKS)
+typedef struct xfs_dqtrx {
+	struct xfs_dquot *qt_dquot;	  /* the dquot this refers to */
+	ulong		qt_blk_res;	  /* blks reserved on a dquot */
+	ulong		qt_blk_res_used;  /* blks used from the reservation */
+	ulong		qt_ino_res;	  /* inode reserved on a dquot */
+	ulong		qt_ino_res_used;  /* inodes used from the reservation */
+	long		qt_bcount_delta;  /* dquot blk count changes */
+	long		qt_delbcnt_delta; /* delayed dquot blk count changes */
+	long		qt_icount_delta;  /* dquot inode count changes */
+	ulong		qt_rtblk_res;	  /* # blks reserved on a dquot */
+	ulong		qt_rtblk_res_used;/* # blks used from reservation */
+	long		qt_rtbcount_delta;/* dquot realtime blk changes */
+	long		qt_delrtb_delta;  /* delayed RT blk count changes */
+} xfs_dqtrx_t;
+
+/*
+ * Dquot transaction functions, used if quota is enabled.
+ */
+typedef void	(*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
+typedef void	(*qo_mod_dquot_byino_t)(struct xfs_trans *,
+				struct xfs_inode *, uint, long);
+typedef void	(*qo_free_dqinfo_t)(struct xfs_trans *);
+typedef void	(*qo_apply_dquot_deltas_t)(struct xfs_trans *);
+typedef void	(*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
+typedef int	(*qo_reserve_quota_nblks_t)(
+				struct xfs_trans *, struct xfs_mount *,
+				struct xfs_inode *, long, long, uint);
+typedef int	(*qo_reserve_quota_bydquots_t)(
+				struct xfs_trans *, struct xfs_mount *,
+				struct xfs_dquot *, struct xfs_dquot *,
+				long, long, uint);
+typedef struct xfs_dqtrxops {
+	qo_dup_dqinfo_t			qo_dup_dqinfo;
+	qo_free_dqinfo_t		qo_free_dqinfo;
+	qo_mod_dquot_byino_t		qo_mod_dquot_byino;
+	qo_apply_dquot_deltas_t		qo_apply_dquot_deltas;
+	qo_reserve_quota_nblks_t	qo_reserve_quota_nblks;
+	qo_reserve_quota_bydquots_t	qo_reserve_quota_bydquots;
+	qo_unreserve_and_mod_dquots_t	qo_unreserve_and_mod_dquots;
+} xfs_dqtrxops_t;
+
+#define XFS_DQTRXOP(mp, tp, op, args...) \
+			((mp)->m_qm_ops.xfs_dqtrxops ? \
+			((mp)->m_qm_ops.xfs_dqtrxops->op)(tp, ## args) : 0)
+
+#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
+	XFS_DQTRXOP(mp, otp, qo_dup_dqinfo, ntp)
+#define XFS_TRANS_FREE_DQINFO(mp, tp) \
+	XFS_DQTRXOP(mp, tp, qo_free_dqinfo)
+#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
+	XFS_DQTRXOP(mp, tp, qo_mod_dquot_byino, ip, field, delta)
+#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
+	XFS_DQTRXOP(mp, tp, qo_apply_dquot_deltas)
+#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
+	XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
+#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
+	XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
+#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
+	XFS_DQTRXOP(mp, tp, qo_unreserve_and_mod_dquots)
+
+#define XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, nblks) \
+	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
+				XFS_QMOPT_RES_REGBLKS)
+#define XFS_TRANS_RESERVE_BLKQUOTA_FORCE(mp, tp, ip, nblks) \
+	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
+				XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES)
+#define XFS_TRANS_UNRESERVE_BLKQUOTA(mp, tp, ip, nblks) \
+	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), 0, \
+				XFS_QMOPT_RES_REGBLKS)
+#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
+	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
+				f | XFS_QMOPT_RES_REGBLKS)
+#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
+	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+				f | XFS_QMOPT_RES_REGBLKS)
 
-#define		xfs_trans_reserve_rtquota(mp, tp, uq, pq, blks, f) \
-xfs_trans_reserve_quota_bydquots(mp, tp, uq, pq, blks, 0, f|XFS_QMOPT_RES_RTBLKS)
+extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
 
-#define		xfs_trans_unreserve_rtquota(tp, uq, pq, blks) \
-xfs_trans_reserve_quota_bydquots(tp, uq, pq, -(blks), XFS_QMOPT_RES_RTBLKS)
+extern struct bhv_vfsops xfs_qmops;
 
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
 
 #endif	/* __KERNEL__ */
 
diff -Nru a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
--- a/fs/xfs/xfs_quota_priv.h	Mon Mar 31 13:41:08 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE	10
-
-/* Number of dquots that fit in to a dquot block */
-#define XFS_QM_DQPERBLK(mp)	((mp)->m_quotainfo->qi_dqperchunk)
-
-#define XFS_ISLOCKED_INODE(ip)		(ismrlocked(&(ip)->i_lock, \
-					    MR_UPDATE | MR_ACCESS) != 0)
-#define XFS_ISLOCKED_INODE_EXCL(ip)	(ismrlocked(&(ip)->i_lock, \
-					    MR_UPDATE) != 0)
-
-#define XFS_DQ_IS_ADDEDTO_TRX(t, d)	((d)->q_transp == (t))
-
-#define XFS_QI_MPLRECLAIMS(mp)	((mp)->m_quotainfo->qi_dqreclaims)
-#define XFS_QI_UQIP(mp)		((mp)->m_quotainfo->qi_uquotaip)
-#define XFS_QI_GQIP(mp)		((mp)->m_quotainfo->qi_gquotaip)
-#define XFS_QI_DQCHUNKLEN(mp)	((mp)->m_quotainfo->qi_dqchunklen)
-#define XFS_QI_BTIMELIMIT(mp)	((mp)->m_quotainfo->qi_btimelimit)
-#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
-#define XFS_QI_ITIMELIMIT(mp)	((mp)->m_quotainfo->qi_itimelimit)
-#define XFS_QI_BWARNLIMIT(mp)	((mp)->m_quotainfo->qi_bwarnlimit)
-#define XFS_QI_IWARNLIMIT(mp)	((mp)->m_quotainfo->qi_iwarnlimit)
-#define XFS_QI_QOFFLOCK(mp)	((mp)->m_quotainfo->qi_quotaofflock)
-
-#define XFS_QI_MPL_LIST(mp)	((mp)->m_quotainfo->qi_dqlist)
-#define XFS_QI_MPLLOCK(mp)	((mp)->m_quotainfo->qi_dqlist.qh_lock)
-#define XFS_QI_MPLNEXT(mp)	((mp)->m_quotainfo->qi_dqlist.qh_next)
-#define XFS_QI_MPLNDQUOTS(mp)	((mp)->m_quotainfo->qi_dqlist.qh_nelems)
-
-#define XQMLCK(h)			(mutex_lock(&((h)->qh_lock), PINOD))
-#define XQMUNLCK(h)			(mutex_unlock(&((h)->qh_lock)))
-#ifdef DEBUG
-static inline int
-XQMISLCKD(xfs_dqhash_t *h)
-{
-	if (mutex_trylock(&h->qh_lock)) {
-		mutex_unlock(&h->qh_lock);
-		return 0;
-	}
-	return 1;
-}
-#endif
-
-#define XFS_DQ_HASH_LOCK(h)		XQMLCK(h)
-#define XFS_DQ_HASH_UNLOCK(h)		XQMUNLCK(h)
-#define XFS_DQ_IS_HASH_LOCKED(h)	XQMISLCKD(h)
-
-#define xfs_qm_mplist_lock(mp)		XQMLCK(&(XFS_QI_MPL_LIST(mp)))
-#define xfs_qm_mplist_unlock(mp)	XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
-#define XFS_QM_IS_MPLIST_LOCKED(mp)	XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
-
-#define xfs_qm_freelist_lock(qm)	XQMLCK(&((qm)->qm_dqfreelist))
-#define xfs_qm_freelist_unlock(qm)	XQMUNLCK(&((qm)->qm_dqfreelist))
-#define XFS_QM_IS_FREELIST_LOCKED(qm)	XQMISLCKD(&((qm)->qm_dqfreelist))
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-				 (__psunsigned_t)(id)) & \
-				(xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-				     (xfs_Gqm->qm_usr_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)) : \
-				     (xfs_Gqm->qm_grp_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQTYPE_ON(mp, type)   (type == XFS_DQ_USER ? \
-				      XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
-	INT_ISZERO(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_rtb_hardlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) && \
-	INT_ISZERO(dqp->q_core.d_bcount, ARCH_CONVERT)	      && \
-	INT_ISZERO(dqp->q_core.d_rtbcount, ARCH_CONVERT)      && \
-	INT_ISZERO(dqp->q_core.d_icount, ARCH_CONVERT))
-
-#define HL_PREVP	dq_hashlist.ql_prevp
-#define HL_NEXT		dq_hashlist.ql_next
-#define MPL_PREVP	dq_mplist.ql_prevp
-#define MPL_NEXT	dq_mplist.ql_next
-
-
-#define _LIST_REMOVE(h, dqp, PVP, NXT)				\
-	{							\
-		 xfs_dquot_t *d;				\
-		 if (((d) = (dqp)->NXT))				\
-			 (d)->PVP = (dqp)->PVP;			\
-		 *((dqp)->PVP) = d;				\
-		 (dqp)->NXT = NULL;				\
-		 (dqp)->PVP = NULL;				\
-		 (h)->qh_version++;				\
-		 (h)->qh_nelems--;				\
-	}
-
-#define _LIST_INSERT(h, dqp, PVP, NXT)				\
-	{							\
-		 xfs_dquot_t *d;				\
-		 if (((d) = (h)->qh_next))			\
-			 (d)->PVP = &((dqp)->NXT);		\
-		 (dqp)->NXT = d;				\
-		 (dqp)->PVP = &((h)->qh_next);			\
-		 (h)->qh_next = dqp;				\
-		 (h)->qh_version++;				\
-		 (h)->qh_nelems++;				\
-	 }
-
-#define FOREACH_DQUOT_IN_MP(dqp, mp) \
-	for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
-
-#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist)	\
-for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
-     (dqp) = (dqp)->dq_flnext)
-
-#define XQM_HASHLIST_INSERT(h, dqp)	\
-	 _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
-
-#define XQM_FREELIST_INSERT(h, dqp)	\
-	 xfs_qm_freelist_append(h, dqp)
-
-#define XQM_MPLIST_INSERT(h, dqp)	\
-	 _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
-
-#define XQM_HASHLIST_REMOVE(h, dqp)	\
-	 _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
-#define XQM_FREELIST_REMOVE(dqp)	\
-	 xfs_qm_freelist_unlink(dqp)
-#define XQM_MPLIST_REMOVE(h, dqp)	\
-	{ _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
-	  XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
-
-#define XFS_DQ_IS_LOGITEM_INITD(dqp)	((dqp)->q_logitem.qli_dquot == (dqp))
-
-#define XFS_QM_DQP_TO_DQACCT(tp, dqp)	(XFS_QM_ISUDQ(dqp) ? \
-					 (tp)->t_dqinfo->dqa_usrdquots : \
-					 (tp)->t_dqinfo->dqa_grpdquots)
-#define XFS_IS_SUSER_DQUOT(dqp)		\
-	(INT_ISZERO((dqp)->q_core.d_id, ARCH_CONVERT))
-
-#define XFS_PURGE_INODE(ip)		\
-	{				\
-	  vmap_t dqvmap;		\
-	  vnode_t *dqvp;		\
-	  dqvp = XFS_ITOV(ip);		\
-	  VMAP(dqvp, dqvmap);		\
-	  VN_RELE(dqvp);		\
-	}
-
-#define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
-#define DQFLAGTO_DIRTYSTR(d)	(XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
-
-#endif	/* __XFS_QUOTA_PRIV_H__ */
diff -Nru a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
--- a/fs/xfs/xfs_rename.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_rename.c	Mon Mar 31 13:41:07 2003
@@ -261,11 +261,12 @@
 
 	src_dp = XFS_BHVTOI(src_dir_bdp);
 	target_dp = XFS_BHVTOI(target_dir_bdp);
+	mp = src_dp->i_mount;
 
 	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
 	    DM_EVENT_ENABLED(target_dir_vp->v_vfsp,
 				target_dp, DM_EVENT_RENAME)) {
-		error = dm_send_namesp_event(DM_EVENT_RENAME,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
 					src_dir_bdp, DM_RIGHT_NULL,
 					target_dir_bdp, DM_RIGHT_NULL,
 					src_name, target_name,
@@ -323,7 +324,6 @@
 	xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
 
 	XFS_BMAP_INIT(&free_list, &first_block);
-	mp = src_dp->i_mount;
 	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 	spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen);
@@ -343,12 +343,10 @@
 	/*
 	 * Attach the dquots to the inodes
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if ((error = xfs_qm_vop_rename_dqattach(inodes))) {
-			xfs_trans_cancel(tp, cancel_flags);
-			rename_which_error_return = __LINE__;
-			goto rele_return;
-		}
+	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+		xfs_trans_cancel(tp, cancel_flags);
+		rename_which_error_return = __LINE__;
+		goto rele_return;
 	}
 
 	/*
@@ -625,7 +623,7 @@
 	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) ||
 	    DM_EVENT_ENABLED(target_dir_vp->v_vfsp,
 				target_dp, DM_EVENT_POSTRENAME)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTRENAME,
+		(void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
 					src_dir_bdp, DM_RIGHT_NULL,
 					target_dir_bdp, DM_RIGHT_NULL,
 					src_name, target_name,
diff -Nru a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
--- a/fs/xfs/xfs_rtalloc.h	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_rtalloc.h	Mon Mar 31 13:41:07 2003
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -34,6 +34,8 @@
 
 struct xfs_mount;
 struct xfs_trans;
+
+#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
 
 /* Min and max rt extent sizes, specified in bytes */
 #define XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
diff -Nru a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
--- a/fs/xfs/xfs_trans.c	Mon Mar 31 13:41:07 2003
+++ b/fs/xfs/xfs_trans.c	Mon Mar 31 13:41:07 2003
@@ -173,11 +173,7 @@
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
 
-	/*
-	 * dup the dquot stuff too.
-	 */
-	if (tp->t_dqinfo)
-		xfs_trans_dup_dqinfo(tp, ntp);
+	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
 
 	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
@@ -703,9 +699,7 @@
 		 * means is that we have some (non-persistent) quota
 		 * reservations that need to be unreserved.
 		 */
-		if (tp->t_dqinfo && (tp->t_flags & XFS_TRANS_DQ_DIRTY)) {
-			xfs_trans_unreserve_and_mod_dquots(tp);
-		}
+		XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 		if (tp->t_ticket) {
 			commit_lsn = xfs_log_done(mp, tp->t_ticket,
 							NULL, log_flags);
@@ -733,9 +727,7 @@
 	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
 		xfs_trans_apply_sb_deltas(tp);
 	}
-	if (tp->t_flags & XFS_TRANS_DQ_DIRTY) {
-		xfs_trans_apply_dquot_deltas(tp);
-	}
+	XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
 
 	/*
 	 * Ask each log item how many log_vector entries it will
@@ -955,9 +947,7 @@
 	}
 
 	xfs_trans_unreserve_and_mod_sb(tp);
-	if (tp->t_dqinfo && (tp->t_flags & XFS_TRANS_DQ_DIRTY)) {
-		xfs_trans_unreserve_and_mod_dquots(tp);
-	}
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
@@ -1079,9 +1069,7 @@
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-
-	if (tp->t_dqinfo && (tp->t_flags & XFS_TRANS_DQ_DIRTY))
-		xfs_trans_unreserve_and_mod_dquots(tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1110,8 +1098,7 @@
 	xfs_trans_t	*tp)
 {
 	atomic_dec(&tp->t_mountp->m_active_trans);
-	if (tp->t_dqinfo)
-		xfs_trans_free_dqinfo(tp);
+	XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 	kmem_zone_free(xfs_trans_zone, tp);
 }
 
diff -Nru a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
--- a/fs/xfs/xfs_trans.h	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_trans.h	Mon Mar 31 13:41:08 2003
@@ -64,7 +64,6 @@
 #define XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
 #define XFS_LI_DQUOT		0x123d
 #define XFS_LI_QUOTAOFF		0x123e
-#define XFS_LI_RPC		0x123f	/* CXFS RPC return info */
 
 /*
  * Transaction types.  Used to distinguish types of buffers.
@@ -1014,10 +1013,7 @@
 					 struct xfs_efd_log_item *,
 					 xfs_fsblock_t,
 					 xfs_extlen_t);
-void		xfs_trans_log_create_rpc(xfs_trans_t *, int, xfs_ino_t);
-void		xfs_trans_log_setattr_rpc(xfs_trans_t *, int);
 int		xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
-void		xfs_trans_commit_async(struct xfs_mount *);
 void		xfs_trans_cancel(xfs_trans_t *, int);
 void		xfs_trans_ail_init(struct xfs_mount *);
 xfs_lsn_t	xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff -Nru a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
--- a/fs/xfs/xfs_trans_dquot.c	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,852 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#include <xfs.h>
-#include <xfs_quota_priv.h>
-
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	xfs_dq_logitem_t    *lp;
-
-	ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp));
-	lp = &dqp->q_logitem;
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp));
-
-	/*
-	 * Initialize i_transp so we can later determine if this dquot is
-	 * associated with this transaction.
-	 */
-	dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed.  The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	xfs_log_item_desc_t	*lidp;
-
-	ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
-	ASSERT(lidp != NULL);
-
-	tp->t_flags |= XFS_TRANS_DIRTY;
-	lidp->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanky new transaction
- */
-void
-xfs_trans_dup_dqinfo(
-	xfs_trans_t	*otp,
-	xfs_trans_t	*ntp)
-{
-	xfs_dqtrx_t	*oq, *nq;
-	int		i,j;
-	xfs_dqtrx_t	*oqa, *nqa;
-
-	xfs_trans_alloc_dqinfo(ntp);
-	oqa = otp->t_dqinfo->dqa_usrdquots;
-	nqa = ntp->t_dqinfo->dqa_usrdquots;
-
-	/*
-	 * Because the quota blk reservation is carried forward,
-	 * it is also necessary to carry forward the DQ_DIRTY flag.
-	 */
-	if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
-		ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
-	for (j = 0; j < 2; j++) {
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			if (oqa[i].qt_dquot == NULL)
-				break;
-			oq = &oqa[i];
-			nq = &nqa[i];
-
-			nq->qt_dquot = oq->qt_dquot;
-			nq->qt_bcount_delta = nq->qt_icount_delta = 0;
-			nq->qt_rtbcount_delta = 0;
-
-			/*
-			 * Transfer whatever is left of the reservations.
-			 */
-			nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-			oq->qt_blk_res = oq->qt_blk_res_used;
-
-			nq->qt_rtblk_res = oq->qt_rtblk_res -
-				oq->qt_rtblk_res_used;
-			oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
-			nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
-			oq->qt_ino_res = oq->qt_ino_res_used;
-
-		}
-		oqa = otp->t_dqinfo->dqa_grpdquots;
-		nqa = ntp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	uint		field,
-	long		delta)
-{
-	ASSERT(tp);
-
-	if (tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
-	if (XFS_IS_UQUOTA_ON(tp->t_mountp) && ip->i_udquot) {
-		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-	}
-	if (XFS_IS_GQUOTA_ON(tp->t_mountp) && ip->i_gdquot) {
-		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-	}
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	int		i;
-	xfs_dqtrx_t	*qa;
-
-	for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-		qa = XFS_QM_DQP_TO_DQACCT(tp, dqp);
-
-		if (qa[i].qt_dquot == NULL ||
-		    qa[i].qt_dquot == dqp) {
-			return (&qa[i]);
-		}
-	}
-
-	return (NULL);
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp,
-	uint		field,
-	long		delta)
-{
-	xfs_dqtrx_t	*qtrx;
-
-	ASSERT(tp);
-	qtrx = NULL;
-
-	if (tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-	/*
-	 * Find either the first free slot or the slot that belongs
-	 * to this dquot.
-	 */
-	qtrx = xfs_trans_get_dqtrx(tp, dqp);
-	ASSERT(qtrx);
-	if (qtrx->qt_dquot == NULL)
-		qtrx->qt_dquot = dqp;
-
-	switch (field) {
-
-		/*
-		 * regular disk blk reservation
-		 */
-	      case XFS_TRANS_DQ_RES_BLKS:
-		qtrx->qt_blk_res += (ulong)delta;
-		break;
-
-		/*
-		 * inode reservation
-		 */
-	      case XFS_TRANS_DQ_RES_INOS:
-		qtrx->qt_ino_res += (ulong)delta;
-		break;
-
-		/*
-		 * disk blocks used.
-		 */
-	      case XFS_TRANS_DQ_BCOUNT:
-		if (qtrx->qt_blk_res && delta > 0) {
-			qtrx->qt_blk_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-		}
-		qtrx->qt_bcount_delta += delta;
-		break;
-
-	      case XFS_TRANS_DQ_DELBCOUNT:
-		qtrx->qt_delbcnt_delta += delta;
-		break;
-
-		/*
-		 * Inode Count
-		 */
-	      case XFS_TRANS_DQ_ICOUNT:
-		if (qtrx->qt_ino_res && delta > 0) {
-			qtrx->qt_ino_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
-		}
-		qtrx->qt_icount_delta += delta;
-		break;
-
-		/*
-		 * rtblk reservation
-		 */
-	      case XFS_TRANS_DQ_RES_RTBLKS:
-		qtrx->qt_rtblk_res += (ulong)delta;
-		break;
-
-		/*
-		 * rtblk count
-		 */
-	      case XFS_TRANS_DQ_RTBCOUNT:
-		if (qtrx->qt_rtblk_res && delta > 0) {
-			qtrx->qt_rtblk_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
-		}
-		qtrx->qt_rtbcount_delta += delta;
-		break;
-
-	      case XFS_TRANS_DQ_DELRTBCOUNT:
-		qtrx->qt_delrtb_delta += delta;
-		break;
-
-	      default:
-		ASSERT(0);
-	}
-	tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots (of one type - usr OR grp),
- * involved in a transaction is 2 and that both usr and grp combined - 3.
- * So, we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
-	xfs_trans_t	*tp,
-	xfs_dqtrx_t	*q)
-{
-	ASSERT(q[0].qt_dquot != NULL);
-	if (q[1].qt_dquot == NULL) {
-		xfs_dqlock(q[0].qt_dquot);
-		xfs_trans_dqjoin(tp, q[0].qt_dquot);
-	} else {
-		ASSERT(XFS_QM_TRANS_MAXDQS == 2);
-		xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
-		xfs_trans_dqjoin(tp, q[0].qt_dquot);
-		xfs_trans_dqjoin(tp, q[1].qt_dquot);
-	}
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go thru all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
-	xfs_trans_t		*tp)
-{
-	int			i, j;
-	xfs_dquot_t		*dqp;
-	xfs_dqtrx_t		*qtrx, *qa;
-	xfs_disk_dquot_t	*d;
-	long			totalbdelta;
-	long			totalrtbdelta;
-
-	ASSERT(tp->t_dqinfo);
-	qa = tp->t_dqinfo->dqa_usrdquots;
-	for (j = 0; j < 2; j++) {
-		if (qa[0].qt_dquot == NULL) {
-			qa = tp->t_dqinfo->dqa_grpdquots;
-			continue;
-		}
-
-		/*
-		 * Lock all of the dquots and join them to the transaction.
-		 */
-		xfs_trans_dqlockedjoin(tp, qa);
-
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			qtrx = &qa[i];
-			/*
-			 * The array of dquots is filled
-			 * sequentially, not sparsely.
-			 */
-			if ((dqp = qtrx->qt_dquot) == NULL)
-				break;
-
-			ASSERT(XFS_DQ_IS_LOCKED(dqp));
-			ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
-
-			/*
-			 * adjust the actual number of blocks used
-			 */
-			d = &dqp->q_core;
-
-			/*
-			 * The issue here is - sometimes we don't make a blkquota
-			 * reservation intentionally to be fair to users
-			 * (when the amount is small). On the other hand,
-			 * delayed allocs do make reservations, but that's
-			 * outside of a transaction, so we have no
-			 * idea how much was really reserved.
-			 * So, here we've accumulated delayed allocation blks and
-			 * non-delay blks. The assumption is that the
-			 * delayed ones are always reserved (outside of a
-			 * transaction), and the others may or may not have
-			 * quota reservations.
-			 */
-			totalbdelta = qtrx->qt_bcount_delta +
-				qtrx->qt_delbcnt_delta;
-			totalrtbdelta = qtrx->qt_rtbcount_delta +
-				qtrx->qt_delrtb_delta;
-#ifdef QUOTADEBUG
-			if (totalbdelta < 0)
-				ASSERT(INT_GET(d->d_bcount, ARCH_CONVERT) >=
-				       (xfs_qcnt_t) -totalbdelta);
-
-			if (totalrtbdelta < 0)
-				ASSERT(INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
-				       (xfs_qcnt_t) -totalrtbdelta);
-
-			if (qtrx->qt_icount_delta < 0)
-				ASSERT(INT_GET(d->d_icount, ARCH_CONVERT) >=
-				       (xfs_qcnt_t) -qtrx->qt_icount_delta);
-#endif
-			if (totalbdelta)
-				INT_MOD(d->d_bcount, ARCH_CONVERT, (xfs_qcnt_t)totalbdelta);
-
-			if (qtrx->qt_icount_delta)
-				INT_MOD(d->d_icount, ARCH_CONVERT, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
-			if (totalrtbdelta)
-				INT_MOD(d->d_rtbcount, ARCH_CONVERT, (xfs_qcnt_t)totalrtbdelta);
-
-			/*
-			 * Start/reset the timer(s) if needed.
-			 */
-			xfs_qm_adjust_dqtimers(tp->t_mountp, d);
-
-			dqp->dq_flags |= XFS_DQ_DIRTY;
-			/*
-			 * add this to the list of items to get logged
-			 */
-			xfs_trans_log_dquot(tp, dqp);
-			/*
-			 * Take off what's left of the original reservation.
-			 * In case of delayed allocations, there's no
-			 * reservation that a transaction structure knows of.
-			 */
-			if (qtrx->qt_blk_res != 0) {
-				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-					if (qtrx->qt_blk_res >
-					    qtrx->qt_blk_res_used)
-						dqp->q_res_bcount -= (xfs_qcnt_t)
-							(qtrx->qt_blk_res -
-							 qtrx->qt_blk_res_used);
-					else
-						dqp->q_res_bcount -= (xfs_qcnt_t)
-							(qtrx->qt_blk_res_used -
-							 qtrx->qt_blk_res);
-				}
-			} else {
-				/*
-				 * These blks were never reserved, either inside
-				 * a transaction or outside one (in a delayed
-				 * allocation). Also, this isn't always a
-				 * negative number since we sometimes
-				 * deliberately skip quota reservations.
-				 */
-				if (qtrx->qt_bcount_delta) {
-					dqp->q_res_bcount +=
-					      (xfs_qcnt_t)qtrx->qt_bcount_delta;
-				}
-			}
-			/*
-			 * Adjust the RT reservation.
-			 */
-			if (qtrx->qt_rtblk_res != 0) {
-				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-					if (qtrx->qt_rtblk_res >
-					    qtrx->qt_rtblk_res_used)
-					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
-						       (qtrx->qt_rtblk_res -
-							qtrx->qt_rtblk_res_used);
-					else
-					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
-						       (qtrx->qt_rtblk_res_used -
-							qtrx->qt_rtblk_res);
-				}
-			} else {
-				if (qtrx->qt_rtbcount_delta)
-					dqp->q_res_rtbcount +=
-					    (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
-			}
-
-			/*
-			 * Adjust the inode reservation.
-			 */
-			if (qtrx->qt_ino_res != 0) {
-				ASSERT(qtrx->qt_ino_res >=
-				       qtrx->qt_ino_res_used);
-				if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
-					dqp->q_res_icount -= (xfs_qcnt_t)
-						(qtrx->qt_ino_res -
-						 qtrx->qt_ino_res_used);
-			} else {
-				if (qtrx->qt_icount_delta)
-					dqp->q_res_icount +=
-					    (xfs_qcnt_t)qtrx->qt_icount_delta;
-			}
-
-
-#ifdef QUOTADEBUG
-			if (qtrx->qt_rtblk_res != 0)
-				printk("RT res %d for 0x%p\n",
-				      (int) qtrx->qt_rtblk_res,
-				      dqp);
-#endif
-			ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
-			ASSERT(dqp->q_res_icount >= INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
-			ASSERT(dqp->q_res_rtbcount >= INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
-		}
-		/*
-		 * Do the group quotas next
-		 */
-		qa = tp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (ie. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
-	xfs_trans_t	*tp)
-{
-	int			i, j;
-	xfs_dquot_t		*dqp;
-	xfs_dqtrx_t		*qtrx, *qa;
-	boolean_t		locked;
-
-	ASSERT(tp->t_dqinfo);
-	qa = tp->t_dqinfo->dqa_usrdquots;
-
-	for (j = 0; j < 2; j++) {
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			qtrx = &qa[i];
-			/*
-			 * We assume that the array of dquots is filled
-			 * sequentially, not sparsely.
-			 */
-			if ((dqp = qtrx->qt_dquot) == NULL)
-				break;
-			/*
-			 * Unreserve the original reservation. We don't care
-			 * about the number of blocks used field, or deltas.
-			 * Also we don't bother to zero the fields.
-			 */
-			locked = B_FALSE;
-			if (qtrx->qt_blk_res) {
-				xfs_dqlock(dqp);
-				locked = B_TRUE;
-				dqp->q_res_bcount -=
-					(xfs_qcnt_t)qtrx->qt_blk_res;
-			}
-			if (qtrx->qt_ino_res) {
-				if (!locked) {
-					xfs_dqlock(dqp);
-					locked = B_TRUE;
-				}
-				dqp->q_res_icount -=
-					(xfs_qcnt_t)qtrx->qt_ino_res;
-			}
-
-			if (qtrx->qt_rtblk_res) {
-				if (!locked) {
-					xfs_dqlock(dqp);
-					locked = B_TRUE;
-				}
-				dqp->q_res_rtbcount -=
-					(xfs_qcnt_t)qtrx->qt_rtblk_res;
-			}
-			if (locked)
-				xfs_dqunlock(dqp);
-
-		}
-		qa = tp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate if the dquot is to be locked here and also
- * if the blk reservation is for RT or regular blocks.
- * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
- * Returns EDQUOT if quota is exceeded.
- */
-STATIC int
-xfs_trans_dqresv(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp,
-	long		nblks,
-	long		ninos,
-	uint		flags)
-{
-	int		error;
-	xfs_qcnt_t	hardlimit;
-	xfs_qcnt_t	softlimit;
-	time_t		btimer;
-	xfs_qcnt_t	*resbcountp;
-
-	if (! (flags & XFS_QMOPT_DQLOCK)) {
-		xfs_dqlock(dqp);
-	}
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (flags & XFS_TRANS_DQ_RES_BLKS) {
-		hardlimit = INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT);
-		softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
-		btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
-		resbcountp = &dqp->q_res_bcount;
-	} else {
-		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-		hardlimit = INT_GET(dqp->q_core.d_rtb_hardlimit, ARCH_CONVERT);
-		softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
-		btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
-		resbcountp = &dqp->q_res_rtbcount;
-	}
-	error = 0;
-
-	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
-	    !INT_ISZERO(dqp->q_core.d_id, ARCH_CONVERT) &&
-	    XFS_IS_QUOTA_ENFORCED(dqp->q_mount)) {
-#ifdef QUOTADEBUG
-		printk("BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?\n",
-			nblks, *resbcountp, hardlimit);
-#endif
-		if (nblks > 0) {
-			/*
-			 * dquot is locked already. See if we'd go over the
-			 * hardlimit or exceed the timelimit if we allocate
-			 * nblks.
-			 */
-			if (hardlimit > 0ULL &&
-			     (hardlimit <= nblks + *resbcountp)) {
-				error = EDQUOT;
-				goto error_return;
-			}
-
-			if (softlimit > 0ULL &&
-			     (softlimit <= nblks + *resbcountp)) {
-				/*
-				 * If timer or warnings has expired,
-				 * return EDQUOT
-				 */
-				if ((btimer != 0 && get_seconds() > btimer) ||
-				    (!INT_ISZERO(dqp->q_core.d_bwarns, ARCH_CONVERT) &&
-				     INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >=
-				     XFS_QI_BWARNLIMIT(dqp->q_mount))) {
-					error = EDQUOT;
-					goto error_return;
-				}
-			}
-		}
-		if (ninos > 0) {
-			if (INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT) > 0ULL &&
-			    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
-			    INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT)) {
-				error = EDQUOT;
-				goto error_return;
-			} else if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) > 0ULL &&
-				   INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
-				   INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT)) {
-				/*
-				 * If timer or warnings has expired,
-				 * return EDQUOT
-				 */
-				if ((!INT_ISZERO(dqp->q_core.d_itimer, ARCH_CONVERT) &&
-				     get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) ||
-				    (!INT_ISZERO(dqp->q_core.d_iwarns, ARCH_CONVERT) &&
-				     INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >=
-				     XFS_QI_IWARNLIMIT(dqp->q_mount))) {
-					error = EDQUOT;
-					goto error_return;
-				}
-			}
-		}
-	}
-
-	/*
-	 * Change the reservation, but not the actual usage.
-	 * Note that q_res_bcount = q_core.d_bcount + resv
-	 */
-	(*resbcountp) += (xfs_qcnt_t)nblks;
-	if (ninos != 0)
-		dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
-	/*
-	 * note the reservation amt in the trans struct too,
-	 * so that the transaction knows how much was reserved by
-	 * it against this particular dquot.
-	 * We don't do this when we are reserving for a delayed allocation,
-	 * because we don't have the luxury of a transaction envelope then.
-	 */
-	if (tp) {
-		ASSERT(tp->t_dqinfo);
-		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-		if (nblks != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    flags & XFS_QMOPT_RESBLK_MASK,
-					    nblks);
-		if (ninos != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    XFS_TRANS_DQ_RES_INOS,
-					    ninos);
-	}
-	ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
-	ASSERT(dqp->q_res_rtbcount >= INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
-	ASSERT(dqp->q_res_icount >= INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
-
-error_return:
-	if (! (flags & XFS_QMOPT_DQLOCK)) {
-		xfs_dqunlock(dqp);
-	}
-	return (error);
-}
-
-
-/*
- * Given a dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked.
- *	   XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp,
-	long		nblks,
-	long		ninos,
-	uint		flags)
-{
-	int		resvd;
-
-	if (tp && tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
-	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-	resvd = 0;
-
-	if (udqp) {
-		if (xfs_trans_dqresv(tp, udqp, nblks, ninos, flags))
-			return (EDQUOT);
-		resvd = 1;
-	}
-
-	if (gdqp) {
-		if (xfs_trans_dqresv(tp, gdqp, nblks, ninos, flags)) {
-			/*
-			 * can't do it, so backout previous reservation
-			 */
-			if (resvd) {
-				xfs_trans_dqresv(tp, udqp,  -nblks, -ninos,
-						 flags);
-			}
-			return (EDQUOT);
-		}
-	}
-
-	/*
-	 * Didnt change anything critical, so, no need to log
-	 */
-	return (0);
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- *
- * Returns 0 on success, EDQUOT or other errors otherwise
- */
-int
-xfs_trans_reserve_quota_nblks(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	long		nblks,
-	long		ninos,
-	uint		type)
-{
-	int error;
-
-#ifdef QUOTADEBUG
-	if (ip->i_udquot)
-		ASSERT(! XFS_DQ_IS_LOCKED(ip->i_udquot));
-	if (ip->i_gdquot)
-		ASSERT(! XFS_DQ_IS_LOCKED(ip->i_gdquot));
-#endif
-
-	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-	ASSERT((type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_RTBLKS ||
-	       (type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_BLKS);
-
-	/*
-	 * Reserve nblks against these dquots, with trans as the mediator.
-	 */
-	error = xfs_trans_reserve_quota_bydquots(tp,
-						 ip->i_udquot, ip->i_gdquot,
-						 nblks, ninos,
-						 type);
-	return (error);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
-	xfs_trans_t		*tp,
-	xfs_qoff_logitem_t	*startqoff,
-	uint			flags)
-{
-	xfs_qoff_logitem_t	*q;
-
-	ASSERT(tp != NULL);
-
-	q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-	ASSERT(q != NULL);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)q);
-
-	return (q);
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-	xfs_trans_t		*tp,
-	xfs_qoff_logitem_t	*qlp)
-{
-	xfs_log_item_desc_t	*lidp;
-
-	lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
-	ASSERT(lidp != NULL);
-
-	tp->t_flags |= XFS_TRANS_DIRTY;
-	lidp->lid_flags |= XFS_LID_DIRTY;
-}
-
-void
-xfs_trans_alloc_dqinfo(
-	xfs_trans_t	*tp)
-{
-	(tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
-	xfs_trans_t	*tp)
-{
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
-	(tp)->t_dqinfo = NULL;
-}
diff -Nru a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
--- a/fs/xfs/xfs_utils.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfs_utils.c	Mon Mar 31 13:41:08 2003
@@ -232,7 +232,7 @@
 			xfs_buf_relse(ialloc_context);
 			if (dqinfo) {
 				tp->t_dqinfo = dqinfo;
-				xfs_trans_free_dqinfo(tp);
+				XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
 			}
 			*tpp = ntp;
 			*ipp = NULL;
@@ -254,7 +254,7 @@
 			*ipp = NULL;
 			return code;
 		}
-		xfs_trans_bjoin (tp, ialloc_context);
+		xfs_trans_bjoin(tp, ialloc_context);
 
 		/*
 		 * Call ialloc again. Since we've locked out all
diff -Nru a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
--- a/fs/xfs/xfs_vfsops.c	Mon Mar 31 13:41:06 2003
+++ b/fs/xfs/xfs_vfsops.c	Mon Mar 31 13:41:06 2003
@@ -1,7 +1,7 @@
 /*
  * XFS filesystem operations.
  *
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -68,7 +68,6 @@
 	spinlock_init(&xfs_dabuf_global_lock, "xfsda");
 #endif
 	mutex_init(&xfs_uuidtabmon, MUTEX_DEFAULT, "xfs_uuidtab");
-	mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock");
 
 	/*
 	 * Initialize all of the zone allocators we use.
@@ -175,8 +174,6 @@
 	kmem_cache_destroy(xfs_ifork_zone);
 	kmem_cache_destroy(xfs_ili_zone);
 	kmem_cache_destroy(xfs_chashlist_zone);
-	_XQM_ZONE_DESTROY(qm_dqzone);
-	_XQM_ZONE_DESTROY(qm_dqtrxzone);
 	_ACL_ZONE_DESTROY(xfs_acl_zone);
 #if  (defined(DEBUG) || defined(CONFIG_XFS_VNODE_TRACING))
 	ktrace_uninit();
@@ -248,9 +245,6 @@
 	if (ap->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
 
-	if (ap->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
-		xfs_qm_mount_quotainit(mp, ap->flags);
-
 	if (ap->flags & XFSMNT_RETERR)
 		mp->m_flags |= XFS_MOUNT_RETERR;
 
@@ -387,11 +381,13 @@
  */
 STATIC int
 xfs_mount(
-	vfs_t			*vfsp,
+	struct bhv_desc		*bhvp,
 	struct xfs_mount_args	*args,
 	cred_t			*credp)
 {
-	xfs_mount_t		*mp;
+	struct vfs		*vfsp = bhvtovfs(bhvp);
+	struct bhv_desc		*p;
+	struct xfs_mount	*mp = XFS_BHVTOM(bhvp);
 	struct block_device	*ddev, *logdev, *rtdev;
 	int			ronly = (vfsp->vfs_flag & VFS_RDONLY);
 	int			flags = 0, error;
@@ -400,23 +396,18 @@
 	logdev = rtdev = NULL;
 
 	/*
-	 * Allocate VFS private data (xfs mount structure).
-	 */
-	mp = xfs_mount_init();
-
-	/*
 	 * Open real time and log devices - order is important.
 	 */
 	if (args->logname[0]) {
 		error = xfs_blkdev_get(mp, args->logname, &logdev);
 		if (error)
-			goto free_mp;
+			return error;
 	}
 	if (args->rtname[0]) {
 		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
 		if (error) {
 			xfs_blkdev_put(logdev);
-			goto free_mp;
+			return error;
 		}
 
 		if (rtdev == ddev || rtdev == logdev) {
@@ -424,33 +415,47 @@
 	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
 			xfs_blkdev_put(logdev);
 			xfs_blkdev_put(rtdev);
-			error = EINVAL;
-			goto free_mp;
+			return EINVAL;
 		}
 	}
 
-	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+	/*
+	 * Setup xfs_mount function vectors from available behaviors
+	 */
+	p = vfs_bhv_lookup(vfsp, VFS_POSITION_DM);
+	mp->m_dm_ops = p ? *(xfs_dmops_t *) vfs_bhv_custom(p) : xfs_dmcore_xfs;
+	p = vfs_bhv_lookup(vfsp, VFS_POSITION_QM);
+	mp->m_qm_ops = p ? *(xfs_qmops_t *) vfs_bhv_custom(p) : xfs_qmcore_xfs;
+	p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO);
+	mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs;
 
+	/*
+	 * Setup xfs_mount buffer target pointers
+	 */
 	mp->m_ddev_targp = xfs_alloc_buftarg(ddev);
 	if (rtdev)
 		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev);
 	mp->m_logdev_targp = (logdev && logdev != ddev) ?
 				xfs_alloc_buftarg(logdev) : mp->m_ddev_targp;
 
+	/*
+	 * Setup flags based on mount(2) options and then the superblock
+	 */
 	error = xfs_start_flags(args, mp, ronly);
 	if (error)
 		goto error;
-
 	error = xfs_readsb(mp);
 	if (error)
 		goto error;
-
 	error = xfs_finish_flags(args, mp, ronly);
 	if (error) {
 		xfs_freesb(mp);
 		goto error;
 	}
 
+	/*
+	 * Setup xfs_mount buffer target pointers based on superblock
+	 */
 	xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
 			    mp->m_sb.sb_sectsize);
 	if (logdev && logdev != ddev) {
@@ -465,10 +470,8 @@
 		xfs_setsize_buftarg(mp->m_rtdev_targp, mp->m_sb.sb_blocksize,
 				    mp->m_sb.sb_blocksize);
 
-	error = xfs_mountfs(vfsp, mp, ddev->bd_dev, flags);
-	if (error)
-		goto error;
-	return 0;
+	if (!(error = XFS_IOINIT(vfsp, args, flags)))
+		return 0;
 
  error:
 	xfs_binval(mp->m_ddev_targp);
@@ -479,9 +482,6 @@
 		xfs_binval(mp->m_rtdev_targp);
 	}
 	xfs_unmountfs_close(mp, NULL);
-
- free_mp:
-	xfs_mount_free(mp, 1);
 	return error;
 }
 
@@ -523,8 +523,9 @@
 				continue;
 			}
 #ifdef DEBUG
-			printk("busy vp=0x%p ip=0x%p inum %Ld count=%d\n",
-				vp, ip, ip->i_ino, vn_count(vp));
+			cmn_err(CE_WARN, "%s: busy vp=0x%p ip=0x%p "
+					 "inum %Ld count=%d",
+				__FUNCTION__, vp, ip, ip->i_ino, vn_count(vp));
 #endif
 			busy++;
 		}
@@ -543,16 +544,15 @@
 	int		flags,
 	cred_t		*credp)
 {
-	xfs_mount_t	*mp;
-	xfs_inode_t	*rip;
-	vnode_t		*rvp = 0;
 	struct vfs	*vfsp = bhvtovfs(bdp);
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
+	xfs_inode_t	*rip;
+	vnode_t		*rvp;
 	int		unmount_event_wanted = 0;
 	int		unmount_event_flags = 0;
 	int		xfs_unmountfs_needed = 0;
 	int		error;
 
-	mp = XFS_BHVTOM(bdp);
 	rip = mp->m_rootip;
 	rvp = XFS_ITOV(rip);
 
@@ -560,7 +560,7 @@
 		bhv_desc_t	*rbdp;
 
 		rbdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(rvp), &xfs_vnodeops);
-		error = dm_send_namesp_event(DM_EVENT_PREUNMOUNT,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_PREUNMOUNT,
 				rbdp, DM_RIGHT_NULL, rbdp, DM_RIGHT_NULL,
 				NULL, NULL, 0, 0,
 				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
@@ -577,7 +577,8 @@
 	 */
 	if (xfs_ibusy(mp)) {
 		error = XFS_ERROR(EBUSY);
-		printk("xfs_unmount: xfs_ibusy says error/%d\n", error);
+		cmn_err(CE_ALERT, "%s: xfs_ibusy failed -- error code %d",
+			__FUNCTION__, error);
 		goto out;
 	}
 
@@ -598,7 +599,7 @@
 	 * we want to make sure we invalidate dirty pages that belong to
 	 * referenced vnodes as well.
 	 */
-	if (XFS_FORCED_SHUTDOWN(mp))  {
+	if (XFS_FORCED_SHUTDOWN(mp)) {
 		error = xfs_sync(&mp->m_bhv,
 			 (SYNC_WAIT | SYNC_CLOSE), credp);
 		ASSERT(error != EFSCORRUPTED);
@@ -612,9 +613,9 @@
 	 */
 	if (unmount_event_wanted) {
 		/* Note: mp structure must still exist for
-		 * dm_send_unmount_event() call.
+		 * XFS_SEND_UNMOUNT() call.
 		 */
-		dm_send_unmount_event(vfsp, error == 0 ? rvp : NULL,
+		XFS_SEND_UNMOUNT(mp, vfsp, error == 0 ? rvp : NULL,
 			DM_RIGHT_NULL, 0, error, unmount_event_flags);
 	}
 	if (xfs_unmountfs_needed) {
@@ -641,7 +642,7 @@
 {
 	xfs_inode_t	*rip = mp->m_rootip;
 	xfs_inode_t	*rbmip;
-	xfs_inode_t	*rsumip=NULL;
+	xfs_inode_t	*rsumip = NULL;
 	vnode_t		*rvp = XFS_ITOV(rip);
 	int		error;
 
@@ -675,23 +676,22 @@
 	}
 
 	/*
-	 * synchronously flush root inode to disk
+	 * Synchronously flush root inode to disk
 	 */
 	error = xfs_iflush(rip, XFS_IFLUSH_SYNC);
-
 	if (error == EFSCORRUPTED)
 		goto fscorrupt_out2;
 
 	if (vn_count(rvp) != 1 && !relocation) {
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
-		error = XFS_ERROR(EBUSY);
-		return (error);
+		return XFS_ERROR(EBUSY);
 	}
+
 	/*
 	 * Release dquot that rootinode, rbmino and rsumino might be holding,
 	 * flush and purge the quota inodes.
 	 */
-	error = xfs_qm_unmount_quotas(mp);
+	error = XFS_QM_UNMOUNT(mp);
 	if (error == EFSCORRUPTED)
 		goto fscorrupt_out2;
 
@@ -701,7 +701,7 @@
 	}
 
 	xfs_iunlock(rip, XFS_ILOCK_EXCL);
-	return (0);
+	return 0;
 
 fscorrupt_out:
 	xfs_ifunlock(rip);
@@ -709,8 +709,7 @@
 fscorrupt_out2:
 	xfs_iunlock(rip, XFS_ILOCK_EXCL);
 
-	error = XFS_ERROR(EFSCORRUPTED);
-	return (error);
+	return XFS_ERROR(EFSCORRUPTED);
 }
 
 /*
@@ -725,12 +724,11 @@
 	bhv_desc_t	*bdp,
 	vnode_t		**vpp)
 {
-	vnode_t *vp;
+	vnode_t		*vp;
 
 	vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip);
 	VN_HOLD(vp);
 	*vpp = vp;
-
 	return 0;
 }
 
@@ -1411,23 +1409,6 @@
 	ASSERT(ipointer_in == B_FALSE);
 
 	/*
-	 * Get the Quota Manager to flush the dquots in a similar manner.
-	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if ((error = xfs_qm_sync(mp, flags))) {
-			/*
-			 * If we got an IO error, we will be shutting down.
-			 * So, there's nothing more for us to do here.
-			 */
-			ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
-			if (XFS_FORCED_SHUTDOWN(mp)) {
-				kmem_free(ipointer, sizeof(xfs_iptr_t));
-				return XFS_ERROR(error);
-			}
-		}
-	}
-
-	/*
 	 * Flushing out dirty data above probably generated more
 	 * log activity, so if this isn't vfs_sync() then flush
 	 * the log again.  If SYNC_WAIT is set then do it synchronously.
@@ -1581,16 +1562,17 @@
 
 
 vfsops_t xfs_vfsops = {
+	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
+	.vfs_parseargs		= xfs_parseargs,
+	.vfs_showargs		= xfs_showargs,
 	.vfs_mount		= xfs_mount,
 	.vfs_unmount		= xfs_unmount,
 	.vfs_root		= xfs_root,
 	.vfs_statvfs		= xfs_statvfs,
 	.vfs_sync		= xfs_sync,
 	.vfs_vget		= xfs_vget,
+	.vfs_dmapiops		= (vfs_dmapiops_t)fs_nosys,
+	.vfs_quotactl		= (vfs_quotactl_t)fs_nosys,
 	.vfs_init_vnode		= xfs_initialize_vnode,
 	.vfs_force_shutdown	= xfs_do_force_shutdown,
-#ifdef CONFIG_XFS_DMAPI
-	.vfs_dmapi_mount	= xfs_dm_mount,
-	.vfs_dmapi_fsys_vector	= xfs_dm_get_fsys_vector,
-#endif
 };
diff -Nru a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
--- a/fs/xfs/xfs_vnodeops.c	Mon Mar 31 13:41:09 2003
+++ b/fs/xfs/xfs_vnodeops.c	Mon Mar 31 13:41:09 2003
@@ -254,28 +254,26 @@
  */
 STATIC int
 xfs_setattr(
-	bhv_desc_t	*bdp,
-	vattr_t		*vap,
-	int		flags,
-	cred_t		*credp)
+	bhv_desc_t		*bdp,
+	vattr_t			*vap,
+	int			flags,
+	cred_t			*credp)
 {
-	xfs_inode_t	*ip;
-	xfs_trans_t	*tp;
-	xfs_mount_t	*mp;
-	int		mask;
-	int		code;
-	uint		lock_flags;
-	uint		commit_flags=0;
-	uid_t		uid=0, iuid=0;
-	gid_t		gid=0, igid=0;
-	int		timeflags = 0;
-	vnode_t		*vp;
-	xfs_prid_t	projid=0, iprojid=0;
-	int		privileged;
-	int		mandlock_before, mandlock_after;
-	uint		qflags;
-	xfs_dquot_t	*udqp, *gdqp, *olddquot1, *olddquot2;
-	int		file_owner;
+	xfs_inode_t		*ip;
+	xfs_trans_t		*tp;
+	xfs_mount_t		*mp;
+	int			mask;
+	int			code;
+	uint			lock_flags;
+	uint			commit_flags=0;
+	uid_t			uid=0, iuid=0;
+	gid_t			gid=0, igid=0;
+	int			timeflags = 0;
+	vnode_t			*vp;
+	xfs_prid_t		projid=0, iprojid=0;
+	int			mandlock_before, mandlock_after;
+	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
+	int			file_owner;
 
 	vp = BHV_TO_VNODE(bdp);
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -319,7 +317,8 @@
 	 * because the i_*dquot fields will get updated anyway.
 	 */
 	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
-		qflags = 0;
+		uint	qflags = 0;
+
 		if (mask & XFS_AT_UID) {
 			uid = vap->va_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
@@ -339,8 +338,8 @@
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		if ((code = xfs_qm_vop_dqalloc(mp, ip, uid, gid, qflags,
-						    &udqp, &gdqp)))
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		if (code)
 			return (code);
 	}
 
@@ -365,7 +364,7 @@
 	} else {
 		if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
 		    !(flags & ATTR_DMI)) {
-			code = xfs_dm_send_data_event (DM_EVENT_TRUNCATE, bdp,
+			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, bdp,
 				vap->va_size, 0, AT_DELAY_FLAG(flags), NULL);
 			if (code) {
 				lock_flags = 0;
@@ -482,15 +481,10 @@
 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
-			/*
-			 * XXX:casey - This may result in unnecessary auditing.
-			 */
-			privileged = capable(CAP_FOWNER);
-			if ((code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-							  privileged ?
-							  XFS_QMOPT_FORCE_RES :
-							  0)))
-				/* out of quota */
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
 				goto error_return;
 		}
 	}
@@ -520,10 +514,8 @@
 		/*
 		 * Make sure that the dquots are attached to the inode.
 		 */
-		if (XFS_IS_QUOTA_ON(mp) && XFS_NOT_DQATTACHED(mp, ip)) {
-			if ((code = xfs_qm_dqattach(ip, XFS_QMOPT_ILOCKED)))
-				goto error_return;
-		}
+		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+			goto error_return;
 	}
 
 	/*
@@ -730,13 +722,8 @@
 			if (XFS_IS_UQUOTA_ON(mp)) {
 				ASSERT(mask & XFS_AT_UID);
 				ASSERT(udqp);
-				ASSERT(xfs_qm_dqid(udqp) == (xfs_dqid_t)uid);
-				olddquot1 = xfs_qm_vop_chown(tp, ip,
-							     &ip->i_udquot,
-							     udqp);
-				/*
-				 * We'll dqrele olddquot at the end.
-				 */
+				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_udquot, udqp);
 			}
 			ip->i_d.di_uid = uid;
 		}
@@ -744,10 +731,8 @@
 			if (XFS_IS_GQUOTA_ON(mp)) {
 				ASSERT(mask & XFS_AT_GID);
 				ASSERT(gdqp);
-				ASSERT(xfs_qm_dqid(gdqp) == gid);
-				olddquot2 = xfs_qm_vop_chown(tp, ip,
-							     &ip->i_gdquot,
-							     gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_gid = gid;
 		}
@@ -802,9 +787,6 @@
 			ip->i_d.di_flags = 0;
 			if (vap->va_xflags & XFS_XFLAG_REALTIME) {
 				ip->i_d.di_flags |= XFS_DIFLAG_REALTIME;
-				/* This is replicated in the io core for
-				 * CXFS use
-				 */
 				ip->i_iocore.io_flags |= XFS_IOCORE_RT;
 			}
 			/* can't set PREALLOC this way, just ignore it */
@@ -866,16 +848,12 @@
 	xfs_iunlock(ip, lock_flags);
 
 	/*
-	 * release any dquot(s) inode had kept before chown
+	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	if (olddquot1)
-		xfs_qm_dqrele(olddquot1);
-	if (olddquot2)
-		xfs_qm_dqrele(olddquot2);
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, olddquot1);
+	XFS_QM_DQRELE(mp, olddquot2);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (code) {
 		return code;
@@ -883,9 +861,9 @@
 
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
 	    !(flags & ATTR_DMI)) {
-		(void) dm_send_namesp_event (DM_EVENT_ATTRIBUTE, bdp, DM_RIGHT_NULL,
-				NULL, DM_RIGHT_NULL, NULL, NULL,
-			0, 0, AT_DELAY_FLAG(flags));
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, bdp, DM_RIGHT_NULL,
+					NULL, DM_RIGHT_NULL, NULL, NULL,
+					0, 0, AT_DELAY_FLAG(flags));
 	}
 	return 0;
 
@@ -893,10 +871,8 @@
 	commit_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
  error_return:
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 	if (tp) {
 		xfs_trans_cancel(tp, commit_flags);
 	}
@@ -1286,14 +1262,8 @@
 		/*
 		 * Attach the dquots to the inode up front.
 		 */
-		if (XFS_IS_QUOTA_ON(mp) &&
-		    ip->i_ino != mp->m_sb.sb_uquotino &&
-		    ip->i_ino != mp->m_sb.sb_gquotino) {
-			if (XFS_NOT_DQATTACHED(mp, ip)) {
-				if ((error = xfs_qm_dqattach(ip, 0)))
-					return (error);
-			}
-		}
+		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+			return (error);
 
 		/*
 		 * There are blocks after the end of file.
@@ -1683,7 +1653,7 @@
 
 	if (ip->i_d.di_nlink == 0 &&
 	    DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
-		(void) dm_send_destroy_event(bdp, DM_RIGHT_NULL);
+		(void) XFS_SEND_DESTROY(mp, bdp, DM_RIGHT_NULL);
 	}
 
 	error = 0;
@@ -1709,14 +1679,9 @@
 
 	ASSERT(ip->i_d.di_nlink == 0);
 
-	if (XFS_IS_QUOTA_ON(mp) &&
-	    ip->i_ino != mp->m_sb.sb_uquotino &&
-	    ip->i_ino != mp->m_sb.sb_gquotino) {
-		if (XFS_NOT_DQATTACHED(mp, ip)) {
-			if ((error = xfs_qm_dqattach(ip, 0)))
-				return (VN_INACTIVE_CACHE);
-		}
-	}
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		return (VN_INACTIVE_CACHE);
+
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 	if (truncate) {
 		/*
@@ -1826,20 +1791,18 @@
 		 * might do that, we need to make sure.	 Otherwise the
 		 * inode might be lost for a long time or forever.
 		 */
-		if (!XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
+		if (!XFS_FORCED_SHUTDOWN(mp)) {
 			cmn_err(CE_NOTE,
-				"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
-				error,tp->t_mountp->m_fsname);
-			xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR);
+		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
+				error, mp->m_fsname);
+			xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
 		}
 		xfs_trans_cancel(tp, commit_flags | XFS_TRANS_ABORT);
 	} else {
 		/*
 		 * Credit the quota account(s). The inode is gone.
 		 */
-		if (XFS_IS_QUOTA_ON(tp->t_mountp))
-			xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT,
-							 -1);
+		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
 		/*
 		 * Just ignore errors at this point.  There is
@@ -1850,8 +1813,7 @@
 	/*
 	 * Release the dquots held by inode, if any.
 	 */
-	if (ip->i_udquot || ip->i_gdquot)
-		xfs_qm_dqdettach_inode(ip);
+	XFS_QM_DQDETACH(mp, ip);
 
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
@@ -1925,7 +1887,7 @@
 	uint			cancel_flags;
 	int			committed;
 	xfs_prid_t		prid;
-	xfs_dquot_t		*udqp, *gdqp;
+	struct xfs_dquot	*udqp, *gdqp;
 	uint			resblks;
 	int			dm_di_mode;
 	int			namelen;
@@ -1935,22 +1897,22 @@
 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
 
 	dp = XFS_BHVTOI(dir_bdp);
+	mp = dp->i_mount;
 
 	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
-		error = dm_send_namesp_event(DM_EVENT_CREATE,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
 				dir_bdp, DM_RIGHT_NULL, NULL,
 				DM_RIGHT_NULL, name, NULL,
 				dm_di_mode, 0, 0);
+
 		if (error)
 			return error;
 		dm_event_sent = 1;
 	}
 
-	mp = dp->i_mount;
-
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
@@ -1965,14 +1927,10 @@
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		error = xfs_qm_vop_dqalloc(mp, dp,
-					   current->fsuid, current->fsgid,
-					   XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT,
-					   &udqp, &gdqp);
-		if (error)
-			goto std_return;
-	}
+	error = XFS_QM_DQVOPALLOC(mp, dp, current->fsuid, current->fsgid,
+			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
+	if (error)
+		goto std_return;
 
 	ip = NULL;
 	dp_joined_to_trans = B_FALSE;
@@ -2008,13 +1966,10 @@
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (xfs_trans_reserve_quota(tp, udqp, gdqp, resblks,
-					    1, 0)) {
-			error = EDQUOT;
-			goto error_return;
-		}
-	}
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	if (error)
+		goto error_return;
+
 	if (resblks == 0 &&
 	    (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
 		goto error_return;
@@ -2074,9 +2029,7 @@
 	 * These ids of the inode couldn't have changed since the new
 	 * inode has been locked ever since it was created.
 	 */
-	if (XFS_IS_QUOTA_ON(mp))
-		xfs_qm_vop_dqattach_and_dqmod_newinode(tp, ip, udqp,
-						       gdqp);
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	/*
 	 * xfs_trans_commit normally decrements the vnode ref count
@@ -2099,10 +2052,8 @@
 		goto error_return;
 	}
 
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	/*
 	 * Propogate the fact that the vnode changed after the
@@ -2118,7 +2069,7 @@
 	if ( (*vpp || (error != 0 && dm_event_sent != 0)) &&
 			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
 							DM_EVENT_POSTCREATE)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTCREATE,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
 			dir_bdp, DM_RIGHT_NULL,
 			*vpp ? vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops):NULL,
 			DM_RIGHT_NULL, name, NULL,
@@ -2136,10 +2087,8 @@
 
 	if (!dp_joined_to_trans && (dp != NULL))
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	goto std_return;
 
@@ -2153,10 +2102,8 @@
 	xfs_trans_cancel(tp, cancel_flags);
 	IRELE(ip);
 
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	goto std_return;
 }
@@ -2437,8 +2384,8 @@
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
-		error = dm_send_namesp_event(DM_EVENT_REMOVE, dir_bdp, DM_RIGHT_NULL,
-					NULL, DM_RIGHT_NULL,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_bdp,
+					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
 					name, NULL, 0, 0, 0);
 		if (error)
 			return error;
@@ -2471,17 +2418,13 @@
 
 	ITRACE(ip);
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		ASSERT(! error);
-		if (XFS_NOT_DQATTACHED(mp, dp))
-			error = xfs_qm_dqattach(dp, 0);
-		if (!error && dp != ip && XFS_NOT_DQATTACHED(mp, ip))
-			error = xfs_qm_dqattach(ip, 0);
-		if (error) {
-			REMOVE_DEBUG_TRACE(__LINE__);
-			IRELE(ip);
-			goto std_return;
-		}
+	error = XFS_QM_DQATTACH(mp, dp, 0);
+	if (!error && dp != ip)
+		error = XFS_QM_DQATTACH(mp, ip, 0);
+	if (error) {
+		REMOVE_DEBUG_TRACE(__LINE__);
+		IRELE(ip);
+		goto std_return;
 	}
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
@@ -2606,7 +2549,7 @@
  std_return:
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp,
 						DM_EVENT_POSTREMOVE)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTREMOVE,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
 				dir_bdp, DM_RIGHT_NULL,
 				NULL, DM_RIGHT_NULL,
 				name, NULL, dm_di_mode, error, 0);
@@ -2690,7 +2633,7 @@
 		return XFS_ERROR(EIO);
 
 	if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) {
-		error = dm_send_namesp_event(DM_EVENT_LINK,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
 					target_dir_bdp, DM_RIGHT_NULL,
 					src_bdp, DM_RIGHT_NULL,
 					target_name, NULL, 0, 0, 0);
@@ -2700,15 +2643,11 @@
 
 	/* Return through std_return after this point. */
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		error = 0;
-		if (XFS_NOT_DQATTACHED(mp, sip))
-			error = xfs_qm_dqattach(sip, 0);
-		if (!error && sip != tdp && XFS_NOT_DQATTACHED(mp, tdp))
-			error = xfs_qm_dqattach(tdp, 0);
-		if (error)
-			goto std_return;
-	}
+	error = XFS_QM_DQATTACH(mp, sip, 0);
+	if (!error && sip != tdp)
+		error = XFS_QM_DQATTACH(mp, tdp, 0);
+	if (error)
+		goto std_return;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -2798,7 +2737,7 @@
 std_return:
 	if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip,
 						DM_EVENT_POSTLINK)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTLINK,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
 				target_dir_bdp, DM_RIGHT_NULL,
 				src_bdp, DM_RIGHT_NULL,
 				target_name, NULL, 0, error, 0);
@@ -2813,8 +2752,6 @@
 
 	goto std_return;
 }
-
-
 /*
  * xfs_mkdir
  *
@@ -2844,7 +2781,7 @@
 	boolean_t		created = B_FALSE;
 	int			dm_event_sent = 0;
 	xfs_prid_t		prid;
-	xfs_dquot_t		*udqp, *gdqp;
+	struct xfs_dquot	*udqp, *gdqp;
 	uint			resblks;
 	int			dm_di_mode;
 	int			dir_namelen;
@@ -2863,7 +2800,7 @@
 	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
-		error = dm_send_namesp_event(DM_EVENT_CREATE,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
 					dir_bdp, DM_RIGHT_NULL, NULL,
 					DM_RIGHT_NULL, dir_name, NULL,
 					dm_di_mode, 0, 0);
@@ -2886,14 +2823,10 @@
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		error = xfs_qm_vop_dqalloc(mp, dp,
-					   current->fsuid, current->fsgid,
-					   XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT,
-					   &udqp, &gdqp);
-		if (error)
-			goto std_return;
-	}
+	error = XFS_QM_DQVOPALLOC(mp, dp, current->fsuid, current->fsgid,
+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
+	if (error)
+		goto std_return;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -2925,12 +2858,9 @@
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (xfs_trans_reserve_quota(tp, udqp, gdqp, resblks, 1, 0)) {
-			error = XFS_ERROR(EDQUOT);
-			goto error_return;
-		}
-	}
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	if (error)
+		goto error_return;
 
 	if (resblks == 0 &&
 	    (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen)))
@@ -2999,9 +2929,7 @@
 	/*
 	 * Attach the dquots to the new inode and modify the icount incore.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		xfs_qm_vop_dqattach_and_dqmod_newinode(tp, cdp, udqp, gdqp);
-	}
+	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
 
 	/*
 	 * If this is a synchronous mount, make sure that the
@@ -3019,11 +2947,8 @@
 	}
 
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
-	if (udqp)
-		 xfs_qm_dqrele(udqp);
-	if (gdqp)
-		 xfs_qm_dqrele(gdqp);
-
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 	if (error) {
 		IRELE(cdp);
 	}
@@ -3035,7 +2960,7 @@
 	if ( (created || (error != 0 && dm_event_sent != 0)) &&
 			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
 						DM_EVENT_POSTCREATE)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTCREATE,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
 					dir_bdp, DM_RIGHT_NULL,
 					created ? XFS_ITOBHV(cdp):NULL,
 					DM_RIGHT_NULL,
@@ -3051,11 +2976,8 @@
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (!dp_joined_to_trans && (dp != NULL)) {
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -3093,6 +3015,7 @@
 
 	dir_vp = BHV_TO_VNODE(dir_bdp);
 	dp = XFS_BHVTOI(dir_bdp);
+	mp = dp->i_mount;
 
 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
 
@@ -3101,7 +3024,7 @@
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
-		error = dm_send_namesp_event(DM_EVENT_REMOVE,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
 					dir_bdp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
 					name, NULL, 0, 0, 0);
@@ -3136,17 +3059,13 @@
 	/*
 	 * Get the dquots for the inodes.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		ASSERT(! error);
-		if (XFS_NOT_DQATTACHED(mp, dp))
-			error = xfs_qm_dqattach(dp, 0);
-		if (!error && dp != cdp && XFS_NOT_DQATTACHED(mp, cdp))
-			error = xfs_qm_dqattach(cdp, 0);
-		if (error) {
-			IRELE(cdp);
-			REMOVE_DEBUG_TRACE(__LINE__);
-			goto std_return;
-		}
+	error = XFS_QM_DQATTACH(mp, dp, 0);
+	if (!error && dp != cdp)
+		error = XFS_QM_DQATTACH(mp, cdp, 0);
+	if (error) {
+		IRELE(cdp);
+		REMOVE_DEBUG_TRACE(__LINE__);
+		goto std_return;
 	}
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
@@ -3298,9 +3217,8 @@
 	/* Fall through to std_return with error = 0 or the errno
 	 * from xfs_trans_commit. */
 std_return:
-	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp,
-						DM_EVENT_POSTREMOVE)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTREMOVE,
+	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) {
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
 					dir_bdp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
 					name, NULL, dm_di_mode,
@@ -3391,7 +3309,7 @@
 	int			n;
 	xfs_buf_t		*bp;
 	xfs_prid_t		prid;
-	xfs_dquot_t		*udqp, *gdqp;
+	struct xfs_dquot	*udqp, *gdqp;
 	uint			resblks;
 	char			*link_name = VNAME(dentry);
 	int			link_namelen;
@@ -3446,10 +3364,9 @@
 	}
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) {
-		error = dm_send_namesp_event(DM_EVENT_SYMLINK, dir_bdp, DM_RIGHT_NULL,
-						NULL, DM_RIGHT_NULL,
-						link_name, target_path,
-						0, 0, 0);
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_bdp,
+					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
+					link_name, target_path, 0, 0, 0);
 		if (error)
 			return error;
 	}
@@ -3465,14 +3382,10 @@
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		error = xfs_qm_vop_dqalloc(mp, dp,
-					   current->fsuid, current->fsgid,
-					   XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT,
-					   &udqp, &gdqp);
-		if (error)
-			goto std_return;
-	}
+	error = XFS_QM_DQVOPALLOC(mp, dp, current->fsuid, current->fsgid,
+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
+	if (error)
+		goto std_return;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -3503,12 +3416,9 @@
 	/*
 	 * Reserve disk quota : blocks and inode.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (xfs_trans_reserve_quota(tp, udqp, gdqp, resblks, 1, 0)) {
-			error = XFS_ERROR(EDQUOT);
-			goto error_return;
-		}
-	}
+	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	if (error)
+		goto error_return;
 
 	/*
 	 * Check for ability to enter directory entry, if no space reserved.
@@ -3543,9 +3453,7 @@
 	/*
 	 * Also attach the dquot(s) to it, if applicable.
 	 */
-	if (XFS_IS_QUOTA_ON(mp)) {
-		xfs_qm_vop_dqattach_and_dqmod_newinode(tp, ip, udqp, gdqp);
-	}
+	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
 
 	if (resblks)
 		resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -3641,22 +3549,19 @@
 		goto error2;
 	}
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	/* Fall through to std_return with error = 0 or errno from
 	 * xfs_trans_commit	*/
 std_return:
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
 			     DM_EVENT_POSTSYMLINK)) {
-		(void) dm_send_namesp_event(DM_EVENT_POSTSYMLINK,
-						dir_bdp, DM_RIGHT_NULL,
-						error? NULL:XFS_ITOBHV(ip),
-						DM_RIGHT_NULL,
-						link_name, target_path,
-						0, error, 0);
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
+					dir_bdp, DM_RIGHT_NULL,
+					error ? NULL : XFS_ITOBHV(ip),
+					DM_RIGHT_NULL, link_name, target_path,
+					0, error, 0);
 	}
 
 	if (!error) {
@@ -3675,10 +3580,8 @@
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-	if (udqp)
-		xfs_qm_dqrele(udqp);
-	if (gdqp)
-		xfs_qm_dqrele(gdqp);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
 
 	if (!dp_joined_to_trans && (dp != NULL)) {
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -4165,7 +4068,7 @@
 	/*
 	 * determine if this is a realtime file
 	 */
-	if ((rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) != 0) {
+	if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
 		if (ip->i_d.di_extsize)
 			rtextsize = ip->i_d.di_extsize;
 		else
@@ -4173,12 +4076,8 @@
 	} else
 		rtextsize = 0;
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (XFS_NOT_DQATTACHED(mp, ip)) {
-			if ((error = xfs_qm_dqattach(ip, 0)))
-				return error;
-		}
-	}
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		return error;
 
 	if (len <= 0)
 		return XFS_ERROR(EINVAL);
@@ -4200,7 +4099,7 @@
 		end_dmi_offset = offset+len;
 		if (end_dmi_offset > ip->i_d.di_size)
 			end_dmi_offset = ip->i_d.di_size;
-		error = xfs_dm_send_data_event(DM_EVENT_WRITE, XFS_ITOBHV(ip),
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOBHV(ip),
 			offset, end_dmi_offset - offset,
 			0, NULL);
 		if (error)
@@ -4255,15 +4154,11 @@
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		if (XFS_IS_QUOTA_ON(mp)) {
-			if (xfs_trans_reserve_quota(tp,
-						    ip->i_udquot,
-						    ip->i_gdquot,
-						    resblks, 0, 0)) {
-				error = XFS_ERROR(EDQUOT);
-				goto error1;
-			}
-		}
+		error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
+				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+		if (error)
+			goto error1;
 
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
@@ -4308,13 +4203,13 @@
 	if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) {
 
-		error = dm_send_namesp_event(DM_EVENT_NOSPACE,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
 				XFS_ITOBHV(ip), DM_RIGHT_NULL,
 				XFS_ITOBHV(ip), DM_RIGHT_NULL,
 				NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
 		if (error == 0)
 			goto retry;	/* Maybe DMAPI app. has made space */
-		/* else fall through with error from xfs_dm_send_data_event */
+		/* else fall through with error from XFS_SEND_NAMESP */
 	}
 
 	return error;
@@ -4434,38 +4329,32 @@
 	int			nimap;
 	uint			resblks;
 	int			rounding;
-	int			specrt;
+	int			rt;
 	xfs_fileoff_t		startoffset_fsb;
 	xfs_trans_t		*tp;
 
 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
 	mp = ip->i_mount;
 
-	if (XFS_IS_QUOTA_ON(mp)) {
-		if (XFS_NOT_DQATTACHED(mp, ip)) {
-			if ((error = xfs_qm_dqattach(ip, 0)))
-				return error;
-		}
-	}
+	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		return error;
 
 	error = 0;
 	if (len <= 0)	/* if nothing being freed */
 		return error;
-	specrt =
-		(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
-		!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb);
+	rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
 	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
 	end_dmi_offset = offset + len;
 	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
 
 	if (offset < ip->i_d.di_size &&
-	    (attr_flags&ATTR_DMI) == 0	&&
+	    (attr_flags & ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_d.di_size)
 			end_dmi_offset = ip->i_d.di_size;
-		error = xfs_dm_send_data_event(DM_EVENT_WRITE, XFS_ITOBHV(ip),
-			offset, end_dmi_offset - offset,
-			AT_DELAY_FLAG(attr_flags), NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOBHV(ip),
+				offset, end_dmi_offset - offset,
+				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
 			return(error);
 	}
@@ -4480,11 +4369,11 @@
 	xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0);
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
-	 * If its specrt (realtime & can't use unwritten extents) then
-	 * we actually need to zero the extent edges.  Otherwise xfs_bunmapi
+	 * If it's a realtime file & can't use unwritten extents then we
+	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
 	 * will take care of it for us.
 	 */
-	if (specrt) {
+	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
 		nimap = 1;
 		error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0,
 			&imap, &nimap, NULL);
@@ -4561,15 +4450,11 @@
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		if (XFS_IS_QUOTA_ON(mp)) {
-			if (xfs_trans_reserve_quota(tp,
-						    ip->i_udquot,
-						    ip->i_gdquot,
-						    resblks, 0, 0)) {
-				error = XFS_ERROR(EDQUOT);
-				goto error1;
-			}
-		}
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
+				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+		if (error)
+			goto error1;
 
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
diff -Nru a/fs/xfs/xfsidbg.c b/fs/xfs/xfsidbg.c
--- a/fs/xfs/xfsidbg.c	Mon Mar 31 13:41:08 2003
+++ b/fs/xfs/xfsidbg.c	Mon Mar 31 13:41:08 2003
@@ -31,8 +31,8 @@
  */
 
 #include <xfs.h>
-#include <xfs_quota_priv.h>
 #include <xfs_log_recover.h>
+#include "quota/xfs_qm.h"
 #include "pagebuf/page_buf_internal.h"
 
 #include <linux/ctype.h>
@@ -1320,13 +1320,13 @@
 	"INVALID0x40000",	/*    0x40000 */
 	"INVALID0x80000",	/*    0x80000 */
 	"VROOT",		/*   0x100000 */
-	"VNOSWAP",		/*   0x200000 */
-	"VISSWAP",		/*   0x400000 */
-	"VREPLICABLE",		/*   0x800000 */
-	"VNOTREPLICABLE",	/*  0x1000000 */
-	"VDOCMP",		/*  0x2000000 */
+	"INVALID0x200000",	/*   0x200000 */
+	"INVALID00x400000",	/*   0x400000 */
+	"INVALID0x800000",	/*   0x800000 */
+	"INVALID0x1000000",	/*  0x1000000 */
+	"INVALID0x2000000",	/*  0x2000000 */
 	"VSHARE",		/*  0x4000000 */
-	"VFRLOCKS",		/*  0x8000000 */
+	"INVALID0x8000000",     /*  0x8000000 */
 	"VENF_LOCKING",		/* 0x10000000 */
 	"VOPLOCK",		/* 0x20000000 */
 	"VPURGE",		/* 0x40000000 */
@@ -1373,9 +1373,10 @@
 
 	kdb_printf("vnode: 0x%p type ", vp);
 	if ((size_t)vp->v_type >= sizeof(vnode_type)/sizeof(vnode_type[0]))
-		kdb_printf("out of range 0x%x\n", vp->v_type);
+		kdb_printf("out of range 0x%x", vp->v_type);
 	else
-		kdb_printf("%s\n", vnode_type[vp->v_type]);
+		kdb_printf("%s", vnode_type[vp->v_type]);
+	kdb_printf(" v_bh %p\n", &vp->v_bh);
 
 	if ((bh = vp->v_bh.bh_first)) {
 		kdb_printf("   v_inode 0x%p v_bh->bh_first 0x%p pobj 0x%p\n",
@@ -1397,6 +1398,9 @@
 #ifdef	CONFIG_XFS_VNODE_TRACING
 	kdb_printf("   v_trace 0x%p\n", vp->v_trace);
 #endif	/* CONFIG_XFS_VNODE_TRACING */
+        
+	kdb_printf("   v_vfsp 0x%p v_number %Lx\n",
+	        vp->v_vfsp, vp->v_number);
 }
 
 
@@ -4808,13 +4812,8 @@
 static void
 xfsidbg_xcore(xfs_iocore_t *io)
 {
-	if (IO_IS_XFS(io)) {
-		kdb_printf("io_obj 0x%p (xinode) io_mount 0x%p\n",
-			io->io_obj, io->io_mount);
-	} else {
-		kdb_printf("io_obj 0x%p (dcxvn) io_mount 0x%p\n",
-			io->io_obj, io->io_mount);
-	}
+        kdb_printf("io_obj 0x%p io_flags 0x%x io_mount 0x%p\n",
+			io->io_obj, io->io_flags, io->io_mount);
 	kdb_printf("new_size %Lx\n", io->io_new_size);
 }
 
diff -Nru a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h
--- a/include/asm-alpha/fcntl.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-alpha/fcntl.h	Mon Mar 31 13:41:07 2003
@@ -69,9 +69,6 @@
 	__kernel_pid_t l_pid;
 };
 
-#ifdef __KERNEL__
-#define flock64	flock
-#endif
 #define F_LINUX_SPECIFIC_BASE  1024
 
 #endif
diff -Nru a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h
--- a/include/asm-alpha/pgtable.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-alpha/pgtable.h	Mon Mar 31 13:41:06 2003
@@ -68,6 +68,7 @@
 /* .. and these are ours ... */
 #define _PAGE_DIRTY	0x20000
 #define _PAGE_ACCESSED	0x40000
+#define _PAGE_FILE	0x80000	/* pagecache or swap? */
 
 /*
  * NOTE! The "accessed" bit isn't necessarily exact:  it can be kept exactly
@@ -254,6 +255,7 @@
 extern inline int pte_exec(pte_t pte)		{ return !(pte_val(pte) & _PAGE_FOE); }
 extern inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 extern inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+extern inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 
 extern inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_FOW; return pte; }
 extern inline pte_t pte_rdprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_FOR; return pte; }
@@ -311,11 +313,16 @@
 extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 { pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; }
 
-#define __swp_type(x)			(((x).val >> 32) & 0xff)
-#define __swp_offset(x)			((x).val >> 40)
-#define __swp_entry(type, offset)	((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+#define __swp_type(x)		(((x).val >> 32) & 0xff)
+#define __swp_offset(x)		((x).val >> 40)
+#define __swp_entry(type, off)	((swp_entry_t) { pte_val(mk_swap_pte((type), (off))) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+
+#define pte_to_pgoff(pte)	(pte_val(pte) >> 32)
+#define pgoff_to_pte(off)	((pte_t) { ((off) << 32) | _PAGE_FILE })
+
+#define PTE_FILE_MAX_BITS	32
 
 #ifndef CONFIG_DISCONTIGMEM
 #define kern_addr_valid(addr)	(1)
diff -Nru a/include/asm-arm/div64.h b/include/asm-arm/div64.h
--- a/include/asm-arm/div64.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-arm/div64.h	Mon Mar 31 13:41:07 2003
@@ -4,9 +4,13 @@
 /* We're not 64-bit, but... */
 #define do_div(n,base)						\
 ({								\
-	int __res;						\
-	__res = ((unsigned long)n) % (unsigned int)base;	\
-	n = ((unsigned long)n) / (unsigned int)base;		\
+	register int __res asm("r2") = base;			\
+	register unsigned long long __n asm("r0") = n;		\
+	asm("bl do_div64"					\
+		: "=r" (__n), "=r" (__res)			\
+		: "0" (__n), "1" (__res)			\
+		: "r3", "ip", "lr", "cc");			\
+	n = __n;						\
 	__res;							\
 })
 
diff -Nru a/include/asm-arm/proc-armv/pgtable.h b/include/asm-arm/proc-armv/pgtable.h
--- a/include/asm-arm/proc-armv/pgtable.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-arm/proc-armv/pgtable.h	Mon Mar 31 13:41:06 2003
@@ -103,6 +103,7 @@
  * entries are stored 1024 bytes below.
  */
 #define L_PTE_PRESENT		(1 << 0)
+#define L_PTE_FILE		(1 << 1)	/* only when !PRESENT */
 #define L_PTE_YOUNG		(1 << 1)
 #define L_PTE_BUFFERABLE	(1 << 2)	/* matches PTE */
 #define L_PTE_CACHEABLE		(1 << 3)	/* matches PTE */
@@ -173,6 +174,7 @@
 #define pte_exec(pte)			(pte_val(pte) & L_PTE_EXEC)
 #define pte_dirty(pte)			(pte_val(pte) & L_PTE_DIRTY)
 #define pte_young(pte)			(pte_val(pte) & L_PTE_YOUNG)
+#define pte_file(pte)			(pte_val(pte) & L_PTE_FILE)
 
 #define PTE_BIT_FUNC(fn,op)			\
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
@@ -195,6 +197,11 @@
 #define pgprot_writecombine(prot) __pgprot(pgprot_val(prot) & ~L_PTE_CACHEABLE)
 
 #define pgtable_cache_init() do { } while (0)
+
+#define pte_to_pgoff(x)	(pte_val(x) >> 2)
+#define pgoff_to_pte(x)	__pte(((x) << 2) | L_PTE_FILE)
+
+#define PTE_FILE_MAX_BITS	30
 
 #endif /* __ASSEMBLY__ */
 
diff -Nru a/include/asm-arm/system.h b/include/asm-arm/system.h
--- a/include/asm-arm/system.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-arm/system.h	Mon Mar 31 13:41:06 2003
@@ -65,12 +65,13 @@
  * The `mb' is to tell GCC not to cache `current' across this call.
  */
 struct thread_info;
-extern struct thread_info *__switch_to(struct thread_info *, struct thread_info *);
+struct task_struct;
+extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *);
 
-#define switch_to(prev,next,last)					\
-	do {			 					\
-		__switch_to(prev->thread_info,next->thread_info);	\
-		mb();							\
+#define switch_to(prev,next,last)						\
+	do {									\
+		last = __switch_to(prev,prev->thread_info,next->thread_info);	\
+		mb();								\
 	} while (0)
 
 #ifdef CONFIG_SMP
diff -Nru a/include/asm-i386/ide.h b/include/asm-i386/ide.h
--- a/include/asm-i386/ide.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-i386/ide.h	Mon Mar 31 13:41:06 2003
@@ -26,6 +26,9 @@
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
+#ifdef CONFIG_X86_PC9800
+		case 0x640: return 9;
+#endif
 		case 0x1f0: return 14;
 		case 0x170: return 15;
 		case 0x1e8: return 11;
@@ -40,12 +43,17 @@
 static __inline__ unsigned long ide_default_io_base(int index)
 {
 	switch (index) {
+#ifdef CONFIG_X86_PC9800
+		case 0:
+		case 1:	return 0x640;
+#else
 		case 0:	return 0x1f0;
 		case 1:	return 0x170;
 		case 2: return 0x1e8;
 		case 3: return 0x168;
 		case 4: return 0x1e0;
 		case 5: return 0x160;
+#endif
 		default:
 			return 0;
 	}
@@ -56,13 +64,24 @@
 {
 	unsigned long reg = data_port;
 	int i;
+#ifdef CONFIG_X86_PC9800
+	unsigned long increment = data_port == 0x640 ? 2 : 1;
+#endif
 
 	for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
 		hw->io_ports[i] = reg;
+#ifdef CONFIG_X86_PC9800
+		reg += increment;
+#else
 		reg += 1;
+#endif
 	}
 	if (ctrl_port) {
 		hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+#ifdef CONFIG_X86_PC9800
+	} else if (data_port == 0x640) {
+		hw->io_ports[IDE_CONTROL_OFFSET] = 0x74c;
+#endif
 	} else {
 		hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
 	}
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-i386/timer.h	Mon Mar 31 13:41:06 2003
@@ -4,14 +4,14 @@
 /**
  * struct timer_ops - used to define a timer source
  *
- * @init: Probes and initializes the timer.  Returns 0 on success, anything
- *	else on failure.
+ * @init: Probes and initializes the timer. Takes the clock= override
+ *	string as an argument. Returns 0 on success, anything else on failure.
  * @mark_offset: called by the timer interrupt
  * @get_offset: called by gettimeofday().  Returns the number of ms since the
  *	last timer intruupt.
  */
 struct timer_opts{
-	int (*init)(void);
+	int (*init)(char *override);
 	void (*mark_offset)(void);
 	unsigned long (*get_offset)(void);
 	void (*delay)(unsigned long);
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-i386/uaccess.h	Mon Mar 31 13:41:06 2003
@@ -510,9 +510,9 @@
  *
  * Context: User context only.  This function may sleep.
  *
- * Get the size of a NULL-terminated string in user space.
+ * Get the size of a NUL-terminated string in user space.
  *
- * Returns the size of the string INCLUDING the terminating NULL.
+ * Returns the size of the string INCLUDING the terminating NUL.
  * On exception, returns 0.
  *
  * If there is a limit on the length of a valid string, you may wish to
diff -Nru a/include/asm-ppc/keylargo.h b/include/asm-ppc/keylargo.h
--- a/include/asm-ppc/keylargo.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-ppc/keylargo.h	Mon Mar 31 13:41:07 2003
@@ -10,12 +10,13 @@
 #define KL_PANGEA_REV		0x100
 
 /* offset from base for feature control registers */
-#define KEYLARGO_MBCR		0x34	/* Media bay control/status */
+#define KEYLARGO_MBCR		0x34	/* KL Only, Media bay control/status */
 #define KEYLARGO_FCR0		0x38
 #define KEYLARGO_FCR1		0x3c
 #define KEYLARGO_FCR2		0x40
 #define KEYLARGO_FCR3		0x44
 #define KEYLARGO_FCR4		0x48
+#define KEYLARGO_FCR5		0x4c	/* Pangea only */
 
 /* GPIO registers */
 #define KEYLARGO_GPIO_LEVELS0		0x50
@@ -85,76 +86,111 @@
 #define KL_MBCR_MB1_DEV_RESET		0x02000000
 #define KL_MBCR_MB1_ENABLE		0x01000000
 
-#define KL0_SCC_B_INTF_ENABLE		0x00000001
+#define KL0_SCC_B_INTF_ENABLE		0x00000001	/* (KL Only) */
 #define KL0_SCC_A_INTF_ENABLE		0x00000002
 #define KL0_SCC_SLOWPCLK		0x00000004
 #define KL0_SCC_RESET			0x00000008
 #define KL0_SCCA_ENABLE			0x00000010
 #define KL0_SCCB_ENABLE			0x00000020
 #define KL0_SCC_CELL_ENABLE		0x00000040
-#define KL0_IRDA_HIGH_BAND		0x00000100
-#define KL0_IRDA_SOURCE2_SEL		0x00000200
-#define KL0_IRDA_SOURCE1_SEL		0x00000400
-#define KL0_IRDA_RESET			0x00000800
-#define KL0_IRDA_DEFAULT1		0x00001000
-#define KL0_IRDA_DEFAULT0		0x00002000
-#define KL0_IRDA_FAST_CONNECT		0x00004000
-#define KL0_IRDA_ENABLE			0x00008000
-#define KL0_IRDA_CLK32_ENABLE		0x00010000
-#define KL0_IRDA_CLK19_ENABLE		0x00020000
+#define KL0_IRDA_HIGH_BAND		0x00000100	/* (KL Only) */
+#define KL0_IRDA_SOURCE2_SEL		0x00000200	/* (KL Only) */
+#define KL0_IRDA_SOURCE1_SEL		0x00000400	/* (KL Only) */
+#define KL0_PG_USB0_PMI_ENABLE		0x00000400	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_RESET			0x00000800	/* (KL Only) */
+#define KL0_PG_USB0_REF_SUSPEND_SEL	0x00000800	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_DEFAULT1		0x00001000	/* (KL Only) */
+#define KL0_PG_USB0_REF_SUSPEND		0x00001000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_DEFAULT0		0x00002000	/* (KL Only) */
+#define KL0_PG_USB0_PAD_SUSPEND		0x00002000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_FAST_CONNECT		0x00004000	/* (KL Only) */
+#define KL0_PG_USB1_PMI_ENABLE		0x00004000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_ENABLE			0x00008000	/* (KL Only) */
+#define KL0_PG_USB1_REF_SUSPEND_SEL	0x00008000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_CLK32_ENABLE		0x00010000	/* (KL Only) */
+#define KL0_PG_USB1_REF_SUSPEND		0x00010000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_CLK19_ENABLE		0x00020000	/* (KL Only) */
+#define KL0_PG_USB1_PAD_SUSPEND		0x00020000	/* (Pangea/Intrepid Only) */
 #define KL0_USB0_PAD_SUSPEND0		0x00040000
 #define KL0_USB0_PAD_SUSPEND1		0x00080000
 #define KL0_USB0_CELL_ENABLE		0x00100000
 #define KL0_USB1_PAD_SUSPEND0		0x00400000
 #define KL0_USB1_PAD_SUSPEND1		0x00800000
 #define KL0_USB1_CELL_ENABLE		0x01000000
-#define KL0_USB_REF_SUSPEND		0x10000000
+#define KL0_USB_REF_SUSPEND		0x10000000	/* (KL Only) */
 
 #define KL0_SERIAL_ENABLE		(KL0_SCC_B_INTF_ENABLE | \
 					KL0_SCC_SLOWPCLK | \
 					KL0_SCC_CELL_ENABLE | KL0_SCCA_ENABLE)
 
-#define KL1_AUDIO_SEL_22MCLK		0x00000002
-#define KL1_AUDIO_CLK_ENABLE_BIT	0x00000008
-#define KL1_AUDIO_CLK_OUT_ENABLE	0x00000020	/* Burgundy only ? */
-#define KL1_AUDIO_CELL_ENABLE		0x00000040
-#define KL1_AUDIO_CHOOSE		0x00000080	/* Burgundy only ? */
+#define KL1_USB2_PMI_ENABLE		0x00000001	/* Intrepid only */
+#define KL1_AUDIO_SEL_22MCLK		0x00000002	/* KL/Pangea only */
+#define KL1_USB2_REF_SUSPEND_SEL	0x00000002	/* Intrepid only */
+#define KL1_USB2_REF_SUSPEND		0x00000004	/* Intrepid only */
+#define KL1_AUDIO_CLK_ENABLE_BIT	0x00000008	/* KL/Pangea only */
+#define KL1_USB2_PAD_SUSPEND_SEL	0x00000008	/* Intrepid only */
+#define KL1_USB2_PAD_SUSPEND0		0x00000010	/* Intrepid only */
+#define KL1_AUDIO_CLK_OUT_ENABLE	0x00000020	/* KL/Pangea only */
+#define KL1_USB2_PAD_SUSPEND1		0x00000020	/* Intrepid only */
+#define KL1_AUDIO_CELL_ENABLE		0x00000040	/* KL/Pangea only */
+#define KL1_USB2_CELL_ENABLE		0x00000040	/* Intrepid only */
+#define KL1_AUDIO_CHOOSE		0x00000080	/* KL/Pangea only */
+#define KL1_I2S0_CHOOSE			0x00000200	/* KL Only */
 #define KL1_I2S0_CELL_ENABLE		0x00000400
 #define KL1_I2S0_CLK_ENABLE_BIT		0x00001000
 #define KL1_I2S0_ENABLE			0x00002000
 #define KL1_I2S1_CELL_ENABLE		0x00020000
 #define KL1_I2S1_CLK_ENABLE_BIT		0x00080000
 #define KL1_I2S1_ENABLE			0x00100000
-#define KL1_EIDE0_ENABLE		0x00800000
-#define KL1_EIDE0_RESET_N		0x01000000
-#define KL1_EIDE1_ENABLE		0x04000000
-#define KL1_EIDE1_RESET_N		0x08000000
-#define KL1_UIDE_ENABLE			0x20000000
-#define KL1_UIDE_RESET_N		0x40000000
+#define KL1_EIDE0_ENABLE		0x00800000	/* KL/Intrepid Only */
+#define KL1_EIDE0_RESET_N		0x01000000	/* KL/Intrepid Only */
+#define KL1_EIDE1_ENABLE		0x04000000	/* KL Only */
+#define KL1_EIDE1_RESET_N		0x08000000	/* KL Only */
+#define KL1_UIDE_ENABLE			0x20000000	/* KL/Pangea Only */
+#define KL1_UIDE_RESET_N		0x40000000	/* KL/Pangea Only */
 
 #define KL2_IOBUS_ENABLE		0x00000002
-#define KL2_SLEEP_STATE_BIT		0x00000100
+#define KL2_SLEEP_STATE_BIT		0x00000100	/* KL Only */
+#define KL2_PG_STOP_ALL_CLOCKS		0x00000100	/* Pangea Only */
 #define KL2_MPIC_ENABLE			0x00020000
-#define KL2_ALT_DATA_OUT		0x02000000
+#define KL2_CARDSLOT_RESET		0x00040000	/* Pangea/Intrepid Only */
+#define KL2_ALT_DATA_OUT		0x02000000	/* KL Only ??? */
 #define KL2_MEM_IS_BIG			0x04000000
 #define KL2_CARDSEL_16			0x08000000
 
-#define KL3_SHUTDOWN_PLL_TOTAL		0x00000001
-#define KL3_SHUTDOWN_PLLKW6		0x00000002
-#define KL3_SHUTDOWN_PLLKW4		0x00000004
-#define KL3_SHUTDOWN_PLLKW35		0x00000008
-#define KL3_SHUTDOWN_PLLKW12		0x00000010
-#define KL3_PLL_RESET			0x00000020
-#define KL3_SHUTDOWN_PLL2X		0x00000080
-#define KL3_CLK66_ENABLE		0x00000100
+#define KL3_SHUTDOWN_PLL_TOTAL		0x00000001	/* KL/Pangea only */
+#define KL3_SHUTDOWN_PLLKW6		0x00000002	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL3		0x00000002	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW4		0x00000004	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL2		0x00000004	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW35		0x00000008	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL1		0x00000008	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW12		0x00000010	/* KL Only */
+#define KL3_IT_ENABLE_PLL3_SHUTDOWN	0x00000010	/* Intrepid only */
+#define KL3_PLL_RESET			0x00000020	/* KL/Pangea only */
+#define KL3_IT_ENABLE_PLL2_SHUTDOWN	0x00000020	/* Intrepid only */
+#define KL3_IT_ENABLE_PLL1_SHUTDOWN	0x00000010	/* Intrepid only; NOTE(review): same value as KL3_IT_ENABLE_PLL3_SHUTDOWN - confirm */
+#define KL3_SHUTDOWN_PLL2X		0x00000080	/* KL Only */
+#define KL3_CLK66_ENABLE		0x00000100	/* KL Only */
 #define KL3_CLK49_ENABLE		0x00000200
 #define KL3_CLK45_ENABLE		0x00000400
-#define KL3_CLK31_ENABLE		0x00000800
+#define KL3_CLK31_ENABLE		0x00000800	/* KL/Pangea only */
 #define KL3_TIMER_CLK18_ENABLE		0x00001000
 #define KL3_I2S1_CLK18_ENABLE		0x00002000
 #define KL3_I2S0_CLK18_ENABLE		0x00004000
-#define KL3_VIA_CLK16_ENABLE		0x00008000
-#define KL3_STOPPING33_ENABLED		0x00080000
+#define KL3_VIA_CLK16_ENABLE		0x00008000	/* KL/Pangea only */
+#define KL3_IT_VIA_CLK32_ENABLE		0x00008000	/* Intrepid only */
+#define KL3_STOPPING33_ENABLED		0x00080000	/* KL Only */
+#define KL3_PG_PLL_ENABLE_TEST		0x00080000	/* Pangea Only */
+
+/* Intrepid USB bus 2, port 0,1 */
+#define KL3_IT_PORT_WAKEUP_ENABLE(p)		(0x00080000 << ((p)<<3))
+#define KL3_IT_PORT_RESUME_WAKE_EN(p)		(0x00040000 << ((p)<<3))
+#define KL3_IT_PORT_CONNECT_WAKE_EN(p)		(0x00020000 << ((p)<<3))
+#define KL3_IT_PORT_DISCONNECT_WAKE_EN(p)	(0x00010000 << ((p)<<3))
+#define KL3_IT_PORT_RESUME_STAT(p)		(0x00300000 << ((p)<<3))	/* NOTE(review): mask overlaps CONNECT_STAT|DISCONNECT_STAT - confirm */
+#define KL3_IT_PORT_CONNECT_STAT(p)		(0x00200000 << ((p)<<3))
+#define KL3_IT_PORT_DISCONNECT_STAT(p)		(0x00100000 << ((p)<<3))
 
 /* Port 0,1 : bus 0, port 2,3 : bus 1 */
 #define KL4_PORT_WAKEUP_ENABLE(p)	(0x00000008 << ((p)<<3))
@@ -164,4 +200,11 @@
 #define KL4_PORT_RESUME_STAT(p)		(0x00000040 << ((p)<<3))
 #define KL4_PORT_CONNECT_STAT(p)	(0x00000020 << ((p)<<3))
 #define KL4_PORT_DISCONNECT_STAT(p)	(0x00000010 << ((p)<<3))
+
+/* Pangea and Intrepid only */
+#define KL5_VIA_USE_CLK31		0x000000001	/* Pangea Only */
+#define KL5_SCC_USE_CLK31		0x000000002	/* Pangea Only */
+#define KL5_PWM_CLK32_EN		0x000000004
+#define KL5_CLK3_68_EN			0x000000010
+#define KL5_CLK32_EN			0x000000020
 
diff -Nru a/include/asm-ppc/macio_asic.h b/include/asm-ppc/macio_asic.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/include/asm-ppc/macio_asic.h	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,80 @@
+#ifndef __MACIO_ASIC_H__
+#define __MACIO_ASIC_H__
+
+#include <linux/device.h>
+
+extern struct bus_type macio_bus_type;
+
+/* MacIO device driver is defined later */
+struct macio_driver;
+struct macio_chip;
+
+#define MACIO_DEV_COUNT_RESOURCE	8
+#define MACIO_DEV_COUNT_IRQS		8
+
+/*
+ * the macio_bus structure is used to describe a "virtual" bus
+ * within a MacIO ASIC. It's typically provided by a macio_pci_asic
+ * PCI device, but could be provided differently as well (nubus
+ * machines using a fake OF tree).
+ */
+struct macio_bus
+{
+	struct macio_chip	*chip;		/* macio_chip (private use) */
+	struct pci_dev		*pdev;		/* PCI device hosting this bus */
+	struct list_head	devices;	/* list of devices on this bus */
+};
+
+/*
+ * the macio_dev structure is used to describe a device
+ * within an Apple MacIO ASIC.
+ */
+struct macio_dev
+{
+	struct macio_bus	*bus;		/* virtual bus this device is on */
+
+	struct device_node	*node;		/* OF node */	
+	struct macio_driver	*driver;	/* which driver allocated this device */
+	void			*driver_data;	/* placeholder for driver-specific data */
+	struct resource		resources[MACIO_DEV_COUNT_RESOURCE]; /* I/O */
+	int			irqs[MACIO_DEV_COUNT_IRQS];	/* up to MACIO_DEV_COUNT_IRQS entries */
+	
+	struct device		dev;		/* Generic device interface */
+};
+#define	to_macio_device(d) container_of(d, struct macio_dev, dev)
+
+/*
+ * Struct used for matching a device
+ */
+struct macio_match
+{
+	char	*name;
+	char	*type;
+	char	*compatible;
+};
+#define MACIO_ANY_MATCH		((char *)-1L)
+
+/*
+ * A driver for a mac-io chip based device
+ */
+struct macio_driver
+{
+	struct list_head	node;
+	char			*name;
+	struct macio_match	*match_table;
+
+	int	(*probe)(struct macio_dev* dev, const struct macio_match *match);
+	int	(*remove)(struct macio_dev* dev);
+
+	int	(*suspend)(struct macio_dev* dev, u32 state, u32 level);
+	int	(*resume)(struct macio_dev* dev, u32 level);
+	int	(*shutdown)(struct macio_dev* dev);
+
+	struct device_driver	driver;
+};
+#define	to_macio_driver(drv) container_of(drv,struct macio_driver, driver)
+
+extern int macio_register_driver(struct macio_driver *);
+extern void macio_unregister_driver(struct macio_driver *);
+
+#endif /* __MACIO_ASIC_H__ */
diff -Nru a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
--- a/include/asm-ppc/pgtable.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-ppc/pgtable.h	Mon Mar 31 13:41:06 2003
@@ -152,6 +152,7 @@
 
 /* Definitions for 40x embedded chips. */
 #define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
+#define _PAGE_FILE	0x001	/* when !present: nonlinear file mapping */
 #define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
 #define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
 #define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
@@ -172,6 +173,7 @@
 #elif defined(CONFIG_8xx)
 /* Definitions for 8xx embedded chips. */
 #define _PAGE_PRESENT	0x0001	/* Page is valid */
+#define _PAGE_FILE	0x0002	/* when !present: nonlinear file mapping */
 #define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
 #define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
 
@@ -210,6 +212,7 @@
 /* Definitions for 60x, 740/750, etc. */
 #define _PAGE_PRESENT	0x001	/* software: pte contains a translation */
 #define _PAGE_HASHPTE	0x002	/* hash_page has made an HPTE for this pte */
+#define _PAGE_FILE	0x004	/* when !present: nonlinear file mapping */
 #define _PAGE_USER	0x004	/* usermode access allowed */
 #define _PAGE_GUARDED	0x008	/* G: prohibit speculative access */
 #define _PAGE_COHERENT	0x010	/* M: enforce memory coherence (SMP systems) */
@@ -378,6 +381,7 @@
 static inline int pte_exec(pte_t pte)		{ return pte_val(pte) & _PAGE_EXEC; }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 
 static inline void pte_uncache(pte_t pte)       { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte)         { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -523,14 +527,19 @@
 /*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit
- * (if used).  -- paulus
+ * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
+ * _PAGE_HASHPTE bit (if used).  -- paulus
  */
-#define __swp_type(entry)		((entry).val & 0x3f)
-#define __swp_offset(entry)		((entry).val >> 6)
-#define __swp_entry(type, offset)	((swp_entry_t) { (type) | ((offset) << 6) })
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 2 })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 2 })
+#define __swp_type(entry)		((entry).val & 0x1f)
+#define __swp_offset(entry)		((entry).val >> 5)
+#define __swp_entry(type, offset)	((swp_entry_t) { (type) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
+
+/* Encode and decode a nonlinear file mapping entry */
+#define PTE_FILE_MAX_BITS	29
+#define pte_to_pgoff(pte)	(pte_val(pte) >> 3)
+#define pgoff_to_pte(off)	((pte_t) { ((off) << 3) | _PAGE_FILE })
 
 /* CONFIG_APUS */
 /* For virtual address to physical address conversion */
diff -Nru a/include/asm-ppc/pmac_feature.h b/include/asm-ppc/pmac_feature.h
--- a/include/asm-ppc/pmac_feature.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-ppc/pmac_feature.h	Mon Mar 31 13:41:06 2003
@@ -31,10 +31,16 @@
 #ifndef __PPC_ASM_PMAC_FEATURE_H
 #define __PPC_ASM_PMAC_FEATURE_H
 
+#include <asm/macio_asic.h>
+
 /*
  * Known Mac motherboard models
  * 
  * Please, report any error here to benh@kernel.crashing.org, thanks !
+ * 
+ * Note that I don't fully maintain this list for Core99 & MacRISC2
+ * and I'm considering removing all NewWorld entries from it and
+ * relying entirely on the model string.
  */
 
 /* PowerSurge are the first generation of PCI Pmacs. This include
@@ -85,10 +91,16 @@
 #define PMAC_TYPE_QUICKSILVER		0x45	/* QuickSilver G4s */
 #define PMAC_TYPE_PISMO			0x46	/* Pismo PowerBook */
 #define PMAC_TYPE_TITANIUM		0x47	/* Titanium PowerBook */
-#define PMAC_TYPE_TITANIUM2		0x48	/* Titanium II PowerBook */
-#define PMAC_TYPE_TITANIUM3		0x49	/* Titanium III PowerBook (with L3) */
+#define PMAC_TYPE_TITANIUM2		0x48	/* Titanium II PowerBook (no L3, M6) */
+#define PMAC_TYPE_TITANIUM3		0x49	/* Titanium III PowerBook (with L3 & M7) */
+#define PMAC_TYPE_TITANIUM4		0x50	/* Titanium IV PowerBook (with L3 & M9) */
+#define PMAC_TYPE_EMAC			0x50	/* eMac; NOTE(review): same value as PMAC_TYPE_TITANIUM4 - confirm */
 #define PMAC_TYPE_UNKNOWN_CORE99	0x5f
 
+/* MacRISC2 with UniNorth 2.0 */
+#define PMAC_TYPE_RACKMAC		0x80	/* XServe */
+#define PMAC_TYPE_WINDTUNNEL		0x81	
+
 /* MacRISC2 machines based on the Pangea chipset
  */
 #define PMAC_TYPE_PANGEA_IMAC		0x100	/* Flower Power iMac */
@@ -96,12 +108,18 @@
 #define PMAC_TYPE_FLAT_PANEL_IMAC	0x102	/* Flat panel iMac */
 #define PMAC_TYPE_UNKNOWN_PANGEA	0x10f
 
+/* MacRISC2 machines based on the Intrepid chipset
+ */
+#define PMAC_TYPE_UNKNOWN_INTREPID	0x11f	/* Generic */
+
 /*
  * Motherboard flags
  */
 
 #define PMAC_MB_CAN_SLEEP		0x00000001
 #define PMAC_MB_HAS_FW_POWER		0x00000002
+#define PMAC_MB_OLD_CORE99		0x00000004
+#define PMAC_MB_MOBILE			0x00000008
 
 /*
  * Feature calls supported on pmac
@@ -250,6 +268,61 @@
 extern void pmac_feature_init(void);
 
 #define PMAC_FTR_DEF(x) ((_MACH_Pmac << 16) | (x))
+
+
+/*
+ * The part below is for use by macio_asic.c only, do not rely
+ * on the data structures or constants below in a normal driver
+ *
+ */
+ 
+#define MAX_MACIO_CHIPS		2
+
+enum {
+	macio_unknown = 0,
+	macio_grand_central,
+	macio_ohare,
+	macio_ohareII,
+	macio_heathrow,
+	macio_gatwick,
+	macio_paddington,
+	macio_keylargo,
+	macio_pangea,
+	macio_intrepid,
+};
+
+struct macio_chip
+{
+	struct device_node	*of_node;
+	int			type;
+	const char		*name;
+	int			rev;
+	volatile u32		*base;
+	unsigned long		flags;
+
+	/* For use by macio_asic PCI driver */
+	struct macio_bus	lbus;
+};
+
+extern struct macio_chip macio_chips[MAX_MACIO_CHIPS];
+
+#define MACIO_FLAG_SCCA_ON	0x00000001
+#define MACIO_FLAG_SCCB_ON	0x00000002
+#define MACIO_FLAG_SCC_LOCKED	0x00000004
+#define MACIO_FLAG_AIRPORT_ON	0x00000010
+#define MACIO_FLAG_FW_SUPPORTED	0x00000020
+
+extern struct macio_chip* macio_find(struct device_node* child, int type);
+
+#define MACIO_FCR32(macio, r)	((macio)->base + ((r) >> 2))
+#define MACIO_FCR8(macio, r)	(((volatile u8*)((macio)->base)) + (r))
+
+#define MACIO_IN32(r)		(in_le32(MACIO_FCR32(macio,r)))
+#define MACIO_OUT32(r,v)	(out_le32(MACIO_FCR32(macio,r), (v)))
+#define MACIO_BIS(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) | (v)))
+#define MACIO_BIC(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) & ~(v)))
+#define MACIO_IN8(r)		(in_8(MACIO_FCR8(macio,r)))
+#define MACIO_OUT8(r,v)		(out_8(MACIO_FCR8(macio,r), (v)))
 
 #endif /* __PPC_ASM_PMAC_FEATURE_H */
 #endif /* __KERNEL__ */
diff -Nru a/include/asm-ppc/uninorth.h b/include/asm-ppc/uninorth.h
--- a/include/asm-ppc/uninorth.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-ppc/uninorth.h	Mon Mar 31 13:41:08 2003
@@ -28,6 +28,7 @@
 #define UNI_N_CFG_GART_INVAL		0x00000001
 #define UNI_N_CFG_GART_ENABLE		0x00000100
 #define UNI_N_CFG_GART_2xRESET		0x00010000
+#define UNI_N_CFG_GART_DISSBADET	0x00020000
 
 /* My understanding of UniNorth AGP as of UniNorth rev 1.0x,
  * revision 1.5 (x4 AGP) may need further changes.
@@ -94,6 +95,7 @@
 #define UNI_N_CLOCK_CNTL_PCI		0x00000001	/* PCI2 clock control */
 #define UNI_N_CLOCK_CNTL_GMAC		0x00000002	/* GMAC clock control */
 #define UNI_N_CLOCK_CNTL_FW		0x00000004	/* FireWire clock control */
+#define UNI_N_CLOCK_CNTL_ATA100		0x00000010	/* ATA-100 clock control (U2) */
 
 /* Power Management control */
 #define UNI_N_POWER_MGT			0x0030
diff -Nru a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h
--- a/include/asm-ppc64/machdep.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-ppc64/machdep.h	Mon Mar 31 13:41:06 2003
@@ -90,22 +90,6 @@
 	unsigned char	(*udbg_getc)(void);
 	int		(*udbg_getc_poll)(void);
 
-	/* PCI interfaces */
-	int (*pcibios_read_config)(struct device_node *dn, int where, int size,
-				   u32 *val);
-	int (*pcibios_write_config)(struct device_node *dn, int where,
-				    int size, u32 val);
-
-	/* Called after scanning the bus, before allocating
-	 * resources
-	 */
-	void (*pcibios_fixup)(void);
-
-	/* Called for each PCI bus in the system
-	 * when it's probed
-	 */
-	void (*pcibios_fixup_bus)(struct pci_bus *);
-
 #ifdef CONFIG_SMP
 	/* functions for dealing with other cpus */
 	struct smp_ops_t smp_ops;
diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h
--- a/include/asm-ppc64/pci-bridge.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-ppc64/pci-bridge.h	Mon Mar 31 13:41:06 2003
@@ -40,7 +40,7 @@
 	void *io_base_virt;
 	unsigned long io_base_phys;
 
-	/* Some machines (PReP) have a non 1:1 mapping of
+	/* Some machines have a non 1:1 mapping of
 	 * the PCI memory space in the CPU bus space
 	 */
 	unsigned long pci_mem_offset;
diff -Nru a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h
--- a/include/asm-ppc64/pci.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-ppc64/pci.h	Mon Mar 31 13:41:07 2003
@@ -16,11 +16,6 @@
 #include <asm/io.h>
 #include <asm/prom.h>
 
-static inline int pcibios_assign_all_busses(void)
-{
-	return 0;
-}
-
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
 
@@ -36,7 +31,18 @@
 
 struct pci_dev;
 
-extern char* pci_card_location(struct pci_dev*);
+#define HAVE_ARCH_PCI_MWI 1
+static inline int pcibios_prep_mwi(struct pci_dev *dev)
+{
+	/*
+	 * pSeries firmware sets the cacheline size and the hardware treats
+	 * MWI the same as a plain memory write, so we don't change the
+	 * cacheline size or the MWI bit; dev is unused, always returns 1.
+	 */
+	return 1;
+}
+
+extern unsigned int pcibios_assign_all_busses(void);
 
 extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
 				  dma_addr_t *dma_handle);
@@ -52,8 +58,6 @@
 extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
                          int nents, int direction);
 
-extern void pSeries_pcibios_init_early(void);
-
 static inline void pci_dma_sync_single(struct pci_dev *hwdev,
 				       dma_addr_t dma_handle,
 				       size_t size, int direction)
@@ -122,9 +126,10 @@
  */
 #define PCI_DMA_BUS_IS_PHYS	(0)
 	
-#endif	/* __KERNEL__ */
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			struct resource *res);
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
+#endif	/* __KERNEL__ */
 
 #endif /* __PPC64_PCI_H */
diff -Nru a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
--- a/include/asm-s390/bitops.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-s390/bitops.h	Mon Mar 31 13:41:06 2003
@@ -51,7 +51,7 @@
 
 #ifdef CONFIG_SMP
 /*
- * SMP save set_bit routine based on compare and swap (CS)
+ * SMP safe set_bit routine based on compare and swap (CS)
  */
 static inline void set_bit_cs(int nr, volatile unsigned long *ptr)
 {
@@ -76,7 +76,7 @@
 }
 
 /*
- * SMP save clear_bit routine based on compare and swap (CS)
+ * SMP safe clear_bit routine based on compare and swap (CS)
  */
 static inline void clear_bit_cs(int nr, volatile unsigned long *ptr)
 {
@@ -101,7 +101,7 @@
 }
 
 /*
- * SMP save change_bit routine based on compare and swap (CS)
+ * SMP safe change_bit routine based on compare and swap (CS)
  */
 static inline void change_bit_cs(int nr, volatile unsigned long *ptr)
 {
@@ -126,7 +126,7 @@
 }
 
 /*
- * SMP save test_and_set_bit routine based on compare and swap (CS)
+ * SMP safe test_and_set_bit routine based on compare and swap (CS)
  */
 static inline int
 test_and_set_bit_cs(int nr, volatile unsigned long *ptr)
@@ -153,7 +153,7 @@
 }
 
 /*
- * SMP save test_and_clear_bit routine based on compare and swap (CS)
+ * SMP safe test_and_clear_bit routine based on compare and swap (CS)
  */
 static inline int
 test_and_clear_bit_cs(int nr, volatile unsigned long *ptr)
@@ -180,7 +180,7 @@
 }
 
 /*
- * SMP save test_and_change_bit routine based on compare and swap (CS) 
+ * SMP safe test_and_change_bit routine based on compare and swap (CS) 
  */
 static inline int
 test_and_change_bit_cs(int nr, volatile unsigned long *ptr)
diff -Nru a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h
--- a/include/asm-sparc64/pci.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-sparc64/pci.h	Mon Mar 31 13:41:06 2003
@@ -205,9 +205,14 @@
 #define HAVE_ARCH_PCI_MWI
 extern int pcibios_prep_mwi(struct pci_dev *dev);
 
-#endif /* __KERNEL__ */
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			struct resource *res);
+
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region);
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
+#endif /* __KERNEL__ */
 
 #endif /* __SPARC64_PCI_H */
diff -Nru a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
--- a/include/asm-sparc64/rwsem.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-sparc64/rwsem.h	Mon Mar 31 13:41:07 2003
@@ -18,11 +18,6 @@
 
 struct rwsem_waiter;
 
-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
-extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *));
-
 struct rw_semaphore {
 	signed int count;
 #define RWSEM_UNLOCKED_VALUE		0x00000000
@@ -48,222 +43,13 @@
 	INIT_LIST_HEAD(&sem->wait_list);
 }
 
-static __inline__ void __down_read(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"! beginning __down_read\n"
-		"1:\tlduw	[%0], %%g5\n\t"
-		"add		%%g5, 1, %%g7\n\t"
-		"cas		[%0], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" add		%%g7, 1, %%g7\n\t"
-		"cmp		%%g7, 0\n\t"
-		"bl,pn		%%icc, 3f\n\t"
-		" membar	#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		".subsection	2\n"
-		"3:\tmov	%0, %%g5\n\t"
-		"save		%%sp, -160, %%sp\n\t"
-		"mov		%%g1, %%l1\n\t"
-		"mov		%%g2, %%l2\n\t"
-		"mov		%%g3, %%l3\n\t"
-		"call		%1\n\t"
-		" mov		%%g5, %%o0\n\t"
-		"mov		%%l1, %%g1\n\t"
-		"mov		%%l2, %%g2\n\t"
-		"ba,pt		%%xcc, 2b\n\t"
-		" restore	%%l3, %%g0, %%g3\n\t"
-		".previous\n\t"
-		"! ending __down_read"
-		: : "r" (sem), "i" (rwsem_down_read_failed)
-		: "g5", "g7", "memory", "cc");
-}
-
-static __inline__ int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int result;
-
-	__asm__ __volatile__(
-		"! beginning __down_read_trylock\n"
-		"1:\tlduw	[%1], %%g5\n\t"
-		"add		%%g5, 1, %%g7\n\t"
-		"cmp		%%g7, 0\n\t"
-		"bl,pn		%%icc, 2f\n\t"
-		" mov		0, %0\n\t"
-		"cas		[%1], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" mov		1, %0\n\t"
-		"membar		#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		"! ending __down_read_trylock"
-		: "=&r" (result)
-                : "r" (sem)
-		: "g5", "g7", "memory", "cc");
-
-	return result;
-}
-
-static __inline__ void __down_write(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"! beginning __down_write\n\t"
-		"sethi		%%hi(%2), %%g1\n\t"
-		"or		%%g1, %%lo(%2), %%g1\n"
-		"1:\tlduw	[%0], %%g5\n\t"
-		"add		%%g5, %%g1, %%g7\n\t"
-		"cas		[%0], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" cmp		%%g7, 0\n\t"
-		"bne,pn		%%icc, 3f\n\t"
-		" membar	#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		".subsection	2\n"
-		"3:\tmov	%0, %%g5\n\t"
-		"save		%%sp, -160, %%sp\n\t"
-		"mov		%%g2, %%l2\n\t"
-		"mov		%%g3, %%l3\n\t"
-		"call		%1\n\t"
-		" mov		%%g5, %%o0\n\t"
-		"mov		%%l2, %%g2\n\t"
-		"ba,pt		%%xcc, 2b\n\t"
-		" restore	%%l3, %%g0, %%g3\n\t"
-		".previous\n\t"
-		"! ending __down_write"
-		: : "r" (sem), "i" (rwsem_down_write_failed),
-		    "i" (RWSEM_ACTIVE_WRITE_BIAS)
-		: "g1", "g5", "g7", "memory", "cc");
-}
-
-static __inline__ int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int result;
-
-	__asm__ __volatile__(
-		"! beginning __down_write_trylock\n\t"
-		"sethi		%%hi(%2), %%g1\n\t"
-		"or		%%g1, %%lo(%2), %%g1\n"
-		"1:\tlduw	[%1], %%g5\n\t"
-		"cmp		%%g5, 0\n\t"
-		"bne,pn		%%icc, 2f\n\t"
-		" mov		0, %0\n\t"
-		"add		%%g5, %%g1, %%g7\n\t"
-		"cas		[%1], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" mov		1, %0\n\t"
-		"membar		#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		"! ending __down_write_trylock"
-		: "=&r" (result)
-		: "r" (sem), "i" (RWSEM_ACTIVE_WRITE_BIAS)
-		: "g1", "g5", "g7", "memory", "cc");
-
-	return result;
-}
-
-static __inline__ void __up_read(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"! beginning __up_read\n\t"
-		"1:\tlduw	[%0], %%g5\n\t"
-		"sub		%%g5, 1, %%g7\n\t"
-		"cas		[%0], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" cmp		%%g7, 0\n\t"
-		"bl,pn		%%icc, 3f\n\t"
-		" membar	#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		".subsection	2\n"
-		"3:\tsethi	%%hi(%2), %%g1\n\t"
-		"sub		%%g7, 1, %%g7\n\t"
-		"or		%%g1, %%lo(%2), %%g1\n\t"
-		"andcc		%%g7, %%g1, %%g0\n\t"
-		"bne,pn		%%icc, 2b\n\t"
-		" mov		%0, %%g5\n\t"
-		"save		%%sp, -160, %%sp\n\t"
-		"mov		%%g2, %%l2\n\t"
-		"mov		%%g3, %%l3\n\t"
-		"call		%1\n\t"
-		" mov		%%g5, %%o0\n\t"
-		"mov		%%l2, %%g2\n\t"
-		"ba,pt		%%xcc, 2b\n\t"
-		" restore	%%l3, %%g0, %%g3\n\t"
-		".previous\n\t"
-		"! ending __up_read"
-		: : "r" (sem), "i" (rwsem_wake),
-		    "i" (RWSEM_ACTIVE_MASK)
-		: "g1", "g5", "g7", "memory", "cc");
-}
-
-static __inline__ void __up_write(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"! beginning __up_write\n\t"
-		"sethi		%%hi(%2), %%g1\n\t"
-		"or		%%g1, %%lo(%2), %%g1\n"
-		"1:\tlduw	[%0], %%g5\n\t"
-		"sub		%%g5, %%g1, %%g7\n\t"
-		"cas		[%0], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" sub		%%g7, %%g1, %%g7\n\t"
-		"cmp		%%g7, 0\n\t"
-		"bl,pn		%%icc, 3f\n\t"
-		" membar	#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		".subsection 2\n"
-		"3:\tmov	%0, %%g5\n\t"
-		"save		%%sp, -160, %%sp\n\t"
-		"mov		%%g2, %%l2\n\t"
-		"mov		%%g3, %%l3\n\t"
-		"call		%1\n\t"
-		" mov		%%g5, %%o0\n\t"
-		"mov		%%l2, %%g2\n\t"
-		"ba,pt		%%xcc, 2b\n\t"
-		" restore	%%l3, %%g0, %%g3\n\t"
-		".previous\n\t"
-		"! ending __up_write"
-		: : "r" (sem), "i" (rwsem_wake),
-		    "i" (RWSEM_ACTIVE_WRITE_BIAS)
-		: "g1", "g5", "g7", "memory", "cc");
-}
-
-static __inline__ void __downgrade_write(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"! beginning __up_write\n\t"
-		"sethi		%%hi(%2), %%g1\n\t"
-		"or		%%g1, %%lo(%2), %%g1\n"
-		"1:\tlduw	[%0], %%g5\n\t"
-		"sub		%%g5, %%g1, %%g7\n\t"
-		"cas		[%0], %%g5, %%g7\n\t"
-		"cmp		%%g5, %%g7\n\t"
-		"bne,pn		%%icc, 1b\n\t"
-		" sub		%%g7, %%g1, %%g7\n\t"
-		"cmp		%%g7, 0\n\t"
-		"bl,pn		%%icc, 3f\n\t"
-		" membar	#StoreLoad | #StoreStore\n"
-		"2:\n\t"
-		".subsection 2\n"
-		"3:\tmov	%0, %%g5\n\t"
-		"save		%%sp, -160, %%sp\n\t"
-		"mov		%%g2, %%l2\n\t"
-		"mov		%%g3, %%l3\n\t"
-		"call		%1\n\t"
-		" mov		%%g5, %%o0\n\t"
-		"mov		%%l2, %%g2\n\t"
-		"ba,pt		%%xcc, 2b\n\t"
-		" restore	%%l3, %%g0, %%g3\n\t"
-		".previous\n\t"
-		"! ending __up_write"
-		: : "r" (sem), "i" (rwsem_downgrade_wake),
-		    "i" (RWSEM_WAITING_BIAS)
-		: "g1", "g5", "g7", "memory", "cc");
-}
+extern void __down_read(struct rw_semaphore *sem);
+extern int __down_read_trylock(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+extern int __down_write_trylock(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
+extern void __up_write(struct rw_semaphore *sem);
+extern void __downgrade_write(struct rw_semaphore *sem);
 
 static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 {
diff -Nru a/include/asm-sparc64/timer.h b/include/asm-sparc64/timer.h
--- a/include/asm-sparc64/timer.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-sparc64/timer.h	Mon Mar 31 13:41:06 2003
@@ -66,4 +66,10 @@
 extern void timer_tick_interrupt(struct pt_regs *);
 #endif
 
+#ifndef CONFIG_SMP
+extern unsigned long up_clock_tick;
+#endif
+
+extern unsigned long sparc64_get_clock_tick(unsigned int cpu);
+
 #endif /* _SPARC64_TIMER_H */
diff -Nru a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
--- a/include/asm-x86_64/apic.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-x86_64/apic.h	Mon Mar 31 13:41:08 2003
@@ -75,7 +75,7 @@
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
diff -Nru a/include/asm-x86_64/debugreg.h b/include/asm-x86_64/debugreg.h
--- a/include/asm-x86_64/debugreg.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/debugreg.h	Mon Mar 31 13:41:07 2003
@@ -58,7 +58,7 @@
    We can slow the instruction pipeline for instructions coming via the
    gdt or the ldt if we want to.  I am not sure why this is an advantage */
 
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_CONTROL_RESERVED (0xFFFFFFFFFC00) /* Reserved by Intel */
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
diff -Nru a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
--- a/include/asm-x86_64/desc.h	Mon Mar 31 13:41:09 2003
+++ b/include/asm-x86_64/desc.h	Mon Mar 31 13:41:09 2003
@@ -8,6 +8,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/segment.h>
+#include <asm/mmu.h>
 
 // 8 byte segment descriptor
 struct desc_struct { 
diff -Nru a/include/asm-x86_64/hdreg.h b/include/asm-x86_64/hdreg.h
--- a/include/asm-x86_64/hdreg.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-x86_64/hdreg.h	Mon Mar 31 13:41:06 2003
@@ -7,6 +7,4 @@
 #ifndef __ASMx86_64_HDREG_H
 #define __ASMx86_64_HDREG_H
 
-typedef unsigned long ide_ioreg_t;
-
 #endif /* __ASMx86_64_HDREG_H */
diff -Nru a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
--- a/include/asm-x86_64/i387.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/i387.h	Mon Mar 31 13:41:07 2003
@@ -19,15 +19,15 @@
 #include <asm/thread_info.h>
 
 extern void fpu_init(void);
-extern void init_fpu(void);
-int save_i387(struct _fpstate *buf);
+extern void init_fpu(struct task_struct *child);
+extern int save_i387(struct _fpstate *buf);
 
 static inline int need_signal_i387(struct task_struct *me) 
 { 
 	if (!me->used_math)
 		return 0;
 	me->used_math = 0; 
-	if (me->thread_info->flags & _TIF_USEDFPU)
+	if (me->thread_info->status & TS_USEDFPU)
 		return 0;
 	return 1;
 } 
@@ -39,14 +39,14 @@
 #define kernel_fpu_end() stts()
 
 #define unlazy_fpu(tsk) do { \
-	if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
+	if ((tsk)->thread_info->status & TS_USEDFPU) \
 		save_init_fpu(tsk); \
 } while (0)
 
 #define clear_fpu(tsk) do { \
-	if ((tsk)->thread_info->flags & _TIF_USEDFPU) {		\
+	if ((tsk)->thread_info->status & TS_USEDFPU) {		\
 		asm volatile("fwait");				\
-		(tsk)->thread_info->flags &= ~_TIF_USEDFPU;	\
+		(tsk)->thread_info->status &= ~TS_USEDFPU;	\
 		stts();						\
 	}							\
 } while (0)
@@ -114,11 +114,11 @@
 
 static inline void kernel_fpu_begin(void)
 {
-	struct task_struct *me = current;
-	if (test_tsk_thread_flag(me,TIF_USEDFPU)) {
-		asm volatile("fxsave %0 ; fnclex"
-			      : "=m" (me->thread.i387.fxsave));
-		clear_tsk_thread_flag(me, TIF_USEDFPU);
+	struct thread_info *me = current_thread_info();
+	if (me->status & TS_USEDFPU) { 
+		asm volatile("rex64 ; fxsave %0 ; fnclex"
+			      : "=m" (me->task->thread.i387.fxsave));
+		me->status &= ~TS_USEDFPU;
 		return;
 	}
 	clts();
@@ -128,7 +128,7 @@
 {
 	asm volatile( "fxsave %0 ; fnclex"
 		      : "=m" (tsk->thread.i387.fxsave));
-	tsk->thread_info->flags &= ~TIF_USEDFPU;
+	tsk->thread_info->status &= ~TS_USEDFPU;
 	stts();
 }
 
@@ -139,19 +139,5 @@
 {
 	return restore_fpu_checking((struct i387_fxsave_struct *)buf);
 }
-
-
-static inline void empty_fpu(struct task_struct *child)
-{
-	if (!child->used_math) {
-		/* Simulate an empty FPU. */
-		memset(&child->thread.i387.fxsave,0,sizeof(struct i387_fxsave_struct));
-		child->thread.i387.fxsave.cwd = 0x037f; 
-		child->thread.i387.fxsave.swd = 0;
-		child->thread.i387.fxsave.twd = 0; 
-		child->thread.i387.fxsave.mxcsr = 0x1f80;
-	}
-	child->used_math = 1; 
-}		
 
 #endif /* __ASM_X86_64_I387_H */
diff -Nru a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
--- a/include/asm-x86_64/kdebug.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-x86_64/kdebug.h	Mon Mar 31 13:41:06 2003
@@ -9,8 +9,13 @@
 	struct pt_regs *regs;
 	const char *str;
 	long err; 
+	int trapnr;
+	int signr;
 }; 
 
+/* Note - you should never unregister from this chain, because that can race with NMIs.
+   If you really must: unregister first, then synchronize_kernel, and only then free.
+  */
 extern struct notifier_block *die_chain;
 
 /* Grossly misnamed. */
@@ -21,15 +26,16 @@
 	DIE_PANIC,
 	DIE_NMI,
 	DIE_DIE,
+	DIE_NMIWATCHDOG,
+	DIE_KERNELDEBUG,
+	DIE_TRAP,
+	DIE_GPF,
 	DIE_CALL,
-	DIE_CPUINIT,	/* not really a die, but .. */
-	DIE_TRAPINIT,	/* not really a die, but .. */
-	DIE_STOP, 
 }; 
 	
-static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err)
+static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
 { 
-	struct die_args args = { regs: regs, str: str, err: err }; 
+	struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig }; 
 	return notifier_call_chain(&die_chain, val, &args); 
 } 
 
diff -Nru a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
--- a/include/asm-x86_64/numa.h	Mon Mar 31 13:41:06 2003
+++ b/include/asm-x86_64/numa.h	Mon Mar 31 13:41:06 2003
@@ -8,13 +8,11 @@
 	u64 start,end; 
 };
 
-#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \
+#define for_all_nodes(x) for ((x) = 0; (x) < numnodes; (x)++) \
 				if ((1UL << (x)) & nodes_present)
 
-#define early_for_all_nodes(n) \
-	for (n=0; n<MAXNODE;n++) if (nodes[n].start!=nodes[n].end)
 
-extern int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem);
+extern int compute_hash_shift(struct node *nodes);
 extern unsigned long nodes_present;
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
diff -Nru a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
--- a/include/asm-x86_64/pci.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/pci.h	Mon Mar 31 13:41:07 2003
@@ -44,8 +44,7 @@
 
 struct pci_dev;
 
-extern int iommu_setup(char *opt, char **end);
-
+extern int iommu_setup(char *opt);
 extern void pci_iommu_init(void);
 
 /* Allocate and map kernel buffer using consistent mode DMA for a device.
@@ -77,10 +76,11 @@
  * Once the device is given the dma address, the device owns this memory
  * until either pci_unmap_single or pci_dma_sync_single is performed.
  */
-extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
-			  size_t size, int direction);
+extern dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
+			  size_t size, int direction, int flush);
+
 
-extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
+void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
 				   size_t size, int direction);
 
 /*
@@ -118,12 +118,16 @@
 	BUG_ON(direction == PCI_DMA_NONE); 
 } 
 
-#define PCI_DMA_BUS_IS_PHYS	0
+/* The PCI address space is not treated as equal to the physical memory
+ * address space here (hence 0).  The networking and block device layers
+ * use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS	(0)
 
 
 #else
-static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
-					size_t size, int direction)
+static inline dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
+					size_t size, int direction, int flush)
 {
 	dma_addr_t addr; 
 
@@ -210,6 +214,11 @@
 extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 			 int nents, int direction);
 
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+			  size_t size, int direction)
+{
+	return __pci_map_single(hwdev,ptr,size,direction,1); 
+}
 
 #define pci_unmap_page pci_unmap_single
 
diff -Nru a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
--- a/include/asm-x86_64/pgtable.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-x86_64/pgtable.h	Mon Mar 31 13:41:08 2003
@@ -283,6 +283,7 @@
 #define pml4_page(pml4) ((unsigned long) __va(pml4_val(pml4) & PTE_MASK))
 #define pml4_index(address) ((address >> PML4_SHIFT) & (PTRS_PER_PML4-1))
 #define pml4_offset_k(address) (init_level4_pgt + pml4_index(address))
+#define pml4_present(pml4) (pml4_val(pml4) & _PAGE_PRESENT)
 #define mk_kernel_pml4(address) ((pml4_t){ (address) | _KERNPG_TABLE })
 #define level3_offset_k(dir, address) ((pgd_t *) pml4_page(*(dir)) + pgd_index(address))
 
diff -Nru a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
--- a/include/asm-x86_64/processor.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-x86_64/processor.h	Mon Mar 31 13:41:08 2003
@@ -269,7 +269,7 @@
 extern void release_thread(struct task_struct *);
 
 /* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
+extern void prepare_to_copy(struct task_struct *tsk);
 
 /*
  * create a kernel thread without removing it from tasklists
@@ -308,8 +308,8 @@
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
-#define prefetch(x) __builtin_prefetch((x),0)
-#define prefetchw(x) __builtin_prefetch((x),1)
+#define prefetch(x) __builtin_prefetch((x),0,1)
+#define prefetchw(x) __builtin_prefetch((x),1,1)
 #define spin_lock_prefetch(x)  prefetchw(x)
 #define cpu_relax()   rep_nop()
 
diff -Nru a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
--- a/include/asm-x86_64/proto.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-x86_64/proto.h	Mon Mar 31 13:41:08 2003
@@ -6,6 +6,7 @@
 /* misc architecture specific prototypes */
 
 struct cpuinfo_x86; 
+struct pt_regs;
 
 extern void get_cpu_vendor(struct cpuinfo_x86*);
 extern void start_kernel(void);
@@ -41,6 +42,8 @@
 extern unsigned long end_pfn_map; 
 
 extern void show_stack(unsigned long * rsp);
+extern void show_trace(unsigned long * rsp);
+extern void show_registers(struct pt_regs *regs);
 
 extern void exception_table_check(void);
 
diff -Nru a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
--- a/include/asm-x86_64/suspend.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/suspend.h	Mon Mar 31 13:41:07 2003
@@ -11,7 +11,7 @@
 {
 }
 
-/* image of the saved processor state */
+/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
 struct saved_context {
   	u16 ds, es, fs, gs, ss;
 	unsigned long gs_base, gs_kernel_base, fs_base;
diff -Nru a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
--- a/include/asm-x86_64/system.h	Mon Mar 31 13:41:08 2003
+++ b/include/asm-x86_64/system.h	Mon Mar 31 13:41:08 2003
@@ -22,18 +22,18 @@
 struct save_context_frame { 
 	unsigned long rbp; 
 	unsigned long rbx;
+	unsigned long r11;
+	unsigned long r10;
+	unsigned long r9;
+	unsigned long r8;
 	unsigned long rcx;
 	unsigned long rdx;	
-	unsigned long rsi;
-	unsigned long rdi; 
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
-	unsigned long r11;
-	unsigned long r10;
-	unsigned long r9;
-	unsigned long r8; 
+	unsigned long rdi;
+	unsigned long rsi;
 }; 
 
 /* frame pointer must be last for get_wchan */
@@ -43,19 +43,20 @@
    rbp needs to be always explicitely saved because gcc cannot clobber the
    frame pointer and the scheduler is compiled with frame pointers. -AK */
 #define SAVE_CONTEXT \
-	__PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) __PUSH(r12) __PUSH(r13) \
-	__PUSH(r14) __PUSH(r15) \
-	__PUSH(rdi) __PUSH(rsi) \
-	__PUSH(rdx) __PUSH(rcx) \
+	__PUSH(rsi) __PUSH(rdi) \
+    __PUSH(r12) __PUSH(r13) __PUSH(r14) __PUSH(r15)  \
+	__PUSH(rdx) __PUSH(rcx) __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11)  \
 	__PUSH(rbx) __PUSH(rbp) 
 #define RESTORE_CONTEXT \
 	__POP(rbp) __POP(rbx) \
-	__POP(rcx) __POP(rdx) \
-	__POP(rsi) __POP(rdi) \
-	__POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \
-	__POP(r9) __POP(r8)
+	__POP(r11) __POP(r10) __POP(r9) __POP(r8) __POP(rcx) __POP(rdx) \
+	__POP(r15) __POP(r14) __POP(r13) __POP(r12) \
+	__POP(rdi) __POP(rsi)
 
 /* RED-PEN: pipeline stall on ret because it is not predicted */
+/* RED-PEN: the register saving could be optimized */
+/* frame pointer must be last for get_wchan */
+
 #define switch_to(prev,next,last) \
 	asm volatile(SAVE_CONTEXT						    \
 		     "movq %%rsp,%[prevrsp]\n\t"				    \
diff -Nru a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
--- a/include/asm-x86_64/thread_info.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/thread_info.h	Mon Mar 31 13:41:07 2003
@@ -27,6 +27,7 @@
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			flags;		/* low level flags */
+	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
 	int 			preempt_count;
 
@@ -100,16 +101,14 @@
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_USEDFPU		16	/* FPU was used by this task this quantum */
-#define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_IA32		18	/* 32bit process */ 
+#define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_IA32		17	/* 32bit process */ 
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_IA32		(1<<TIF_IA32)
 
@@ -117,6 +116,15 @@
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
 
 #define PREEMPT_ACTIVE     0x4000000
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_USEDFPU		0x0001	/* FPU was used by this task this quantum (SMP) */
 
 #endif /* __KERNEL__ */
 
diff -Nru a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
--- a/include/asm-x86_64/unistd.h	Mon Mar 31 13:41:07 2003
+++ b/include/asm-x86_64/unistd.h	Mon Mar 31 13:41:07 2003
@@ -520,8 +520,10 @@
 __SYSCALL(__NR_clock_getres, sys_clock_getres)
 #define __NR_clock_nanosleep	230
 __SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
+#define __NR_exit_group		231
+__SYSCALL(__NR_exit_group, sys_exit_group)
 
-#define __NR_syscall_max __NR_clock_nanosleep
+#define __NR_syscall_max __NR_exit_group
 #ifndef __NO_STUBS
 
 /* user-visible error numbers are in the range -1 - -4095 */
diff -Nru a/include/linux/aio.h b/include/linux/aio.h
--- a/include/linux/aio.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/aio.h	Mon Mar 31 13:41:06 2003
@@ -148,6 +148,11 @@
 struct mm_struct;
 extern void FASTCALL(exit_aio(struct mm_struct *mm));
 
+/* semi private, but used by the 32bit emulations: */
+struct kioctx *lookup_ioctx(unsigned long ctx_id);
+int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
+				  struct iocb *iocb));
+
 #define get_ioctx(kioctx)	do { if (unlikely(atomic_read(&(kioctx)->users) <= 0)) BUG(); atomic_inc(&(kioctx)->users); } while (0)
 #define put_ioctx(kioctx)	do { if (unlikely(atomic_dec_and_test(&(kioctx)->users))) __put_ioctx(kioctx); else if (unlikely(atomic_read(&(kioctx)->users) < 0)) BUG(); } while (0)
 
diff -Nru a/include/linux/buffer_head.h b/include/linux/buffer_head.h
--- a/include/linux/buffer_head.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/buffer_head.h	Mon Mar 31 13:41:06 2003
@@ -60,6 +60,12 @@
 	struct list_head b_assoc_buffers; /* associated with another mapping */
 };
 
+/*
+ * Debug
+ */
+
+void __buffer_error(char *file, int line);
+#define buffer_error() __buffer_error(__FILE__, __LINE__)
 
 /*
  * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
@@ -183,7 +189,8 @@
  */
 int try_to_release_page(struct page * page, int gfp_mask);
 int block_invalidatepage(struct page *page, unsigned long offset);
-int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc);
+int block_write_full_page(struct page *page, get_block_t *get_block,
+				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
@@ -232,12 +239,14 @@
 		__bforget(bh);
 }
 
-static inline struct buffer_head *sb_bread(struct super_block *sb, sector_t block)
+static inline struct buffer_head *
+sb_bread(struct super_block *sb, sector_t block)
 {
 	return __bread(sb->s_bdev, block, sb->s_blocksize);
 }
 
-static inline struct buffer_head *sb_getblk(struct super_block *sb, sector_t block)
+static inline struct buffer_head *
+sb_getblk(struct super_block *sb, sector_t block)
 {
 	return __getblk(sb->s_bdev, block, sb->s_blocksize);
 }
@@ -256,9 +265,14 @@
 	bh->b_blocknr = block;
 }
 
+/*
+ * Calling wait_on_buffer() for a zero-ref buffer is illegal, so we call into
+ * __wait_on_buffer() just to trip a debug check.  The check lives out of line
+ * because debug code in inline functions is bloaty.
+ */
 static inline void wait_on_buffer(struct buffer_head *bh)
 {
-	if (buffer_locked(bh))
+	if (buffer_locked(bh) || atomic_read(&bh->b_count) == 0)
 		__wait_on_buffer(bh);
 }
 
@@ -267,12 +281,5 @@
 	while (test_set_buffer_locked(bh))
 		__wait_on_buffer(bh);
 }
-
-/*
- * Debug
- */
-
-void __buffer_error(char *file, int line);
-#define buffer_error() __buffer_error(__FILE__, __LINE__)
 
 #endif /* _LINUX_BUFFER_HEAD_H */
diff -Nru a/include/linux/crypto.h b/include/linux/crypto.h
--- a/include/linux/crypto.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/crypto.h	Mon Mar 31 13:41:06 2003
@@ -31,7 +31,6 @@
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000002
 #define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
 
-
 /*
  * Transform masks and values (for crt_flags).
  */
@@ -45,14 +44,12 @@
 #define CRYPTO_TFM_MODE_CTR		0x00000008
 
 #define CRYPTO_TFM_REQ_WEAK_KEY		0x00000100
-
 #define CRYPTO_TFM_RES_WEAK_KEY		0x00100000
 #define CRYPTO_TFM_RES_BAD_KEY_LEN   	0x00200000
 #define CRYPTO_TFM_RES_BAD_KEY_SCHED 	0x00400000
 #define CRYPTO_TFM_RES_BAD_BLOCK_LEN 	0x00800000
 #define CRYPTO_TFM_RES_BAD_FLAGS 	0x01000000
 
-
 /*
  * Miscellaneous stuff.
  */
@@ -83,8 +80,12 @@
 };
 
 struct compress_alg {
-	void (*coa_compress)(void);
-	void (*coa_decompress)(void);
+	int (*coa_init)(void *ctx);
+	void (*coa_exit)(void *ctx);
+	int (*coa_compress)(void *ctx, const u8 *src, unsigned int slen,
+	                    u8 *dst, unsigned int *dlen);
+	int (*coa_decompress)(void *ctx, const u8 *src, unsigned int slen,
+	                      u8 *dst, unsigned int *dlen);
 };
 
 #define cra_cipher	cra_u.cipher
@@ -162,8 +163,12 @@
 };
 
 struct compress_tfm {
-	void (*cot_compress)(struct crypto_tfm *tfm);
-	void (*cot_decompress)(struct crypto_tfm *tfm);
+	int (*cot_compress)(struct crypto_tfm *tfm,
+	                    const u8 *src, unsigned int slen,
+	                    u8 *dst, unsigned int *dlen);
+	int (*cot_decompress)(struct crypto_tfm *tfm,
+	                      const u8 *src, unsigned int slen,
+	                      u8 *dst, unsigned int *dlen);
 };
 
 #define crt_cipher	crt_u.cipher
@@ -336,16 +341,20 @@
 	memcpy(dst, tfm->crt_cipher.cit_iv, len);
 }
 
-static inline void crypto_comp_compress(struct crypto_tfm *tfm)
+static inline int crypto_comp_compress(struct crypto_tfm *tfm,
+                                       const u8 *src, unsigned int slen,
+                                       u8 *dst, unsigned int *dlen)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	tfm->crt_compress.cot_compress(tfm);
+	return tfm->crt_compress.cot_compress(tfm, src, slen, dst, dlen);
 }
 
-static inline void crypto_comp_decompress(struct crypto_tfm *tfm) 
+static inline int crypto_comp_decompress(struct crypto_tfm *tfm,
+                                         const u8 *src, unsigned int slen,
+                                         u8 *dst, unsigned int *dlen)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	tfm->crt_compress.cot_decompress(tfm);
+	return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen);
 }
 
 /*
diff -Nru a/include/linux/device.h b/include/linux/device.h
--- a/include/linux/device.h	Mon Mar 31 13:41:08 2003
+++ b/include/linux/device.h	Mon Mar 31 13:41:08 2003
@@ -35,7 +35,7 @@
 #define DEVICE_NAME_SIZE	50
 #define DEVICE_NAME_HALF	__stringify(20)	/* Less than half to accommodate slop */
 #define DEVICE_ID_SIZE		32
-#define BUS_ID_SIZE		16
+#define BUS_ID_SIZE		20
 
 
 enum {
diff -Nru a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/fs.h	Mon Mar 31 13:41:06 2003
@@ -1060,6 +1060,8 @@
 extern int register_chrdev(unsigned int, const char *,
 			   struct file_operations *);
 extern int unregister_chrdev(unsigned int, const char *);
+extern int unregister_chrdev_region(unsigned int, unsigned int, int,
+				    const char *);
 extern int chrdev_open(struct inode *, struct file *);
 
 /* fs/block_dev.c */
diff -Nru a/include/linux/highmem.h b/include/linux/highmem.h
--- a/include/linux/highmem.h	Mon Mar 31 13:41:08 2003
+++ b/include/linux/highmem.h	Mon Mar 31 13:41:08 2003
@@ -18,7 +18,11 @@
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
-static inline void *kmap(struct page *page) { return page_address(page); }
+static inline void *kmap(struct page *page)
+{
+	might_sleep();
+	return page_address(page);
+}
 
 #define kunmap(page) do { (void) (page); } while (0)
 
diff -Nru a/include/linux/i2c.h b/include/linux/i2c.h
--- a/include/linux/i2c.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/i2c.h	Mon Mar 31 13:41:06 2003
@@ -156,7 +156,6 @@
  * function is mainly used for lookup & other admin. functions.
  */
 struct i2c_client {
-	char name[32];
 	int id;
 	unsigned int flags;		/* div., see below		*/
 	unsigned int addr;		/* chip address - NOTE: 7bit 	*/
@@ -167,11 +166,21 @@
 	  alignment considerations */
 	struct i2c_adapter *adapter;	/* the adapter we sit on	*/
 	struct i2c_driver *driver;	/* and our access routines	*/
-	void *data;			/* for the clients		*/
 	int usage_count;		/* How many accesses currently  */
 					/* to the client		*/
+	struct device dev;		/* the device structure		*/
 };
+#define to_i2c_client(d) container_of(d, struct i2c_client, dev)
 
+static inline void *i2c_get_clientdata (struct i2c_client *dev)
+{
+	return dev_get_drvdata (&dev->dev);
+}
+
+static inline void i2c_set_clientdata (struct i2c_client *dev, void *data)
+{
+	dev_set_drvdata (&dev->dev, data);	/* void function: no value to return */
+}
 
 /*
  * The following structs are for those who like to implement new bus drivers:
@@ -210,7 +219,6 @@
  */
 struct i2c_adapter {
 	struct module *owner;
-	char name[32];	/* some useful name to identify the adapter	*/
 	unsigned int id;/* == is algo->id | hwdep.struct->id, 		*/
 			/* for registered values see below		*/
 	struct i2c_algorithm *algo;/* the algorithm to access the bus	*/
@@ -220,12 +228,7 @@
 	int (*client_register)(struct i2c_client *);
 	int (*client_unregister)(struct i2c_client *);
 
-	void *data;	/* private data for the adapter			*/
-			/* some data fields that are used by all types	*/
-			/* these data fields are readonly to the public	*/
-			/* and can be set via the i2c_ioctl call	*/
-
-			/* data fields that are valid for all devices	*/
+	/* data fields that are valid for all devices	*/
 	struct semaphore bus;
 	struct semaphore list;  
 	unsigned int flags;/* flags specifying div. data		*/
@@ -242,6 +245,16 @@
 #endif /* def CONFIG_PROC_FS */
 };
 #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
+
+static inline void *i2c_get_adapdata (struct i2c_adapter *dev)
+{
+	return dev_get_drvdata (&dev->dev);
+}
+
+static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
+{
+	dev_set_drvdata (&dev->dev, data);	/* void function: no value to return */
+}
 
 /*flags for the driver struct: */
 #define I2C_DF_NOTIFY	0x01		/* notify on bus (de/a)ttaches 	*/
diff -Nru a/include/linux/idr.h b/include/linux/idr.h
--- a/include/linux/idr.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/idr.h	Mon Mar 31 13:41:06 2003
@@ -13,40 +13,44 @@
 
 #define RESERVED_ID_BITS 8
 
-#if     BITS_PER_LONG == 32
-#define IDR_BITS 5
-#define IDR_FULL 0xffffffff
+#if BITS_PER_LONG == 32
+# define IDR_BITS 5
+# define IDR_FULL 0xffffffff
 #elif BITS_PER_LONG == 64
-#define IDR_BITS 6
-#define IDR_FULL 0xffffffffffffffff
+# define IDR_BITS 6
+# define IDR_FULL 0xffffffffffffffff
 #else
-#error "BITS_PER_LONG is not 32 or 64"
+# error "BITS_PER_LONG is not 32 or 64"
 #endif
 
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
-/* Leave the possibility of an incomplete final layer */
-#define MAX_LEVEL (BITS_PER_LONG - RESERVED_ID_BITS + IDR_BITS - 1) / IDR_BITS
-#define MAX_ID_SHIFT (BITS_PER_LONG - RESERVED_ID_BITS)
-#define MAX_ID_BIT (1L << MAX_ID_SHIFT)
+/* Define the size of the id's */
+#define BITS_PER_INT (sizeof(int)*8)
+
+#define MAX_ID_SHIFT (BITS_PER_INT - RESERVED_ID_BITS)
+#define MAX_ID_BIT (1 << MAX_ID_SHIFT)
 #define MAX_ID_MASK (MAX_ID_BIT - 1)
 
+/* Leave the possibility of an incomplete final layer (division rounds up) */
+#define MAX_LEVEL ((MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS)
+
 /* Number of id_layer structs to leave in free list */
 #define IDR_FREE_MAX MAX_LEVEL + MAX_LEVEL
 
 struct idr_layer {
-	unsigned long	        bitmap;     // A zero bit means "space here"
-	int                     count;      // When zero, we can release it
-	struct idr_layer       *ary[1<<IDR_BITS];
+	unsigned long		 bitmap;	/* A zero bit means "space here" */
+	struct idr_layer	*ary[1<<IDR_BITS];
+	int			 count;		/* When zero, we can release it */
 };
 
 struct idr {
 	struct idr_layer *top;
-	int		  layers;
-	long		  count;
 	struct idr_layer *id_free;
-	int               id_free_cnt;
-	spinlock_t        lock;
+	long		  count;
+	int		  layers;
+	int		  id_free_cnt;
+	spinlock_t	  lock;
 };
 
 /*
diff -Nru a/include/linux/if_bonding.h b/include/linux/if_bonding.h
--- a/include/linux/if_bonding.h	Mon Mar 31 13:41:08 2003
+++ b/include/linux/if_bonding.h	Mon Mar 31 13:41:08 2003
@@ -54,6 +54,15 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+#define BOND_MULTICAST_DISABLED 0
+#define BOND_MULTICAST_ACTIVE   1
+#define BOND_MULTICAST_ALL      2
+
+struct bond_parm_tbl {
+	char *modename;
+	int mode;
+};
+
 typedef struct ifbond {
 	__s32 bond_mode;
 	__s32 num_slaves;
diff -Nru a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/include/linux/kmalloc_sizes.h	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,33 @@
+#if (PAGE_SIZE == 4096)
+	CACHE(32)
+#endif
+	CACHE(64)
+#if L1_CACHE_BYTES < 64
+	CACHE(96)
+#endif
+	CACHE(128)
+#if L1_CACHE_BYTES < 128
+	CACHE(192)
+#endif
+	CACHE(256)
+	CACHE(512)
+	CACHE(1024)
+	CACHE(2048)
+	CACHE(4096)
+	CACHE(8192)
+	CACHE(16384)
+	CACHE(32768)
+	CACHE(65536)
+	CACHE(131072)
+#ifndef CONFIG_MMU
+	CACHE(262144)
+	CACHE(524288)
+	CACHE(1048576)
+#ifdef CONFIG_LARGE_ALLOCS
+	CACHE(2097152)
+	CACHE(4194304)
+	CACHE(8388608)
+	CACHE(16777216)
+	CACHE(33554432)
+#endif /* CONFIG_LARGE_ALLOCS */
+#endif /* CONFIG_MMU */
diff -Nru a/include/linux/netdevice.h b/include/linux/netdevice.h
--- a/include/linux/netdevice.h	Mon Mar 31 13:41:08 2003
+++ b/include/linux/netdevice.h	Mon Mar 31 13:41:08 2003
@@ -624,11 +624,6 @@
        return err;
 }
 
-static inline void dev_init_buffers(struct net_device *dev)
-{
-	/* WILL BE REMOVED IN 2.5.0 */
-}
-
 extern int netdev_finish_unregister(struct net_device *dev);
 
 static inline void dev_put(struct net_device *dev)
diff -Nru a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
--- a/include/linux/raid/md_k.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/raid/md_k.h	Mon Mar 31 13:41:06 2003
@@ -254,6 +254,7 @@
 struct mdk_personality_s
 {
 	char *name;
+	struct module *owner;
 	int (*make_request)(request_queue_t *q, struct bio *bio);
 	int (*run)(mddev_t *mddev);
 	int (*stop)(mddev_t *mddev);
diff -Nru a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h	Mon Mar 31 13:41:07 2003
+++ b/include/linux/skbuff.h	Mon Mar 31 13:41:07 2003
@@ -231,15 +231,8 @@
 				pkt_type,
 				ip_summed;
 	__u32			priority;
-	atomic_t		users;
 	unsigned short		protocol,
 				security;
-	unsigned int		truesize;
-
-	unsigned char		*head,
-				*data,
-				*tail,
-				*end;
 
 	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_NETFILTER
@@ -261,6 +254,14 @@
 #ifdef CONFIG_NET_SCHED
        __u32			tc_index;               /* traffic control index */
 #endif
+
+	/* These elements must be at the end, see alloc_skb() for details.  */
+	unsigned int		truesize;
+	atomic_t		users;
+	unsigned char		*head,
+				*data,
+				*tail,
+				*end;
 };
 
 #define SK_WMEM_MAX	65535
diff -Nru a/include/linux/swap.h b/include/linux/swap.h
--- a/include/linux/swap.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/swap.h	Mon Mar 31 13:41:06 2003
@@ -177,22 +177,19 @@
 					struct pte_chain *));
 void FASTCALL(page_remove_rmap(struct page *, pte_t *));
 int FASTCALL(try_to_unmap(struct page *));
-int FASTCALL(page_over_rsslimit(struct page *));
-
-/* return values of try_to_unmap */
-#define	SWAP_SUCCESS	0
-#define	SWAP_AGAIN	1
-#define	SWAP_FAIL	2
-#define	SWAP_ERROR	3
 
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
-
 #else
-#define page_referenced(page) \
-	TestClearPageReferenced(page)
+#define page_referenced(page)	TestClearPageReferenced(page)
+#define try_to_unmap(page)	SWAP_FAIL
 #endif /* CONFIG_MMU */
 
+/* return values of try_to_unmap */
+#define	SWAP_SUCCESS	0
+#define	SWAP_AGAIN	1
+#define	SWAP_FAIL	2
+
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
@@ -203,7 +200,6 @@
 extern struct address_space swapper_space;
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
-extern int add_to_swap_cache(struct page *, swp_entry_t);
 extern int add_to_swap(struct page *);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
diff -Nru a/include/linux/xfrm.h b/include/linux/xfrm.h
--- a/include/linux/xfrm.h	Mon Mar 31 13:41:06 2003
+++ b/include/linux/xfrm.h	Mon Mar 31 13:41:06 2003
@@ -42,7 +42,6 @@
 	__u8	proto;
 	int	ifindex;
 	uid_t	user;
-	void	*owner;
 };
 
 #define XFRM_INF (~(u64)0)
@@ -164,8 +163,8 @@
 
 struct xfrm_userspi_info {
 	struct xfrm_usersa_info		info;
-	u32				min;
-	u32				max;
+	__u32				min;
+	__u32				max;
 };
 
 struct xfrm_userpolicy_info {
diff -Nru a/include/net/ah.h b/include/net/ah.h
--- a/include/net/ah.h	Mon Mar 31 13:41:06 2003
+++ b/include/net/ah.h	Mon Mar 31 13:41:06 2003
@@ -17,9 +17,6 @@
 	struct crypto_tfm	*tfm;
 };
 
-extern void skb_ah_walk(const struct sk_buff *skb,
-                        struct crypto_tfm *tfm, icv_update_fn_t icv_update);
-
 static inline void
 ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data)
 {
@@ -27,7 +24,7 @@
 
 	memset(auth_data, 0, ahp->icv_trunc_len);
 	crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
-	skb_ah_walk(skb, tfm, crypto_hmac_update);
+	skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);
 	crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
 	memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);
 }
diff -Nru a/include/net/compat.h b/include/net/compat.h
--- a/include/net/compat.h	Mon Mar 31 13:41:06 2003
+++ b/include/net/compat.h	Mon Mar 31 13:41:06 2003
@@ -33,8 +33,6 @@
 extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr *,unsigned);
 extern asmlinkage long compat_sys_getsockopt(int, int, int, char *, int *);
 extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);
-extern int put_compat_msg_controllen(struct msghdr *, struct compat_msghdr *,
-		unsigned long);
 extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, unsigned char *,
 		int);
 
diff -Nru a/include/net/dst.h b/include/net/dst.h
--- a/include/net/dst.h	Mon Mar 31 13:41:08 2003
+++ b/include/net/dst.h	Mon Mar 31 13:41:08 2003
@@ -248,11 +248,6 @@
 struct flowi;
 extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		       struct sock *sk, int flags);
-extern int xfrm6_lookup(struct dst_entry **dst_p, struct flowi *fl,
-		       struct sock *sk, int flags);
-extern void xfrm_init(void);
-extern void xfrm6_init(void);
-
 #endif
 
 #endif /* _NET_DST_H */
diff -Nru a/include/net/esp.h b/include/net/esp.h
--- a/include/net/esp.h	Mon Mar 31 13:41:07 2003
+++ b/include/net/esp.h	Mon Mar 31 13:41:07 2003
@@ -33,8 +33,6 @@
 	} auth;
 };
 
-extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-			 int offset, int len, icv_update_fn_t icv_update);
 extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len);
 extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
 extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
diff -Nru a/include/net/irda/af_irda.h b/include/net/irda/af_irda.h
--- a/include/net/irda/af_irda.h	Mon Mar 31 13:41:08 2003
+++ b/include/net/irda/af_irda.h	Mon Mar 31 13:41:08 2003
@@ -52,8 +52,8 @@
 	__u8  max_header_size;
 	struct qos_info qos_tx;
 
-	__u16 mask;           /* Hint bits mask */
-	__u16 hints;          /* Hint bits */
+	__u16_host_order mask;           /* Hint bits mask */
+	__u16_host_order hints;          /* Hint bits */
 
 	void *ckey;           /* IrLMP client handle */
 	void *skey;           /* IrLMP service handle */
@@ -63,7 +63,7 @@
 	struct ias_value *ias_result; /* Result of remote IAS query */
 
 	hashbin_t *cachelog;		/* Result of discovery query */
-	struct discovery_t *cachediscovery;	/* Result of selective discovery query */
+	__u32 cachedaddr;	/* Result of selective discovery query */
 
 	int nslots;           /* Number of slots to use for discovery */
 
diff -Nru a/include/net/irda/discovery.h b/include/net/irda/discovery.h
--- a/include/net/irda/discovery.h	Mon Mar 31 13:41:07 2003
+++ b/include/net/irda/discovery.h	Mon Mar 31 13:41:07 2003
@@ -46,12 +46,20 @@
  *  little endian format. A little endian machine stores MSB of word in
  *  byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] 
  *  and LSB in byte[1].
+ *
+ * This structure is used in the code for things that are endian neutral
+ * but that fit in a word so that we can manipulate them efficiently.
+ * By endian neutral, I mean things that are really an array of bytes,
+ * and always used as such, for example the hint bits. Jean II
  */
 typedef union {
 	__u16 word;
 	__u8  byte[2];
 } __u16_host_order;
 
+/* Same purpose, different application: access a byte array as one __u16 */
+#define u16ho(array) (* ((__u16 *) (array)))
+
 /* Types of discovery */
 typedef enum {
 	DISCOVERY_LOG,		/* What's in our discovery log */
@@ -62,30 +70,31 @@
 
 #define NICKNAME_MAX_LEN 21
 
+/* Basic discovery information about a peer */
+typedef struct irda_device_info		discinfo_t;	/* linux/irda.h */
+
 /*
  * The DISCOVERY structure is used for both discovery requests and responses
  */
 typedef struct discovery_t {
-	irda_queue_t q;          /* Must be first! */
+	irda_queue_t	q;		/* Must be first! */
+
+	discinfo_t	data;		/* Basic discovery information */
+	int		name_len;	/* Length of nickname */
 
-	__u32      saddr;        /* Which link the device was discovered */
-	__u32      daddr;        /* Remote device address */
-	LAP_REASON condition;    /* More info about the discovery */
-
-	__u16_host_order hints;  /* Discovery hint bits */
-	__u8       charset;      /* Encoding of nickname */
-	char       nickname[22]; /* The name of the device (21 bytes + \0) */
-	int        name_len;     /* Lenght of nickname */
-
-	int        gen_addr_bit; /* Need to generate a new device address? */
-	int        nslots;       /* Number of slots to use when discovering */
-	unsigned long timestamp; /* Time discovered */
-	unsigned long first_timestamp; /* First time discovered */
+	LAP_REASON	condition;	/* More info about the discovery */
+	int		gen_addr_bit;	/* Need to generate a new device
+					 * address? */
+	int		nslots;		/* Number of slots to use when
+					 * discovering */
+	unsigned long	timestamp;	/* Last time discovered */
+	unsigned long	firststamp;	/* First time discovered */
 } discovery_t;
 
 void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery);
 void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log);
 void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force);
-struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, __u16 mask);
+struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
+						__u16 mask, int old_entries);
 
 #endif
diff -Nru a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h
--- a/include/net/irda/irlan_client.h	Mon Mar 31 13:41:08 2003
+++ b/include/net/irda/irlan_client.h	Mon Mar 31 13:41:08 2003
@@ -34,7 +34,7 @@
 #include <net/irda/irlan_event.h>
 
 void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout);
-void irlan_client_discovery_indication(discovery_t *, DISCOVERY_MODE, void *);
+void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *);
 void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr);
 
 void irlan_client_open_ctrl_tsap( struct irlan_cb *self);
diff -Nru a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h
--- a/include/net/irda/irlmp.h	Mon Mar 31 13:41:06 2003
+++ b/include/net/irda/irlmp.h	Mon Mar 31 13:41:06 2003
@@ -58,7 +58,7 @@
 #define LM_IDLE_TIMEOUT     2*HZ /* 2 seconds for now */
 
 typedef enum {
-	S_PNP,
+	S_PNP = 0,
 	S_PDA,
 	S_COMPUTER,
 	S_PRINTER,
@@ -72,22 +72,24 @@
 	S_END,
 } SERVICE;
 
-typedef void (*DISCOVERY_CALLBACK1) (discovery_t *, DISCOVERY_MODE, void *);
-typedef void (*DISCOVERY_CALLBACK2) (hashbin_t *, void *);
+/* For selective discovery */
+typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *);
+/* For expiry (the same) */
+typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *);
 
 typedef struct {
 	irda_queue_t queue; /* Must be first */
 
-	__u16 hints; /* Hint bits */
+	__u16_host_order hints; /* Hint bits */
 } irlmp_service_t;
 
 typedef struct {
 	irda_queue_t queue; /* Must be first */
 
-	__u16 hint_mask;
+	__u16_host_order hint_mask;
 
 	DISCOVERY_CALLBACK1 disco_callback;	/* Selective discovery */
-	DISCOVERY_CALLBACK1 expir_callback;	/* Selective expiration */
+	DISCOVERY_CALLBACK2 expir_callback;	/* Selective expiration */
 	void *priv;                /* Used to identify client */
 } irlmp_client_t;
 
@@ -199,11 +201,11 @@
 void *irlmp_register_service(__u16 hints);
 int irlmp_unregister_service(void *handle);
 void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
-			    DISCOVERY_CALLBACK1 expir_clb, void *priv);
+			    DISCOVERY_CALLBACK2 expir_clb, void *priv);
 int irlmp_unregister_client(void *handle);
 int irlmp_update_client(void *handle, __u16 hint_mask, 
 			DISCOVERY_CALLBACK1 disco_clb,
-			DISCOVERY_CALLBACK1 expir_clb, void *priv);
+			DISCOVERY_CALLBACK2 expir_clb, void *priv);
 
 void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *);
 void irlmp_unregister_link(__u32 saddr);
@@ -222,11 +224,11 @@
 
 void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode);
 void irlmp_discovery_request(int nslots);
-struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots);
+discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots);
 void irlmp_do_expiry(void);
 void irlmp_do_discovery(int nslots);
 discovery_t *irlmp_get_discovery_response(void);
-void irlmp_discovery_expiry(discovery_t *expiry);
+void irlmp_discovery_expiry(discinfo_t *expiry, int number);
 
 int  irlmp_data_request(struct lsap_cb *, struct sk_buff *);
 void irlmp_data_indication(struct lsap_cb *, struct sk_buff *);
diff -Nru a/include/net/protocol.h b/include/net/protocol.h
--- a/include/net/protocol.h	Mon Mar 31 13:41:07 2003
+++ b/include/net/protocol.h	Mon Mar 31 13:41:07 2003
@@ -50,6 +50,7 @@
 			       struct inet6_skb_parm *opt,
 			       int type, int code, int offset,
 			       __u32 info);
+	int	no_policy;
 };
 
 #endif
diff -Nru a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
--- a/include/net/sctp/sctp.h	Mon Mar 31 13:41:06 2003
+++ b/include/net/sctp/sctp.h	Mon Mar 31 13:41:06 2003
@@ -356,7 +356,7 @@
 static inline void sctp_skb_list_tail(struct sk_buff_head *list,
 				      struct sk_buff_head *head)
 {
-	int flags __attribute__ ((unused));
+	unsigned long flags;
 
 	sctp_spin_lock_irqsave(&head->lock, flags);
 	sctp_spin_lock(&list->lock);
diff -Nru a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h	Mon Mar 31 13:41:08 2003
+++ b/include/net/sock.h	Mon Mar 31 13:41:08 2003
@@ -48,9 +48,7 @@
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/security.h>
 
-#ifdef CONFIG_FILTER
 #include <linux/filter.h>
-#endif
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -174,10 +172,8 @@
 	long			rcvtimeo;
 	long			sndtimeo;
 
-#ifdef CONFIG_FILTER
 	/* Socket Filtering Instructions */
 	struct sk_filter      	*filter;
-#endif /* CONFIG_FILTER */
 
 	/* This is where all the private (optional) areas that don't
 	 * overlap will eventually live. 
@@ -456,8 +452,6 @@
 /* Initialise core socket variables */
 extern void sock_init_data(struct socket *sock, struct sock *sk);
 
-#ifdef CONFIG_FILTER
-
 /**
  *	__sk_filter - run a packet through a socket filter
  *	@sk: sock associated with &sk_buff
@@ -523,15 +517,6 @@
 	atomic_inc(&fp->refcnt);
 	atomic_add(sk_filter_len(fp), &sk->omem_alloc);
 }
-
-#else
-
-static inline int __sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
-{
-	return 0;
-}
-
-#endif /* CONFIG_FILTER */
 
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
 {
diff -Nru a/include/net/xfrm.h b/include/net/xfrm.h
--- a/include/net/xfrm.h	Mon Mar 31 13:41:08 2003
+++ b/include/net/xfrm.h	Mon Mar 31 13:41:08 2003
@@ -744,6 +744,7 @@
 struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel);
 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
 void xfrm_policy_flush(void);
+u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state * xfrm_find_acq(u8 mode, u16 reqid, u8 proto, 
 				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
@@ -779,5 +780,8 @@
 
 struct crypto_tfm;
 typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
+
+extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+			 int offset, int len, icv_update_fn_t icv_update);
 
 #endif	/* _NET_XFRM_H */
diff -Nru a/include/pcmcia/bus_ops.h b/include/pcmcia/bus_ops.h
--- a/include/pcmcia/bus_ops.h	Mon Mar 31 13:41:07 2003
+++ b/include/pcmcia/bus_ops.h	Mon Mar 31 13:41:07 2003
@@ -1,152 +1,2 @@
-/*
- * bus_ops.h 1.10 2000/06/12 21:55:41
- *
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License
- * at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and
- * limitations under the License. 
- *
- * The initial developer of the original code is David A. Hinds
- * <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
- * are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
- *
- * Alternatively, the contents of this file may be used under the
- * terms of the GNU General Public License version 2 (the "GPL"), in which
- * case the provisions of the GPL are applicable instead of the
- * above.  If you wish to allow the use of your version of this file
- * only under the terms of the GPL and not to allow others to use
- * your version of this file under the MPL, indicate your decision by
- * deleting the provisions above and replace them with the notice and
- * other provisions required by the GPL.  If you do not delete the
- * provisions above, a recipient may use your version of this file
- * under either the MPL or the GPL.
- */
-
-#ifndef _LINUX_BUS_OPS_H
-#define _LINUX_BUS_OPS_H
-
-#include <linux/config.h>
-
-#ifdef CONFIG_VIRTUAL_BUS
-
-typedef struct bus_operations {
-    void	*priv;
-    u32		(*b_in)(void *bus, u32 port, s32 sz);
-    void	(*b_ins)(void *bus, u32 port, void *buf,
-			 u32 count, s32 sz);
-    void	(*b_out)(void *bus, u32 val, u32 port, s32 sz);
-    void	(*b_outs)(void *bus, u32 port, void *buf,
-			  u32 count, s32 sz);
-    void	*(*b_ioremap)(void *bus, u_long ofs, u_long sz);
-    void	(*b_iounmap)(void *bus, void *addr);
-    u32		(*b_read)(void *bus, void *addr, s32 sz);
-    void	(*b_write)(void *bus, u32 val, void *addr, s32 sz);
-    void	(*b_copy_from)(void *bus, void *d, void *s, u32 count);
-    void	(*b_copy_to)(void *bus, void *d, void *s, u32 count);
-    int		(*b_request_irq)(void *bus, u_int irq,
-				 void (*handler)(int, void *,
-						 struct pt_regs *),
-				 u_long flags, const char *device,
-				 void *dev_id);
-    void	(*b_free_irq)(void *bus, u_int irq, void *dev_id);
-} bus_operations;
-
-#define bus_inb(b,p)		(b)->b_in((b),(p),0)
-#define bus_inw(b,p)		(b)->b_in((b),(p),1)
-#define bus_inl(b,p)		(b)->b_in((b),(p),2)
-#define bus_inw_ns(b,p)		(b)->b_in((b),(p),-1)
-#define bus_inl_ns(b,p)		(b)->b_in((b),(p),-2)
-
-#define bus_insb(b,p,a,c)	(b)->b_ins((b),(p),(a),(c),0)
-#define bus_insw(b,p,a,c)	(b)->b_ins((b),(p),(a),(c),1)
-#define bus_insl(b,p,a,c)	(b)->b_ins((b),(p),(a),(c),2)
-#define bus_insw_ns(b,p,a,c)	(b)->b_ins((b),(p),(a),(c),-1)
-#define bus_insl_ns(b,p,a,c)	(b)->b_ins((b),(p),(a),(c),-2)
-
-#define bus_outb(b,v,p)		(b)->b_out((b),(v),(p),0)
-#define bus_outw(b,v,p)		(b)->b_out((b),(v),(p),1)
-#define bus_outl(b,v,p)		(b)->b_out((b),(v),(p),2)
-#define bus_outw_ns(b,v,p)	(b)->b_out((b),(v),(p),-1)
-#define bus_outl_ns(b,v,p)	(b)->b_out((b),(v),(p),-2)
-
-#define bus_outsb(b,p,a,c)	(b)->b_outs((b),(p),(a),(c),0)
-#define bus_outsw(b,p,a,c)	(b)->b_outs((b),(p),(a),(c),1)
-#define bus_outsl(b,p,a,c)	(b)->b_outs((b),(p),(a),(c),2)
-#define bus_outsw_ns(b,p,a,c)	(b)->b_outs((b),(p),(a),(c),-1)
-#define bus_outsl_ns(b,p,a,c)	(b)->b_outs((b),(p),(a),(c),-2)
-
-#define bus_readb(b,a)		(b)->b_read((b),(a),0)
-#define bus_readw(b,a)		(b)->b_read((b),(a),1)
-#define bus_readl(b,a)		(b)->b_read((b),(a),2)
-#define bus_readw_ns(b,a)	(b)->b_read((b),(a),-1)
-#define bus_readl_ns(b,a)	(b)->b_read((b),(a),-2)
-
-#define bus_writeb(b,v,a)	(b)->b_write((b),(v),(a),0)
-#define bus_writew(b,v,a)	(b)->b_write((b),(v),(a),1)
-#define bus_writel(b,v,a)	(b)->b_write((b),(v),(a),2)
-#define bus_writew_ns(b,v,a)	(b)->b_write((b),(v),(a),-1)
-#define bus_writel_ns(b,v,a)	(b)->b_write((b),(v),(a),-2)
-
-#define bus_ioremap(b,s,n)	(b)->b_ioremap((b),(s),(n))
-#define bus_iounmap(b,a)	(b)->b_iounmap((b),(a))
-#define bus_memcpy_fromio(b,d,s,n) (b)->b_copy_from((b),(d),(s),(n))
-#define bus_memcpy_toio(b,d,s,n) (b)->b_copy_to((b),(d),(s),(n))
-
-#define bus_request_irq(b,i,h,f,n,d) \
-				(b)->b_request_irq((b),(i),(h),(f),(n),(d))
-#define bus_free_irq(b,i,d)	(b)->b_free_irq((b),(i),(d))
-
-#else
-
-#define bus_inb(b,p)		inb(p)
-#define bus_inw(b,p)		inw(p)
-#define bus_inl(b,p)		inl(p)
-#define bus_inw_ns(b,p)		inw_ns(p)
-#define bus_inl_ns(b,p)		inl_ns(p)
-
-#define bus_insb(b,p,a,c)	insb(p,a,c)
-#define bus_insw(b,p,a,c)	insw(p,a,c)
-#define bus_insl(b,p,a,c)	insl(p,a,c)
-#define bus_insw_ns(b,p,a,c)	insw_ns(p,a,c)
-#define bus_insl_ns(b,p,a,c)	insl_ns(p,a,c)
-
-#define bus_outb(b,v,p)		outb(b,v,p)
-#define bus_outw(b,v,p)		outw(b,v,p)
-#define bus_outl(b,v,p)		outl(b,v,p)
-#define bus_outw_ns(b,v,p)	outw_ns(b,v,p)
-#define bus_outl_ns(b,v,p)	outl_ns(b,v,p)
-
-#define bus_outsb(b,p,a,c)	outsb(p,a,c)
-#define bus_outsw(b,p,a,c)	outsw(p,a,c)
-#define bus_outsl(b,p,a,c)	outsl(p,a,c)
-#define bus_outsw_ns(b,p,a,c)	outsw_ns(p,a,c)
-#define bus_outsl_ns(b,p,a,c)	outsl_ns(p,a,c)
-
-#define bus_readb(b,a)		readb(a)
-#define bus_readw(b,a)		readw(a)
-#define bus_readl(b,a)		readl(a)
-#define bus_readw_ns(b,a)	readw_ns(a)
-#define bus_readl_ns(b,a)	readl_ns(a)
-
-#define bus_writeb(b,v,a)	writeb(v,a)
-#define bus_writew(b,v,a)	writew(v,a)
-#define bus_writel(b,v,a)	writel(v,a)
-#define bus_writew_ns(b,v,a)	writew_ns(v,a)
-#define bus_writel_ns(b,v,a)	writel_ns(v,a)
-
-#define bus_ioremap(b,s,n)	ioremap(s,n)
-#define bus_iounmap(b,a)	iounmap(a)
-#define bus_memcpy_fromio(b,d,s,n) memcpy_fromio(d,s,n)
-#define bus_memcpy_toio(b,d,s,n) memcpy_toio(d,s,n)
-
-#define bus_request_irq(b,i,h,f,n,d) request_irq((i),(h),(f),(n),(d))
-#define bus_free_irq(b,i,d)	free_irq((i),(d))
-
-#endif /* CONFIG_VIRTUAL_BUS */
-
-#endif /* _LINUX_BUS_OPS_H */
+/* now empty */
+#warning please remove the reference to this file
diff -Nru a/include/pcmcia/cs.h b/include/pcmcia/cs.h
--- a/include/pcmcia/cs.h	Mon Mar 31 13:41:06 2003
+++ b/include/pcmcia/cs.h	Mon Mar 31 13:41:06 2003
@@ -98,7 +98,6 @@
     void	*buffer;
     void	*misc;
     void	*client_data;
-    struct bus_operations *bus;
 } event_callback_args_t;
 
 /* for GetConfigurationInfo */
diff -Nru a/include/pcmcia/driver_ops.h b/include/pcmcia/driver_ops.h
--- a/include/pcmcia/driver_ops.h	Mon Mar 31 13:41:07 2003
+++ b/include/pcmcia/driver_ops.h	Mon Mar 31 13:41:07 2003
@@ -1,73 +1,2 @@
-/*
- * driver_ops.h 1.15 2000/06/12 21:55:40
- *
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License
- * at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and
- * limitations under the License. 
- *
- * The initial developer of the original code is David A. Hinds
- * <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
- * are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
- *
- * Alternatively, the contents of this file may be used under the
- * terms of the GNU General Public License version 2 (the "GPL"), in which
- * case the provisions of the GPL are applicable instead of the
- * above.  If you wish to allow the use of your version of this file
- * only under the terms of the GPL and not to allow others to use
- * your version of this file under the MPL, indicate your decision by
- * deleting the provisions above and replace them with the notice and
- * other provisions required by the GPL.  If you do not delete the
- * provisions above, a recipient may use your version of this file
- * under either the MPL or the GPL.
- */
-
-#ifndef _LINUX_DRIVER_OPS_H
-#define _LINUX_DRIVER_OPS_H
-
-#ifndef DEV_NAME_LEN
-#define DEV_NAME_LEN	32
-#endif
-
-#ifdef __KERNEL__
-
-typedef struct dev_node_t {
-    char		dev_name[DEV_NAME_LEN];
-    u_short		major, minor;
-    struct dev_node_t	*next;
-} dev_node_t;
-
-typedef struct dev_locator_t {
-    enum { LOC_ISA, LOC_PCI } bus;
-    union {
-	struct {
-	    u_short	io_base_1, io_base_2;
-	    u_long	mem_base;
-	    u_char	irq, dma;
-	} isa;
-	struct {
-	    u_char	bus;
-	    u_char	devfn;
-	} pci;
-    } b;
-} dev_locator_t;
-
-typedef struct driver_operations {
-    char		*name;
-    dev_node_t		*(*attach) (dev_locator_t *loc);
-    void		(*suspend) (dev_node_t *dev);
-    void		(*resume) (dev_node_t *dev);
-    void		(*detach) (dev_node_t *dev);
-} driver_operations;
-
-int register_driver(struct driver_operations *ops);
-void unregister_driver(struct driver_operations *ops);
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_DRIVER_OPS_H */
+/* now empty */
+#warning please remove the reference to this file
diff -Nru a/include/pcmcia/ds.h b/include/pcmcia/ds.h
--- a/include/pcmcia/ds.h	Mon Mar 31 13:41:06 2003
+++ b/include/pcmcia/ds.h	Mon Mar 31 13:41:06 2003
@@ -30,9 +30,9 @@
 #ifndef _LINUX_DS_H
 #define _LINUX_DS_H
 
-#include <pcmcia/driver_ops.h>
 #include <pcmcia/bulkmem.h>
 #include <linux/device.h>
+#include <pcmcia/cs_types.h>
 
 typedef struct tuple_parse_t {
     tuple_t		tuple;
@@ -108,6 +108,12 @@
 
 #ifdef __KERNEL__
 
+typedef struct dev_node_t {
+    char		dev_name[DEV_NAME_LEN];
+    u_short		major, minor;
+    struct dev_node_t	*next;
+} dev_node_t;
+
 typedef struct dev_link_t {
     dev_node_t		*dev;
     u_int		state, open;
@@ -144,7 +150,7 @@
 extern struct bus_type pcmcia_bus_type;
 
 struct pcmcia_driver {
-	int			use_count, status;
+	int			use_count;
 	dev_link_t		*(*attach)(void);
 	void			(*detach)(dev_link_t *);
 	struct module		*owner;
diff -Nru a/include/pcmcia/ss.h b/include/pcmcia/ss.h
--- a/include/pcmcia/ss.h	Mon Mar 31 13:41:08 2003
+++ b/include/pcmcia/ss.h	Mon Mar 31 13:41:08 2003
@@ -58,7 +58,6 @@
     ioaddr_t	io_offset;
     u_char	pci_irq;
     struct pci_dev *cb_dev;
-    struct bus_operations *bus;
 } socket_cap_t;
 
 /* InquireSocket capabilities */
@@ -134,9 +133,7 @@
 	int (*get_status)(unsigned int sock, u_int *value);
 	int (*get_socket)(unsigned int sock, socket_state_t *state);
 	int (*set_socket)(unsigned int sock, socket_state_t *state);
-	int (*get_io_map)(unsigned int sock, struct pccard_io_map *io);
 	int (*set_io_map)(unsigned int sock, struct pccard_io_map *io);
-	int (*get_mem_map)(unsigned int sock, struct pccard_mem_map *mem);
 	int (*set_mem_map)(unsigned int sock, struct pccard_mem_map *mem);
 	void (*proc_setup)(unsigned int sock, struct proc_dir_entry *base);
 };
@@ -151,9 +148,12 @@
 	 * returned to driver) = sock_offset + (0, 1, .. , (nsock-1) */
 	struct pccard_operations *ops;		/* see above */
 	void *s_info;				/* socket_info_t */
-	unsigned int use_bus_pm;
 };
 
 extern struct device_class pcmcia_socket_class;
+
+/* socket drivers are expected to use these callbacks in their .drv struct */
+int pcmcia_socket_dev_suspend(struct device * dev, u32 state, u32 level);
+int pcmcia_socket_dev_resume(struct device * dev, u32 level);
 
 #endif /* _LINUX_SS_H */
diff -Nru a/init/main.c b/init/main.c
--- a/init/main.c	Mon Mar 31 13:41:06 2003
+++ b/init/main.c	Mon Mar 31 13:41:06 2003
@@ -463,6 +463,15 @@
 	rest_init();
 }
 
+int __initdata initcall_debug;
+
+static int __init initcall_debug_setup(char *str)
+{
+	initcall_debug = 1;
+	return 1;
+}
+__setup("initcall_debug", initcall_debug_setup);
+
 struct task_struct *child_reaper = &init_task;
 
 extern initcall_t __initcall_start, __initcall_end;
@@ -470,12 +479,30 @@
 static void __init do_initcalls(void)
 {
 	initcall_t *call;
+	int count = preempt_count();
+
+	for (call = &__initcall_start; call < &__initcall_end; call++) {
+		char *msg;
+
+		if (initcall_debug)
+			printk("calling initcall 0x%p\n", *call);
 
-	call = &__initcall_start;
-	do {
 		(*call)();
-		call++;
-	} while (call < &__initcall_end);
+
+		msg = NULL;
+		if (preempt_count() != count) {
+			msg = "preemption imbalance";
+			preempt_count() = count;
+		}
+		if (irqs_disabled()) {
+			msg = "disabled interrupts";
+			local_irq_enable();
+		}
+		if (msg) {
+			printk("error in initcall at 0x%p: "
+				"returned with %s\n", *call, msg);
+		}
+	}
 
 	/* Make sure there is no pending stuff from the initcall sequence */
 	flush_scheduled_work();
diff -Nru a/kernel/posix-timers.c b/kernel/posix-timers.c
--- a/kernel/posix-timers.c	Mon Mar 31 13:41:07 2003
+++ b/kernel/posix-timers.c	Mon Mar 31 13:41:07 2003
@@ -230,8 +230,10 @@
 		return;
 	}
 	posix_get_now(&now);
-	while (posix_time_before(&timr->it_timer, &now))
+	do {
 		posix_bump_timer(timr);
+	}while (posix_time_before(&timr->it_timer, &now));
+
 	timr->it_overrun_last = timr->it_overrun;
 	timr->it_overrun = -1;
 	timr->it_requeue_pending = 0;
@@ -587,7 +589,6 @@
 void inline
 do_timer_gettime(struct k_itimer *timr, struct itimerspec *cur_setting)
 {
-	long sub_expires;
 	unsigned long expires;
 	struct now_struct now;
 
@@ -607,7 +608,7 @@
 				posix_bump_timer(timr);
 		else
 			if (!timer_pending(&timr->it_timer))
-				sub_expires = expires = 0;
+				expires = 0;
 		if (expires)
 			expires -= now.jiffies;
 	}
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c	Mon Mar 31 13:41:08 2003
+++ b/kernel/sched.c	Mon Mar 31 13:41:08 2003
@@ -2554,7 +2554,7 @@
 #if defined(in_atomic)
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
-	if (in_atomic()) {
+	if (in_atomic() || irqs_disabled()) {
 		if (time_before(jiffies, prev_jiffy + HZ))
 			return;
 		prev_jiffy = jiffies;
diff -Nru a/lib/Kconfig b/lib/Kconfig
--- a/lib/Kconfig	Mon Mar 31 13:41:07 2003
+++ b/lib/Kconfig	Mon Mar 31 13:41:07 2003
@@ -17,13 +17,14 @@
 #
 config ZLIB_INFLATE
 	tristate
-	default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y
-	default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m
+	default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y
+	default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m
 
 config ZLIB_DEFLATE
 	tristate
-	default m if PPP_DEFLATE!=y && JFFS2_FS!=y && (PPP_DEFLATE=m || JFFS2_FS=m)
-	default y if PPP_DEFLATE=y || JFFS2_FS=y
+	default m if PPP_DEFLATE!=y && JFFS2_FS!=y && CRYPTO_DEFLATE!=y && \
+		(PPP_DEFLATE=m || JFFS2_FS=m || CRYPTO_DEFLATE=m)
+	default y if PPP_DEFLATE=y || JFFS2_FS=y || CRYPTO_DEFLATE=y
 
 endmenu
 
diff -Nru a/lib/idr.c b/lib/idr.c
--- a/lib/idr.c	Mon Mar 31 13:41:07 2003
+++ b/lib/idr.c	Mon Mar 31 13:41:07 2003
@@ -150,7 +150,7 @@
 
 static inline int sub_alloc(struct idr *idp, int shift, void *ptr)
 {
-	long n, v = 0;
+	int n, v = 0;
 	struct idr_layer *p;
 	struct idr_layer **pa[MAX_LEVEL];
 	struct idr_layer ***paa = &pa[0];
@@ -211,7 +211,7 @@
 
 int idr_get_new(struct idr *idp, void *ptr)
 {
-	long v;
+	int v;
 	
 	if (idp->id_free_cnt < idp->layers + 1) 
 		return (-1);
diff -Nru a/mm/fremap.c b/mm/fremap.c
--- a/mm/fremap.c	Mon Mar 31 13:41:06 2003
+++ b/mm/fremap.c	Mon Mar 31 13:41:06 2003
@@ -17,7 +17,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-static inline int zap_pte(struct mm_struct *mm, pte_t *ptep)
+static inline int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 
@@ -26,6 +27,7 @@
 	if (pte_present(pte)) {
 		unsigned long pfn = pte_pfn(pte);
 
+		flush_cache_page(vma, addr);
 		pte = ptep_get_and_clear(ptep);
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
@@ -54,7 +56,7 @@
 		unsigned long addr, struct page *page, pgprot_t prot)
 {
 	int err = -ENOMEM, flush;
-	pte_t *pte, entry;
+	pte_t *pte;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	struct pte_chain *pte_chain;
@@ -73,13 +75,12 @@
 	if (!pte)
 		goto err_unlock;
 
-	flush = zap_pte(mm, pte);
+	flush = zap_pte(mm, vma, addr, pte);
 
 	mm->rss++;
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
-	entry = mk_pte(page, prot);
-	set_pte(pte, entry);
+	set_pte(pte, mk_pte(page, prot));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_unmap(pte);
 	if (flush)
diff -Nru a/mm/highmem.c b/mm/highmem.c
--- a/mm/highmem.c	Mon Mar 31 13:41:07 2003
+++ b/mm/highmem.c	Mon Mar 31 13:41:07 2003
@@ -381,7 +381,7 @@
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_pfn) < q->bounce_pfn)
+		if (page_to_pfn(page) < q->bounce_pfn)
 			continue;
 
 		/*
diff -Nru a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c	Mon Mar 31 13:41:07 2003
+++ b/mm/mmap.c	Mon Mar 31 13:41:07 2003
@@ -82,14 +82,6 @@
 		free += nr_swap_pages;
 
 		/*
-		 * This double-counts: the nrpages are both in the
-		 * page-cache and in the swapper space. At the same time,
-		 * this compensates for the swap-space over-allocation
-		 * (ie "nr_swap_pages" being too small).
-		 */
-		free += total_swapcache_pages;
-
-		/*
 		 * The code below doesn't account for free space in the
 		 * inode and dentry slab cache, slab cache fragmentation,
 		 * inodes and dentries which will become freeable under
@@ -374,6 +366,8 @@
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
 	struct file *file, unsigned long vm_pgoff, unsigned long size)
 {
+	if ((vma->vm_flags & VM_DONTEXPAND) || (vm_flags & VM_DONTEXPAND))
+		return 0;
 	if (vma->vm_file == file && vma->vm_flags == vm_flags) {
 		if (!file)
 			return 1;	/* anon mapping */
@@ -391,6 +385,8 @@
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 	struct file *file, unsigned long vm_pgoff)
 {
+	if ((vma->vm_flags & VM_DONTEXPAND) || (vm_flags & VM_DONTEXPAND))
+		return 0;
 	if (vma->vm_file == file && vma->vm_flags == vm_flags) {
 		unsigned long vma_size;
 
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	Mon Mar 31 13:41:06 2003
+++ b/mm/page_alloc.c	Mon Mar 31 13:41:06 2003
@@ -106,7 +106,7 @@
  * This is only for debug at present.  This usage means that zero-order pages
  * may not be compound.
  */
-static void prep_compound_page(struct page *page, int order)
+static void prep_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
@@ -121,7 +121,7 @@
 	}
 }
 
-static void destroy_compound_page(struct page *page, int order)
+static void destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
diff -Nru a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c	Mon Mar 31 13:41:08 2003
+++ b/mm/rmap.c	Mon Mar 31 13:41:08 2003
@@ -415,7 +415,6 @@
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a trylock, try again later
  * SWAP_FAIL	- the page is unswappable
- * SWAP_ERROR	- an error occurred
  */
 int try_to_unmap(struct page * page)
 {
@@ -482,9 +481,6 @@
 				continue;
 			case SWAP_FAIL:
 				ret = SWAP_FAIL;
-				goto out;
-			case SWAP_ERROR:
-				ret = SWAP_ERROR;
 				goto out;
 			}
 		}
diff -Nru a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c	Mon Mar 31 13:41:07 2003
+++ b/mm/shmem.c	Mon Mar 31 13:41:07 2003
@@ -33,6 +33,7 @@
 #include <linux/mount.h>
 #include <linux/writeback.h>
 #include <linux/vfs.h>
+#include <linux/blkdev.h>
 #include <asm/uaccess.h>
 
 /* This magic number is used in glibc for posix shared memory */
@@ -486,6 +487,16 @@
 	}
 done2:
 	BUG_ON(info->swapped > info->next_index);
+	if (inode->i_mapping->nrpages) {
+		/*
+		 * Call truncate_inode_pages again: racing shmem_unuse_inode
+		 * may have swizzled a page in from swap since vmtruncate or
+		 * generic_delete_inode did it, before we lowered next_index.
+		 */
+		spin_unlock(&info->lock);
+		truncate_inode_pages(inode->i_mapping, inode->i_size);
+		spin_lock(&info->lock);
+	}
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 }
@@ -828,8 +839,7 @@
 			SetPageUptodate(filepage);
 			set_page_dirty(filepage);
 			swap_free(swap);
-		} else if (!(error = move_from_swap_cache(
-				swappage, idx, mapping))) {
+		} else if (move_from_swap_cache(swappage, idx, mapping) == 0) {
 			shmem_swp_set(info, entry, 0);
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
@@ -840,8 +850,8 @@
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
 			page_cache_release(swappage);
-			if (error != -EEXIST)
-				goto failed;
+			/* let kswapd refresh zone for GFP_ATOMICs */
+			blk_congestion_wait(WRITE, HZ/50);
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
@@ -887,15 +897,16 @@
 				swap = *entry;
 				shmem_swp_unmap(entry);
 			}
-			if (error || swap.val ||
-			    (error = add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_ATOMIC))) {
+			if (error || swap.val || 0 != add_to_page_cache_lru(
+					filepage, mapping, idx, GFP_ATOMIC)) {
 				spin_unlock(&info->lock);
 				page_cache_release(filepage);
 				shmem_free_block(inode);
 				filepage = NULL;
-				if (error != -EEXIST)
+				if (error)
 					goto failed;
+				/* let kswapd refresh zone for GFP_ATOMICs */
+				blk_congestion_wait(WRITE, HZ / 50);
 				goto repeat;
 			}
 		}
diff -Nru a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c	Mon Mar 31 13:41:08 2003
+++ b/mm/slab.c	Mon Mar 31 13:41:08 2003
@@ -375,91 +375,26 @@
 #define	SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 #define	GET_PAGE_SLAB(pg)     ((struct slab *)(pg)->list.prev)
 
-/* Size description struct for general caches. */
-struct cache_sizes {
+/* These are the default caches for kmalloc. Custom caches can have other sizes. */
+static struct cache_sizes {
 	size_t		 cs_size;
 	kmem_cache_t	*cs_cachep;
 	kmem_cache_t	*cs_dmacachep;
+} malloc_sizes[] = {
+#define CACHE(x) { .cs_size = (x) },
+#include <linux/kmalloc_sizes.h>
+#undef CACHE
 };
 
-/* These are the default caches for kmalloc. Custom caches can have other sizes. */
-static struct cache_sizes malloc_sizes[] = {
-#if PAGE_SIZE == 4096
-	{    32,	NULL, NULL},
-#endif
-	{    64,	NULL, NULL},
-#if L1_CACHE_BYTES < 64
-	{    96,	NULL, NULL},
-#endif
-	{   128,	NULL, NULL},
-#if L1_CACHE_BYTES < 128
-	{   192,	NULL, NULL},
-#endif
-	{   256,	NULL, NULL},
-	{   512,	NULL, NULL},
-	{  1024,	NULL, NULL},
-	{  2048,	NULL, NULL},
-	{  4096,	NULL, NULL},
-	{  8192,	NULL, NULL},
-	{ 16384,	NULL, NULL},
-	{ 32768,	NULL, NULL},
-	{ 65536,	NULL, NULL},
-	{131072,	NULL, NULL},
-#ifndef CONFIG_MMU
-	{262144,	NULL, NULL},
-	{524288,	NULL, NULL},
-	{1048576,	NULL, NULL},
-#ifdef CONFIG_LARGE_ALLOCS
-	{2097152,	NULL, NULL},
-	{4194304,	NULL, NULL},
-	{8388608,	NULL, NULL},
-	{16777216,	NULL, NULL},
-	{33554432,	NULL, NULL},
-#endif /* CONFIG_LARGE_ALLOCS */
-#endif /* CONFIG_MMU */
-	{     0,	NULL, NULL}
-};
 /* Must match cache_sizes above. Out of line to keep cache footprint low. */
-#define CN(x) { x, x "(DMA)" }
 static struct { 
 	char *name; 
 	char *name_dma;
 } cache_names[] = { 
-#if PAGE_SIZE == 4096
-	CN("size-32"),
-#endif
-	CN("size-64"),
-#if L1_CACHE_BYTES < 64
-	CN("size-96"),
-#endif
-	CN("size-128"),
-#if L1_CACHE_BYTES < 128
-	CN("size-192"),
-#endif
-	CN("size-256"),
-	CN("size-512"),
-	CN("size-1024"),
-	CN("size-2048"),
-	CN("size-4096"),
-	CN("size-8192"),
-	CN("size-16384"),
-	CN("size-32768"),
-	CN("size-65536"),
-	CN("size-131072"),
-#ifndef CONFIG_MMU
-	CN("size-262144"),
-	CN("size-524288"),
-	CN("size-1048576"),
-#ifdef CONFIG_LARGE_ALLOCS
-	CN("size-2097152"),
-	CN("size-4194304"),
-	CN("size-8388608"),
-	CN("size-16777216"),
-	CN("size-33554432"),
-#endif /* CONFIG_LARGE_ALLOCS */
-#endif /* CONFIG_MMU */
+#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
+#include <linux/kmalloc_sizes.h>
+#undef CACHE
 }; 
-#undef CN
 
 struct arraycache_init initarray_cache __initdata = { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 struct arraycache_init initarray_generic __initdata = { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
@@ -660,39 +595,39 @@
  */
 void __init kmem_cache_sizes_init(void)
 {
-	struct cache_sizes *sizes = malloc_sizes;
+	int i;
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory.
 	 */
 	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
-	do {
+
+	for (i = 0; i < ARRAY_SIZE(malloc_sizes); i++) {
+		struct cache_sizes *sizes = malloc_sizes + i;
 		/* For performance, all the general caches are L1 aligned.
 		 * This should be particularly beneficial on SMP boxes, as it
 		 * eliminates "false sharing".
 		 * Note for systems short on memory removing the alignment will
 		 * allow tighter packing of the smaller caches. */
-		if (!(sizes->cs_cachep =
-			kmem_cache_create(cache_names[sizes-malloc_sizes].name, 
-					  sizes->cs_size,
-					0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
+		sizes->cs_cachep = kmem_cache_create(
+			cache_names[i].name, sizes->cs_size,
+			0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+		if (!sizes->cs_cachep)
 			BUG();
-		}
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
 		if (!(OFF_SLAB(sizes->cs_cachep))) {
 			offslab_limit = sizes->cs_size-sizeof(struct slab);
 			offslab_limit /= sizeof(kmem_bufctl_t);
 		}
+
 		sizes->cs_dmacachep = kmem_cache_create(
-		    cache_names[sizes-malloc_sizes].name_dma, 
-			sizes->cs_size, 0,
-			SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
+			cache_names[i].name_dma, sizes->cs_size,
+			0, SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 		if (!sizes->cs_dmacachep)
 			BUG();
-		sizes++;
-	} while (sizes->cs_size);
+	}
 	/*
 	 * The generic caches are running - time to kick out the
 	 * bootstrap cpucaches.
@@ -1035,7 +970,7 @@
 	if (flags & SLAB_HWCACHE_ALIGN) {
 		/* Need to adjust size so that objs are cache aligned. */
 		/* Small obj size, can get at least two per cache line. */
-		while (size < align/2)
+		while (size <= align/2)
 			align /= 2;
 		size = (size+align-1)&(~(align-1));
 	}
diff -Nru a/mm/swap_state.c b/mm/swap_state.c
--- a/mm/swap_state.c	Mon Mar 31 13:41:07 2003
+++ b/mm/swap_state.c	Mon Mar 31 13:41:07 2003
@@ -68,7 +68,7 @@
 		swap_cache_info.noent_race, swap_cache_info.exist_race);
 }
 
-int add_to_swap_cache(struct page *page, swp_entry_t entry)
+static int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -78,7 +78,7 @@
 		INC_CACHE_INFO(noent_race);
 		return -ENOENT;
 	}
-	error = add_to_page_cache(page, &swapper_space, entry.val, GFP_ATOMIC);
+	error = add_to_page_cache(page, &swapper_space, entry.val, GFP_KERNEL);
 	/*
 	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
 	 */
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	Mon Mar 31 13:41:06 2003
+++ b/mm/vmscan.c	Mon Mar 31 13:41:06 2003
@@ -277,6 +277,7 @@
 			pte_chain_lock(page);
 			mapping = page->mapping;
 		}
+#endif /* CONFIG_SWAP */
 
 		/*
 		 * The page is mapped into the page tables of one or more
@@ -284,7 +285,6 @@
 		 */
 		if (page_mapped(page) && mapping) {
 			switch (try_to_unmap(page)) {
-			case SWAP_ERROR:
 			case SWAP_FAIL:
 				pte_chain_unlock(page);
 				goto activate_locked;
@@ -295,7 +295,6 @@
 				; /* try to free the page below */
 			}
 		}
-#endif /* CONFIG_SWAP */
 		pte_chain_unlock(page);
 
 		/*
diff -Nru a/net/Kconfig b/net/Kconfig
--- a/net/Kconfig	Mon Mar 31 13:41:06 2003
+++ b/net/Kconfig	Mon Mar 31 13:41:06 2003
@@ -118,22 +118,6 @@
 	  You can say Y here if you want to get additional messages useful in
 	  debugging the netfilter code.
 
-config FILTER
-	bool "Socket Filtering"
-	---help---
-	  The Linux Socket Filter is derived from the Berkeley Packet Filter.
-	  If you say Y here, user-space programs can attach a filter to any
-	  socket and thereby tell the kernel that it should allow or disallow
-	  certain types of data to get through the socket.  Linux Socket
-	  Filtering works on all socket types except TCP for now.  See the
-	  text file <file:Documentation/networking/filter.txt> for more
-	  information.
-
-	  You need to say Y here if you want to use PPP packet filtering
-	  (see the CONFIG_PPP_FILTER option below).
-
-	  If unsure, say N.
-
 config UNIX
 	tristate "Unix domain sockets"
 	---help---
@@ -223,6 +207,8 @@
 	  It is safe to say N here for now.
 
 source "net/ipv6/Kconfig"
+
+source "net/xfrm/Kconfig"
 
 source "net/sctp/Kconfig"
 
diff -Nru a/net/Makefile b/net/Makefile
--- a/net/Makefile	Mon Mar 31 13:41:07 2003
+++ b/net/Makefile	Mon Mar 31 13:41:07 2003
@@ -14,7 +14,7 @@
 # LLC has to be linked before the files in net/802/
 obj-$(CONFIG_LLC)		+= llc/
 obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/
-obj-$(CONFIG_INET)		+= ipv4/
+obj-$(CONFIG_INET)		+= ipv4/ xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
 obj-$(CONFIG_IPV6)		+= ipv6/
 obj-$(CONFIG_PACKET)		+= packet/
diff -Nru a/net/compat.c b/net/compat.c
--- a/net/compat.c	Mon Mar 31 13:41:07 2003
+++ b/net/compat.c	Mon Mar 31 13:41:07 2003
@@ -296,108 +296,6 @@
 	__scm_destroy(scm);
 }
 
-/* In these cases we (currently) can just copy to data over verbatim
- * because all CMSGs created by the kernel have well defined types which
- * have the same layout in both the 32-bit and 64-bit API.  One must add
- * some special cased conversions here if we start sending control messages
- * with incompatible types.
- *
- * SCM_RIGHTS and SCM_CREDENTIALS are done by hand in recvmsg_compat right after
- * we do our work.  The remaining cases are:
- *
- * SOL_IP	IP_PKTINFO	struct in_pktinfo	32-bit clean
- *		IP_TTL		int			32-bit clean
- *		IP_TOS		__u8			32-bit clean
- *		IP_RECVOPTS	variable length		32-bit clean
- *		IP_RETOPTS	variable length		32-bit clean
- *		(these last two are clean because the types are defined
- *		 by the IPv4 protocol)
- *		IP_RECVERR	struct sock_extended_err +
- *				struct sockaddr_in	32-bit clean
- * SOL_IPV6	IPV6_RECVERR	struct sock_extended_err +
- *				struct sockaddr_in6	32-bit clean
- *		IPV6_PKTINFO	struct in6_pktinfo	32-bit clean
- *		IPV6_HOPLIMIT	int			32-bit clean
- *		IPV6_FLOWINFO	u32			32-bit clean
- *		IPV6_HOPOPTS	ipv6 hop exthdr		32-bit clean
- *		IPV6_DSTOPTS	ipv6 dst exthdr(s)	32-bit clean
- *		IPV6_RTHDR	ipv6 routing exthdr	32-bit clean
- *		IPV6_AUTHHDR	ipv6 auth exthdr	32-bit clean
- */
-static void cmsg_compat_recvmsg_fixup(struct msghdr *kmsg, unsigned long orig_cmsg_uptr)
-{
-	unsigned char *workbuf, *wp;
-	unsigned long bufsz, space_avail;
-	struct cmsghdr *ucmsg;
-
-	bufsz = ((unsigned long)kmsg->msg_control) - orig_cmsg_uptr;
-	space_avail = kmsg->msg_controllen + bufsz;
-	wp = workbuf = kmalloc(bufsz, GFP_KERNEL);
-	if(workbuf == NULL)
-		goto fail;
-
-	/* To make this more sane we assume the kernel sends back properly
-	 * formatted control messages.  Because of how the kernel will truncate
-	 * the cmsg_len for MSG_TRUNC cases, we need not check that case either.
-	 */
-	ucmsg = (struct cmsghdr *) orig_cmsg_uptr;
-	while(((unsigned long)ucmsg) <=
-	      (((unsigned long)kmsg->msg_control) - sizeof(struct cmsghdr))) {
-		struct compat_cmsghdr *kcmsg_compat = (struct compat_cmsghdr *) wp;
-		int clen64, clen32;
-
-		/* UCMSG is the 64-bit format CMSG entry in user-space.
-		 * KCMSG_COMPAT is within the kernel space temporary buffer
-		 * we use to convert into a 32-bit style CMSG.
-		 */
-		__get_user(kcmsg_compat->cmsg_len, &ucmsg->cmsg_len);
-		__get_user(kcmsg_compat->cmsg_level, &ucmsg->cmsg_level);
-		__get_user(kcmsg_compat->cmsg_type, &ucmsg->cmsg_type);
-
-		clen64 = kcmsg_compat->cmsg_len;
-		copy_from_user(CMSG_COMPAT_DATA(kcmsg_compat), CMSG_DATA(ucmsg),
-			       clen64 - CMSG_ALIGN(sizeof(*ucmsg)));
-		clen32 = ((clen64 - CMSG_ALIGN(sizeof(*ucmsg))) +
-			  CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)));
-		kcmsg_compat->cmsg_len = clen32;
-
-		ucmsg = (struct cmsghdr *) (((char *)ucmsg) + CMSG_ALIGN(clen64));
-		wp = (((char *)kcmsg_compat) + CMSG_COMPAT_ALIGN(clen32));
-	}
-
-	/* Copy back fixed up data, and adjust pointers. */
-	bufsz = (wp - workbuf);
-	copy_to_user((void *)orig_cmsg_uptr, workbuf, bufsz);
-
-	kmsg->msg_control = (struct cmsghdr *)
-		(((char *)orig_cmsg_uptr) + bufsz);
-	kmsg->msg_controllen = space_avail - bufsz;
-
-	kfree(workbuf);
-	return;
-
-fail:
-	/* If we leave the 64-bit format CMSG chunks in there,
-	 * the application could get confused and crash.  So to
-	 * ensure greater recovery, we report no CMSGs.
-	 */
-	kmsg->msg_controllen += bufsz;
-	kmsg->msg_control = (void *) orig_cmsg_uptr;
-}
-
-int put_compat_msg_controllen(struct msghdr *msg_sys,
-		struct compat_msghdr *msg_compat, unsigned long cmsg_ptr)
-{
-	unsigned long ucmsg_ptr;
-	compat_size_t uclen;
-
-	if ((unsigned long)msg_sys->msg_control != cmsg_ptr)
-		cmsg_compat_recvmsg_fixup(msg_sys, cmsg_ptr);
-	ucmsg_ptr = ((unsigned long)msg_sys->msg_control);
-	uclen = (compat_size_t) (ucmsg_ptr - cmsg_ptr);
-	return __put_user(uclen, &msg_compat->msg_controllen);
-}
-
 extern asmlinkage long sys_setsockopt(int fd, int level, int optname,
 				     char *optval, int optlen);
 
@@ -496,6 +394,7 @@
 	struct sock_fprog kfprog;
 	mm_segment_t old_fs;
 	compat_uptr_t uptr;
+	unsigned int fsize;
 	int ret;
 
 	if (!access_ok(VERIFY_READ, fprog32, sizeof(*fprog32)) ||
@@ -503,15 +402,14 @@
 	    __get_user(uptr, &fprog32->filter))
 		return -EFAULT;
 
-	kfprog.filter = compat_ptr(uptr);
-	/*
-	 * Since struct sock_filter is architecure independent,
-	 * we can just do the access_ok check and pass the
-	 * same pointer to the real syscall.
-	 */
-	if (!access_ok(VERIFY_READ, kfprog.filter,
-			kfprog.len * sizeof(struct sock_filter)))
+	fsize = kfprog.len * sizeof(struct sock_filter);
+	kfprog.filter = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
+	if (kfprog.filter == NULL)
+		return -ENOMEM;
+	if (copy_from_user(kfprog.filter, compat_ptr(uptr), fsize)) {
+		kfree(kfprog.filter);
 		return -EFAULT;
+	}
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -519,6 +417,7 @@
 			     (char *)&kfprog, sizeof(kfprog));
 	set_fs(old_fs);
 
+	kfree(kfprog.filter);
 	return ret;
 }
 
diff -Nru a/net/core/Makefile b/net/core/Makefile
--- a/net/core/Makefile	Mon Mar 31 13:41:08 2003
+++ b/net/core/Makefile	Mon Mar 31 13:41:08 2003
@@ -10,9 +10,7 @@
 endif
 endif
 
-obj-$(CONFIG_FILTER) += filter.o
-
-obj-$(CONFIG_NET) += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o link_watch.o
+obj-$(CONFIG_NET) += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_NETFILTER) += netfilter.o
 obj-$(CONFIG_NET_DIVERT) += dv.o
diff -Nru a/net/core/filter.c b/net/core/filter.c
--- a/net/core/filter.c	Mon Mar 31 13:41:06 2003
+++ b/net/core/filter.c	Mon Mar 31 13:41:06 2003
@@ -15,9 +15,6 @@
  * Andi Kleen - Fix a few bad bugs and races.
  */
 
-#include <linux/config.h>
-#if defined(CONFIG_FILTER)
-
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched.h>
@@ -495,4 +492,3 @@
 
 	return (err);
 }
-#endif /* CONFIG_FILTER */
diff -Nru a/net/core/skbuff.c b/net/core/skbuff.c
--- a/net/core/skbuff.c	Mon Mar 31 13:41:08 2003
+++ b/net/core/skbuff.c	Mon Mar 31 13:41:08 2003
@@ -170,15 +170,8 @@
 	struct sk_buff *skb;
 	u8 *data;
 
-	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
-		static int count;
-		if (++count < 5) {
-			printk(KERN_ERR "alloc_skb called nonatomically "
-			       "from interrupt %p\n", NET_CALLER(size));
- 			BUG();
-		}
-		gfp_mask &= ~__GFP_WAIT;
-	}
+	if (gfp_mask & __GFP_WAIT)
+		might_sleep();
 
 	/* Get the HEAD */
 	skb = skb_head_from_pool();
@@ -195,46 +188,14 @@
 	if (!data)
 		goto nodata;
 
-	/* XXX: does not include slab overhead */
-	skb->next	  = skb->prev = NULL;
-	skb->list	  = NULL;
-	skb->sk		  = NULL;
-	skb->stamp.tv_sec = 0;	/* No idea about time */
-	skb->dev	  = NULL;
-	skb->dst	  = NULL;
-	skb->sp		  = NULL;
-	memset(skb->cb, 0, sizeof(skb->cb));
-	/* Set up other state */
-	skb->len	  = 0;
-	skb->data_len	  = 0;
-	skb->csum	  = 0;
-	skb->local_df	  = 0;
-	skb->cloned	  = 0;
-	skb->pkt_type	  = PACKET_HOST;	/* Default type */
-	skb->ip_summed	  = 0;
-	skb->priority	  = 0;
-	atomic_set(&skb->users, 1);
-	skb->security	  = 0;	/* By default packets are insecure */
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
 	skb->truesize = size + sizeof(struct sk_buff);
-	
-	/* Load the data pointers. */
-	skb->head = skb->data = skb->tail = data;
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
 	skb->end  = data + size;
 
-	skb->destructor	  = NULL;
-#ifdef CONFIG_NETFILTER
-	skb->nfmark	= skb->nfcache = 0;
-	skb->nfct	= NULL;
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug	= 0;
-#endif
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-	skb->nf_bridge	= NULL;
-#endif
-#endif
-#ifdef CONFIG_NET_SCHED
-	skb->tc_index	= 0;
-#endif	
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
 	skb_shinfo(skb)->tso_size = 0;
@@ -367,10 +328,10 @@
 	C(nh);
 	C(mac);
 	C(dst);
-	dst_clone(n->dst);
+	dst_clone(skb->dst);
 	C(sp);
 #ifdef CONFIG_INET
-	secpath_get(n->sp);
+	secpath_get(skb->sp);
 #endif
 	memcpy(n->cb, skb->cb, sizeof(skb->cb));
 	C(len);
@@ -381,24 +342,20 @@
 	C(pkt_type);
 	C(ip_summed);
 	C(priority);
-	atomic_set(&n->users, 1);
 	C(protocol);
 	C(security);
-	C(truesize);
-	C(head);
-	C(data);
-	C(tail);
-	C(end);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
 	C(nfcache);
 	C(nfct);
+	nf_conntrack_get(skb->nfct);
 #ifdef CONFIG_NETFILTER_DEBUG
 	C(nf_debug);
 #endif
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 	C(nf_bridge);
+	nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
 #if defined(CONFIG_HIPPI)
@@ -407,15 +364,16 @@
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #endif
+	C(truesize);
+	atomic_set(&n->users, 1);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
-#ifdef CONFIG_NETFILTER
-	nf_conntrack_get(skb->nfct);
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-	nf_bridge_get(skb->nf_bridge);
-#endif
-#endif
+
 	return n;
 }
 
@@ -439,7 +397,6 @@
 	new->nh.raw	= old->nh.raw + offset;
 	new->mac.raw	= old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
-	atomic_set(&new->users, 1);
 	new->local_df	= old->local_df;
 	new->pkt_type	= old->pkt_type;
 	new->stamp	= old->stamp;
@@ -449,18 +406,19 @@
 	new->nfmark	= old->nfmark;
 	new->nfcache	= old->nfcache;
 	new->nfct	= old->nfct;
-	nf_conntrack_get(new->nfct);
+	nf_conntrack_get(old->nfct);
 #ifdef CONFIG_NETFILTER_DEBUG
 	new->nf_debug	= old->nf_debug;
 #endif
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 	new->nf_bridge	= old->nf_bridge;
-	nf_bridge_get(new->nf_bridge);
+	nf_bridge_get(old->nf_bridge);
 #endif
 #endif
 #ifdef CONFIG_NET_SCHED
 	new->tc_index	= old->tc_index;
 #endif
+	atomic_set(&new->users, 1);
 }
 
 /**
diff -Nru a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c	Mon Mar 31 13:41:08 2003
+++ b/net/core/sock.c	Mon Mar 31 13:41:08 2003
@@ -119,9 +119,7 @@
 #include <net/sock.h>
 #include <linux/ipsec.h>
 
-#ifdef CONFIG_FILTER
 #include <linux/filter.h>
-#endif
 
 #ifdef CONFIG_INET
 #include <net/tcp.h>
@@ -168,9 +166,7 @@
 		    char *optval, int optlen)
 {
 	struct sock *sk=sock->sk;
-#ifdef CONFIG_FILTER
 	struct sk_filter *filter;
-#endif
 	int val;
 	int valbool;
 	struct linger ling;
@@ -381,7 +377,6 @@
 #endif
 
 
-#ifdef CONFIG_FILTER
 		case SO_ATTACH_FILTER:
 			ret = -EINVAL;
 			if (optlen == sizeof(struct sock_fprog)) {
@@ -407,7 +402,7 @@
 			spin_unlock_bh(&sk->lock.slock);
 			ret = -ENONET;
 			break;
-#endif
+
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
 		default:
@@ -614,20 +609,16 @@
 
 void sk_free(struct sock *sk)
 {
-#ifdef CONFIG_FILTER
 	struct sk_filter *filter;
-#endif
 
 	if (sk->destruct)
 		sk->destruct(sk);
 
-#ifdef CONFIG_FILTER
 	filter = sk->filter;
 	if (filter) {
 		sk_filter_release(sk, filter);
 		sk->filter = NULL;
 	}
-#endif
 
 	if (atomic_read(&sk->omem_alloc))
 		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
diff -Nru a/net/ipv4/Kconfig b/net/ipv4/Kconfig
--- a/net/ipv4/Kconfig	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/Kconfig	Mon Mar 31 13:41:08 2003
@@ -362,13 +362,5 @@
 
 	  If unsure, say Y.
 
-config XFRM_USER
-	tristate "IP: IPsec user configuration interface"
-	---help---
-	  Support for IPsec user configuration interface used
-	  by native Linux tools.
-
-	  If unsure, say Y.
-
 source "net/ipv4/netfilter/Kconfig"
 
diff -Nru a/net/ipv4/Makefile b/net/ipv4/Makefile
--- a/net/ipv4/Makefile	Mon Mar 31 13:41:06 2003
+++ b/net/ipv4/Makefile	Mon Mar 31 13:41:06 2003
@@ -20,6 +20,5 @@
 obj-$(CONFIG_INET_ESP) += esp.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
-obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 
-obj-y += xfrm_policy.o xfrm4_policy.o xfrm_state.o xfrm4_state.o xfrm_input.o xfrm4_input.o xfrm_algo.o
+obj-y += xfrm4_policy.o xfrm4_state.o xfrm4_input.o
diff -Nru a/net/ipv4/devinet.c b/net/ipv4/devinet.c
--- a/net/ipv4/devinet.c	Mon Mar 31 13:41:06 2003
+++ b/net/ipv4/devinet.c	Mon Mar 31 13:41:06 2003
@@ -1065,6 +1065,62 @@
 	return ret;
 }
 
+static int ipv4_doint_and_flush(ctl_table *ctl, int write,
+				struct file* filp, void *buffer,
+				size_t *lenp)
+{	/* proc handler: proc_dointvec() plus rt_cache_flush(0) when a write changes the value */
+	int *valp = ctl->data;
+	int val = *valp;	/* snapshot taken before proc_dointvec() may overwrite *valp */
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+
+	if (write && *valp != val)	/* reads and no-op writes do not flush */
+		rt_cache_flush(0);
+
+	return ret;	/* proc_dointvec()'s result is passed through unchanged */
+}
+
+static int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
+					 void *oldval, size_t *oldlenp,
+					 void *newval, size_t newlen, 
+					 void **context)
+{	/* sysctl strategy counterpart of ipv4_doint_and_flush(): store a new int and call rt_cache_flush(0) if it changed */
+	int *valp = table->data;
+	int new;
+
+	if (!newval || !newlen)
+		return 0;	/* no new value supplied: nothing is stored or flushed here */
+
+	if (newlen != sizeof(int))
+		return -EINVAL;
+
+	if (get_user(new, (int *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;	/* value unchanged: skip the store and the flush */
+
+	if (oldval && oldlenp) {	/* caller also asked for the previous value */
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)	/* never copy out more than the table entry holds */
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))	/* report the number of bytes actually copied */
+				return -EFAULT;
+		}
+	}
+
+	*valp = new;
+	rt_cache_flush(0);
+	return 1;	/* NOTE(review): presumably tells the sysctl core the old/new value handling is already done - confirm against the caller */
+}
+
+
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	ctl_table		devinet_vars[17];
@@ -1192,7 +1248,8 @@
 			.data =	&ipv4_devconf.no_xfrm,
 			.maxlen =		sizeof(int),
 			.mode =	0644,
-			.proc_handler =&proc_dointvec,
+			.proc_handler = &ipv4_doint_and_flush,
+			.strategy = &ipv4_doint_and_flush_strategy,
 		},
 		{
 			.ctl_name =	NET_IPV4_CONF_NOPOLICY,
@@ -1200,7 +1257,8 @@
 			.data =	&ipv4_devconf.no_policy,
 			.maxlen =		sizeof(int),
 			.mode =	0644,
-			.proc_handler =&proc_dointvec,
+			.proc_handler = &ipv4_doint_and_flush,
+			.strategy = &ipv4_doint_and_flush_strategy,
 		},
 	},
 	.devinet_dev = {
diff -Nru a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
--- a/net/ipv4/netfilter/ip_nat_standalone.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/netfilter/ip_nat_standalone.c	Mon Mar 31 13:41:08 2003
@@ -110,12 +110,6 @@
 		}
 		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
 	case IP_CT_NEW:
-#ifdef CONFIG_IP_NF_NAT_LOCAL
-		/* LOCAL_IN hook doesn't have a chain and thus doesn't care
-		 * about new packets -HW */
-		if (hooknum == NF_IP_LOCAL_IN)
-			return NF_ACCEPT;
-#endif
 		info = &ct->nat.info;
 
 		WRITE_LOCK(&ip_nat_lock);
@@ -131,6 +125,12 @@
 				ret = call_expect(master_ct(ct), pskb, 
 						  hooknum, ct, info);
 			} else {
+#ifdef CONFIG_IP_NF_NAT_LOCAL
+				/* LOCAL_IN hook doesn't have a chain!  */
+				if (hooknum == NF_IP_LOCAL_IN) {
+					ret = NF_ACCEPT;
+				} else
+#endif
 				ret = ip_nat_rule_find(pskb, hooknum, in, out,
 						       ct, info);
 			}
diff -Nru a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
--- a/net/ipv4/netfilter/ipt_REJECT.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/netfilter/ipt_REJECT.c	Mon Mar 31 13:41:08 2003
@@ -44,6 +44,7 @@
 	u_int16_t tmp_port;
 	u_int32_t tmp_addr;
 	int needs_ack;
+	int hh_len;
 
 	/* IP header checks: fragment, too short. */
 	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)
@@ -63,13 +64,39 @@
 			 csum_partial((char *)otcph, otcplen, 0)) != 0)
 		return;
 
+
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = oldskb->nh.iph->saddr,
+						.saddr = (local ?
+							  oldskb->nh.iph->daddr :
+							  0),
+						.tos = (RT_TOS(oldskb->nh.iph->tos) |
+							RTO_CONN) } } };
+
+		/* Routing: if not headed for us, route won't like source */
+		if (ip_route_output_key(&rt, &fl))
+			return;
+	
+		hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+	}
+
+
 	/* Copy skb (even if skb is about to be dropped, we can't just
            clone it because there may be other things, such as tcpdump,
-           interested in it) */
-	nskb = skb_copy(oldskb, GFP_ATOMIC);
+           interested in it). We also need to expand headroom in case
+	   hh_len of incoming interface < hh_len of outgoing interface */
+	nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+			       GFP_ATOMIC);
-	if (!nskb)
+	if (!nskb) {
+		/* rt is not attached to any skb yet, so drop its reference here */
+		dst_release(&rt->u.dst);
 		return;
+	}
 
+	dst_release(nskb->dst);
+	nskb->dst = &rt->u.dst;
+
 	/* This packet will not be the same as the other: clear nf fields */
 	nf_conntrack_put(nskb->nfct);
 	nskb->nfct = NULL;
@@ -131,23 +158,6 @@
 	nskb->nh.iph->check = 0;
 	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
 					   nskb->nh.iph->ihl);
-
-	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = nskb->nh.iph->daddr,
-						.saddr = (local ?
-							  nskb->nh.iph->saddr :
-							  0),
-						.tos = (RT_TOS(nskb->nh.iph->tos) |
-							RTO_CONN) } } };
-
-		/* Routing: if not headed for us, route won't like source */
-		if (ip_route_output_key(&rt, &fl))
-			goto free_nskb;
-	}
-
-	dst_release(nskb->dst);
-	nskb->dst = &rt->u.dst;
 
 	/* "Never happens" */
 	if (nskb->len > dst_pmtu(nskb->dst))
diff -Nru a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
--- a/net/ipv4/netfilter/iptable_mangle.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/netfilter/iptable_mangle.c	Mon Mar 31 13:41:08 2003
@@ -179,7 +179,7 @@
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
-		.hook		= ipt_local_hook,
+		.hook		= ipt_route_hook,
 		.pf		= PF_INET,
 		.hooknum	= NF_IP_LOCAL_IN,
 		.priority	= NF_IP_PRI_MANGLE,
diff -Nru a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
--- a/net/ipv4/sysctl_net_ipv4.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/sysctl_net_ipv4.c	Mon Mar 31 13:41:08 2003
@@ -77,14 +77,39 @@
 			 void *newval, size_t newlen, 
 			 void **context)
 {
+	int *valp = table->data;
 	int new;
+
+	if (!newval || !newlen)
+		return 0;
+
 	if (newlen != sizeof(int))
 		return -EINVAL;
-	if (get_user(new,(int *)newval))
-		return -EFAULT; 
-	if (new != ipv4_devconf.forwarding) 
-		inet_forward_change(); 
-	return 0; /* caller does change again and handles handles oldval */ 
+
+	if (get_user(new, (int *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;
+
+	if (oldval && oldlenp) {
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	inet_forward_change();
+	return 1;
 }
 
 ctl_table ipv4_table[] = {
diff -Nru a/net/ipv4/tcp.c b/net/ipv4/tcp.c
--- a/net/ipv4/tcp.c	Mon Mar 31 13:41:06 2003
+++ b/net/ipv4/tcp.c	Mon Mar 31 13:41:06 2003
@@ -1189,7 +1189,8 @@
 
 			from += copy;
 			copied += copy;
-			seglen -= copy;
+			if ((seglen -= copy) == 0 && iovlen == 0)
+				goto out;
 
 			if (skb->len != mss_now || (flags & MSG_OOB))
 				continue;
diff -Nru a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
--- a/net/ipv4/tcp_input.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/tcp_input.c	Mon Mar 31 13:41:08 2003
@@ -3363,7 +3363,7 @@
 
 	/*
 	 *	Header prediction.
-	 *	The code losely follows the one in the famous 
+	 *	The code loosely follows the one in the famous 
 	 *	"30 instruction TCP receive" Van Jacobson mail.
 	 *	
 	 *	Van's trick is to deposit buffers into socket queue 
diff -Nru a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
--- a/net/ipv4/tcp_minisocks.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv4/tcp_minisocks.c	Mon Mar 31 13:41:08 2003
@@ -645,9 +645,7 @@
 
 	if(newsk != NULL) {
 		struct tcp_opt *newtp;
-#ifdef CONFIG_FILTER
 		struct sk_filter *filter;
-#endif
 
 		memcpy(newsk, sk, sizeof(struct tcp_sock));
 		newsk->state = TCP_SYN_RECV;
@@ -677,10 +675,10 @@
 		newsk->callback_lock = RW_LOCK_UNLOCKED;
 		skb_queue_head_init(&newsk->error_queue);
 		newsk->write_space = tcp_write_space;
-#ifdef CONFIG_FILTER
+
 		if ((filter = newsk->filter) != NULL)
 			sk_filter_charge(newsk, filter);
-#endif
+
 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
 			/* It is still raw copy of parent, so invalidate
 			 * destructor and make plain sk_free() */
diff -Nru a/net/ipv4/udp.c b/net/ipv4/udp.c
--- a/net/ipv4/udp.c	Mon Mar 31 13:41:06 2003
+++ b/net/ipv4/udp.c	Mon Mar 31 13:41:06 2003
@@ -951,7 +951,6 @@
 		return -1;
 	}
 
-#if defined(CONFIG_FILTER)
 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
 		if (__udp_checksum_complete(skb)) {
 			UDP_INC_STATS_BH(UdpInErrors);
@@ -960,7 +959,6 @@
 		}
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-#endif
 
 	if (sock_queue_rcv_skb(sk,skb)<0) {
 		UDP_INC_STATS_BH(UdpInErrors);
diff -Nru a/net/ipv4/xfrm_algo.c b/net/ipv4/xfrm_algo.c
--- a/net/ipv4/xfrm_algo.c	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,771 +0,0 @@
-/* 
- * xfrm algorithm interface
- *
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
- * any later version.
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/pfkeyv2.h>
-#include <net/xfrm.h>
-#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
-#include <net/ah.h>
-#endif
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-#include <net/esp.h>
-#endif
-#include <asm/scatterlist.h>
-
-/*
- * Algorithms supported by IPsec.  These entries contain properties which
- * are used in key negotiation and xfrm processing, and are used to verify
- * that instantiated crypto transforms have correct parameters for IPsec
- * purposes.
- */
-static struct xfrm_algo_desc aalg_list[] = {
-{
-	.name = "digest_null",
-	
-	.uinfo = {
-		.auth = {
-			.icv_truncbits = 0,
-			.icv_fullbits = 0,
-		}
-	},
-	
-	.desc = {
-		.sadb_alg_id = SADB_X_AALG_NULL,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 0,
-		.sadb_alg_maxbits = 0
-	}
-},
-{
-	.name = "md5",
-
-	.uinfo = {
-		.auth = {
-			.icv_truncbits = 96,
-			.icv_fullbits = 128,
-		}
-	},
-	
-	.desc = {
-		.sadb_alg_id = SADB_AALG_MD5HMAC,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 128,
-		.sadb_alg_maxbits = 128
-	}
-},
-{
-	.name = "sha1",
-
-	.uinfo = {
-		.auth = {
-			.icv_truncbits = 96,
-			.icv_fullbits = 160,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_AALG_SHA1HMAC,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 160,
-		.sadb_alg_maxbits = 160
-	}
-},
-{
-	.name = "sha256",
-
-	.uinfo = {
-		.auth = {
-			.icv_truncbits = 128,
-			.icv_fullbits = 256,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 256,
-		.sadb_alg_maxbits = 256
-	}
-},
-{
-	.name = "ripemd160",
-
-	.uinfo = {
-		.auth = {
-			.icv_truncbits = 96,
-			.icv_fullbits = 160,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 160,
-		.sadb_alg_maxbits = 160
-	}
-},
-};
-
-static struct xfrm_algo_desc ealg_list[] = {
-{
-	.name = "cipher_null",
-	
-	.uinfo = {
-		.encr = {
-			.blockbits = 8,
-			.defkeybits = 0,
-		}
-	},
-	
-	.desc = {
-		.sadb_alg_id =	SADB_EALG_NULL,
-		.sadb_alg_ivlen = 0,
-		.sadb_alg_minbits = 0,
-		.sadb_alg_maxbits = 0
-	}
-},
-{
-	.name = "des",
-
-	.uinfo = {
-		.encr = {
-			.blockbits = 64,
-			.defkeybits = 64,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_EALG_DESCBC,
-		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 64,
-		.sadb_alg_maxbits = 64
-	}
-},
-{
-	.name = "des3_ede",
-
-	.uinfo = {
-		.encr = {
-			.blockbits = 64,
-			.defkeybits = 192,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_EALG_3DESCBC,
-		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 192,
-		.sadb_alg_maxbits = 192
-	}
-},
-{
-	.name = "cast128",
-
-	.uinfo = {
-		.encr = {
-			.blockbits = 64,
-			.defkeybits = 128,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_X_EALG_CASTCBC,
-		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 40,
-		.sadb_alg_maxbits = 128
-	}
-},
-{
-	.name = "blowfish",
-
-	.uinfo = {
-		.encr = {
-			.blockbits = 64,
-			.defkeybits = 128,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
-		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 40,
-		.sadb_alg_maxbits = 448
-	}
-},
-{
-	.name = "aes",
-
-	.uinfo = {
-		.encr = {
-			.blockbits = 128,
-			.defkeybits = 128,
-		}
-	},
-
-	.desc = {
-		.sadb_alg_id = SADB_X_EALG_AESCBC,
-		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 128,
-		.sadb_alg_maxbits = 256
-	}
-},
-};
-
-static struct xfrm_algo_desc calg_list[] = {
-{
-	.name = "deflate",
-	.uinfo = {
-		.comp = {
-			.threshold = 90,
-		}
-	},
-	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
-},
-{
-	.name = "lzs",
-	.uinfo = {
-		.comp = {
-			.threshold = 90,
-		}
-	},
-	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
-},
-{
-	.name = "lzjh",
-	.uinfo = {
-		.comp = {
-			.threshold = 50,
-		}
-	},
-	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
-},
-};
-
-static inline int aalg_entries(void)
-{
-	return sizeof(aalg_list) / sizeof(aalg_list[0]);
-}
-
-static inline int ealg_entries(void)
-{
-	return sizeof(ealg_list) / sizeof(ealg_list[0]);
-}
-
-static inline int calg_entries(void)
-{
-	return sizeof(calg_list) / sizeof(calg_list[0]);
-}
-
-/* Todo: generic iterators */
-struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
-{
-	int i;
-
-	for (i = 0; i < aalg_entries(); i++) {
-		if (aalg_list[i].desc.sadb_alg_id == alg_id) {
-			if (aalg_list[i].available)
-				return &aalg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
-{
-	int i;
-
-	for (i = 0; i < ealg_entries(); i++) {
-		if (ealg_list[i].desc.sadb_alg_id == alg_id) {
-			if (ealg_list[i].available)
-				return &ealg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
-{
-	int i;
-
-	for (i = 0; i < calg_entries(); i++) {
-		if (calg_list[i].desc.sadb_alg_id == alg_id) {
-			if (calg_list[i].available)
-				return &calg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name)
-{
-	int i;
-
-	if (!name)
-		return NULL;
-
-	for (i=0; i < aalg_entries(); i++) {
-		if (strcmp(name, aalg_list[i].name) == 0) {
-			if (aalg_list[i].available)
-				return &aalg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name)
-{
-	int i;
-
-	if (!name)
-		return NULL;
-
-	for (i=0; i < ealg_entries(); i++) {
-		if (strcmp(name, ealg_list[i].name) == 0) {
-			if (ealg_list[i].available)
-				return &ealg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_calg_get_byname(char *name)
-{
-	int i;
-
-	if (!name)
-		return NULL;
-
-	for (i=0; i < calg_entries(); i++) {
-		if (strcmp(name, calg_list[i].name) == 0) {
-			if (calg_list[i].available)
-				return &calg_list[i];
-			else
-				break;
-		}
-	}
-	return NULL;
-}
-
-struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
-{
-	if (idx >= aalg_entries())
-		return NULL;
-
-	return &aalg_list[idx];
-}
-
-struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
-{
-	if (idx >= ealg_entries())
-		return NULL;
-
-	return &ealg_list[idx];
-}
-
-struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx)
-{
-	if (idx >= calg_entries())
-		return NULL;
-
-	return &calg_list[idx];
-}
-
-/*
- * Probe for the availability of crypto algorithms, and set the available
- * flag for any algorithms found on the system.  This is typically called by
- * pfkey during userspace SA add, update or register.
- */
-void xfrm_probe_algs(void)
-{
-#ifdef CONFIG_CRYPTO
-	int i, status;
-	
-	BUG_ON(in_softirq());
-
-	for (i = 0; i < aalg_entries(); i++) {
-		status = crypto_alg_available(aalg_list[i].name, 0);
-		if (aalg_list[i].available != status)
-			aalg_list[i].available = status;
-	}
-	
-	for (i = 0; i < ealg_entries(); i++) {
-		status = crypto_alg_available(ealg_list[i].name, 0);
-		if (ealg_list[i].available != status)
-			ealg_list[i].available = status;
-	}
-	
-	for (i = 0; i < calg_entries(); i++) {
-		status = crypto_alg_available(calg_list[i].name, 0);
-		if (calg_list[i].available != status)
-			calg_list[i].available = status;
-	}
-#endif
-}
-
-int xfrm_count_auth_supported(void)
-{
-	int i, n;
-
-	for (i = 0, n = 0; i < aalg_entries(); i++)
-		if (aalg_list[i].available)
-			n++;
-	return n;
-}
-
-int xfrm_count_enc_supported(void)
-{
-	int i, n;
-
-	for (i = 0, n = 0; i < ealg_entries(); i++)
-		if (ealg_list[i].available)
-			n++;
-	return n;
-}
-
-#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
-void skb_ah_walk(const struct sk_buff *skb,
-                        struct crypto_tfm *tfm, icv_update_fn_t icv_update)
-{
-	int offset = 0;
-	int len = skb->len;
-	int start = skb->len - skb->data_len;
-	int i, copy = start - offset;
-	struct scatterlist sg;
-
-	/* Checksum header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		
-		sg.page = virt_to_page(skb->data + offset);
-		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
-		sg.length = copy;
-		
-		icv_update(tfm, &sg, 1);
-		
-		if ((len -= copy) == 0)
-			return;
-		offset += copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-			if (copy > len)
-				copy = len;
-			
-			sg.page = frag->page;
-			sg.offset = frag->page_offset + offset-start;
-			sg.length = copy;
-			
-			icv_update(tfm, &sg, 1);
-			
-			if (!(len -= copy))
-				return;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				skb_ah_walk(list, tfm, icv_update);
-				if ((len -= copy) == 0)
-					return;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	if (len)
-		BUG();
-}
-#endif
-
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-/* Move to common area: it is shared with AH. */
-
-void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-		  int offset, int len, icv_update_fn_t icv_update)
-{
-	int start = skb->len - skb->data_len;
-	int i, copy = start - offset;
-	struct scatterlist sg;
-
-	/* Checksum header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		
-		sg.page = virt_to_page(skb->data + offset);
-		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
-		sg.length = copy;
-		
-		icv_update(tfm, &sg, 1);
-		
-		if ((len -= copy) == 0)
-			return;
-		offset += copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-			if (copy > len)
-				copy = len;
-			
-			sg.page = frag->page;
-			sg.offset = frag->page_offset + offset-start;
-			sg.length = copy;
-			
-			icv_update(tfm, &sg, 1);
-
-			if (!(len -= copy))
-				return;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
-				if ((len -= copy) == 0)
-					return;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	if (len)
-		BUG();
-}
-
-
-/* Looking generic it is not used in another places. */
-
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
-	int start = skb->len - skb->data_len;
-	int i, copy = start - offset;
-	int elt = 0;
-
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		sg[elt].page = virt_to_page(skb->data + offset);
-		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
-		sg[elt].length = copy;
-		elt++;
-		if ((len -= copy) == 0)
-			return elt;
-		offset += copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-			if (copy > len)
-				copy = len;
-			sg[elt].page = frag->page;
-			sg[elt].offset = frag->page_offset+offset-start;
-			sg[elt].length = copy;
-			elt++;
-			if (!(len -= copy))
-				return elt;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
-				if ((len -= copy) == 0)
-					return elt;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	if (len)
-		BUG();
-	return elt;
-}
-
-/* Check that skb data bits are writable. If they are not, copy data
- * to newly created private area. If "tailbits" is given, make sure that
- * tailbits bytes beyond current end of skb are writable.
- *
- * Returns amount of elements of scatterlist to load for subsequent
- * transformations and pointer to writable trailer skb.
- */
-
-int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
-{
-	int copyflag;
-	int elt;
-	struct sk_buff *skb1, **skb_p;
-
-	/* If skb is cloned or its head is paged, reallocate
-	 * head pulling out all the pages (pages are considered not writable
-	 * at the moment even if they are anonymous).
-	 */
-	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
-	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
-		return -ENOMEM;
-
-	/* Easy case. Most of packets will go this way. */
-	if (!skb_shinfo(skb)->frag_list) {
-		/* A little of trouble, not enough of space for trailer.
-		 * This should not happen, when stack is tuned to generate
-		 * good frames. OK, on miss we reallocate and reserve even more
-		 * space, 128 bytes is fair. */
-
-		if (skb_tailroom(skb) < tailbits &&
-		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
-			return -ENOMEM;
-
-		/* Voila! */
-		*trailer = skb;
-		return 1;
-	}
-
-	/* Misery. We are in troubles, going to mincer fragments... */
-
-	elt = 1;
-	skb_p = &skb_shinfo(skb)->frag_list;
-	copyflag = 0;
-
-	while ((skb1 = *skb_p) != NULL) {
-		int ntail = 0;
-
-		/* The fragment is partially pulled by someone,
-		 * this can happen on input. Copy it and everything
-		 * after it. */
-
-		if (skb_shared(skb1))
-			copyflag = 1;
-
-		/* If the skb is the last, worry about trailer. */
-
-		if (skb1->next == NULL && tailbits) {
-			if (skb_shinfo(skb1)->nr_frags ||
-			    skb_shinfo(skb1)->frag_list ||
-			    skb_tailroom(skb1) < tailbits)
-				ntail = tailbits + 128;
-		}
-
-		if (copyflag ||
-		    skb_cloned(skb1) ||
-		    ntail ||
-		    skb_shinfo(skb1)->nr_frags ||
-		    skb_shinfo(skb1)->frag_list) {
-			struct sk_buff *skb2;
-
-			/* Fuck, we are miserable poor guys... */
-			if (ntail == 0)
-				skb2 = skb_copy(skb1, GFP_ATOMIC);
-			else
-				skb2 = skb_copy_expand(skb1,
-						       skb_headroom(skb1),
-						       ntail,
-						       GFP_ATOMIC);
-			if (unlikely(skb2 == NULL))
-				return -ENOMEM;
-
-			if (skb1->sk)
-				skb_set_owner_w(skb, skb1->sk);
-
-			/* Looking around. Are we still alive?
-			 * OK, link new skb, drop old one */
-
-			skb2->next = skb1->next;
-			*skb_p = skb2;
-			kfree_skb(skb1);
-			skb1 = skb2;
-		}
-		elt++;
-		*trailer = skb1;
-		skb_p = &skb1->next;
-	}
-
-	return elt;
-}
-
-void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
-{
-	if (tail != skb) {
-		skb->data_len += len;
-		skb->len += len;
-	}
-	return skb_put(tail, len);
-}
-#endif
diff -Nru a/net/ipv4/xfrm_input.c b/net/ipv4/xfrm_input.c
--- a/net/ipv4/xfrm_input.c	Mon Mar 31 13:41:06 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,52 +0,0 @@
-/*
- * xfrm_input.c
- *
- * Changes:
- * 	YOSHIFUJI Hideaki @USAGI
- * 		Split up af-specific portion
- * 	
- */
-
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-void __secpath_destroy(struct sec_path *sp)
-{
-	int i;
-	for (i = 0; i < sp->len; i++)
-		xfrm_state_put(sp->xvec[i]);
-	kmem_cache_free(sp->pool, sp);
-}
-
-/* Fetch spi and seq frpm ipsec header */
-
-int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
-{
-	int offset, offset_seq;
-
-	switch (nexthdr) {
-	case IPPROTO_AH:
-		offset = offsetof(struct ip_auth_hdr, spi);
-		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
-		break;
-	case IPPROTO_ESP:
-		offset = offsetof(struct ip_esp_hdr, spi);
-		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
-		break;
-	case IPPROTO_COMP:
-		if (!pskb_may_pull(skb, 4))
-			return -EINVAL;
-		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));
-		*seq = 0;
-		return 0;
-	default:
-		return 1;
-	}
-
-	if (!pskb_may_pull(skb, 16))
-		return -EINVAL;
-
-	*spi = *(u32*)(skb->h.raw + offset);
-	*seq = *(u32*)(skb->h.raw + offset_seq);
-	return 0;
-}
diff -Nru a/net/ipv4/xfrm_policy.c b/net/ipv4/xfrm_policy.c
--- a/net/ipv4/xfrm_policy.c	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,1232 +0,0 @@
-/* 
- * xfrm_policy.c
- *
- * Changes:
- *	Mitsuru KANDA @USAGI
- * 	Kazunori MIYAZAWA @USAGI
- * 	Kunihiro Ishiguro
- * 		IPv6 support
- * 	Kazunori MIYAZAWA @USAGI
- * 	YOSHIFUJI Hideaki
- * 		Split up af-specific portion
- * 	
- */
-
-#include <linux/config.h>
-#include <net/xfrm.h>
-#include <net/ip.h>
-
-DECLARE_MUTEX(xfrm_cfg_sem);
-
-static u32      xfrm_policy_genid;
-static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;
-
-struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
-
-static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
-static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
-
-kmem_cache_t *xfrm_dst_cache;
-
-/* Limited flow cache. Its function now is to accelerate search for
- * policy rules.
- *
- * Flow cache is private to cpus, at the moment this is important
- * mostly for flows which do not match any rule, so that flow lookups
- * are absolultely cpu-local. When a rule exists we do some updates
- * to rule (refcnt, stats), so that locality is broken. Later this
- * can be repaired.
- */
-
-struct flow_entry
-{
-	struct flow_entry	*next;
-	struct flowi		fl;
-	u8			dir;
-	u32			genid;
-	struct xfrm_policy	*pol;
-};
-
-static kmem_cache_t *flow_cachep;
-
-struct flow_entry **flow_table;
-
-static int flow_lwm = 2*XFRM_FLOWCACHE_HASH_SIZE;
-static int flow_hwm = 4*XFRM_FLOWCACHE_HASH_SIZE;
-
-static int flow_number[NR_CPUS] __cacheline_aligned;
-
-#define flow_count(cpu)		(flow_number[cpu])
-
-static void flow_cache_shrink(int cpu)
-{
-	int i;
-	struct flow_entry *fle, **flp;
-	int shrink_to = flow_lwm/XFRM_FLOWCACHE_HASH_SIZE;
-
-	for (i=0; i<XFRM_FLOWCACHE_HASH_SIZE; i++) {
-		int k = 0;
-		flp = &flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+i];
-		while ((fle=*flp) != NULL && k<shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
-		while ((fle=*flp) != NULL) {
-			*flp = fle->next;
-			if (fle->pol)
-				xfrm_pol_put(fle->pol);
-			kmem_cache_free(flow_cachep, fle);
-		}
-	}
-}
-
-struct xfrm_policy *flow_lookup(int dir, struct flowi *fl, 
-				unsigned short family)
-{
-	struct xfrm_policy *pol = NULL;
-	struct flow_entry *fle;
-	u32 hash;
-	int cpu;
-
-	hash = flow_hash(fl, family);
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-
-	for (fle = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];
-	     fle; fle = fle->next) {
-		if (memcmp(fl, &fle->fl, sizeof(fle->fl)) == 0 &&
-		    fle->dir == dir) {
-			if (fle->genid == xfrm_policy_genid) {
-				if ((pol = fle->pol) != NULL)
-					atomic_inc(&pol->refcnt);
-				local_bh_enable();
-				return pol;
-			}
-			break;
-		}
-	}
-
-	pol = xfrm_policy_lookup(dir, fl, family);
-
-	if (fle) {
-		/* Stale flow entry found. Update it. */
-		fle->genid = xfrm_policy_genid;
-
-		if (fle->pol)
-			xfrm_pol_put(fle->pol);
-		fle->pol = pol;
-		if (pol)
-			atomic_inc(&pol->refcnt);
-	} else {
-		if (flow_count(cpu) > flow_hwm)
-			flow_cache_shrink(cpu);
-
-		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
-		if (fle) {
-			flow_count(cpu)++;
-			fle->fl = *fl;
-			fle->genid = xfrm_policy_genid;
-			fle->dir = dir;
-			fle->pol = pol;
-			if (pol)
-				atomic_inc(&pol->refcnt);
-			fle->next = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];
-			flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash] = fle;
-		}
-	}
-	local_bh_enable();
-	return pol;
-}
-
-void __init flow_cache_init(void)
-{
-	int order;
-
-	flow_cachep = kmem_cache_create("flow_cache",
-					sizeof(struct flow_entry),
-					0, SLAB_HWCACHE_ALIGN,
-					NULL, NULL);
-
-	if (!flow_cachep)
-		panic("NET: failed to allocate flow cache slab\n");
-
-	for (order = 0;
-	     (PAGE_SIZE<<order) < (NR_CPUS*sizeof(struct flow_entry *)*XFRM_FLOWCACHE_HASH_SIZE);
-	     order++)
-		/* NOTHING */;
-
-	flow_table = (struct flow_entry **)__get_free_pages(GFP_ATOMIC, order);
-
-	if (!flow_table)
-		panic("Failed to allocate flow cache hash table\n");
-
-	memset(flow_table, 0, PAGE_SIZE<<order);
-}
-
-int xfrm_register_type(struct xfrm_type *type, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	struct xfrm_type_map *typemap;
-	int err = 0;
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-
-	write_lock(&typemap->lock);
-	if (likely(typemap->map[type->proto] == NULL))
-		typemap->map[type->proto] = type;
-	else
-		err = -EEXIST;
-	write_unlock(&typemap->lock);
-	xfrm_policy_put_afinfo(afinfo);
-	return err;
-}
-
-int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	struct xfrm_type_map *typemap;
-	int err = 0;
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-
-	write_lock(&typemap->lock);
-	if (unlikely(typemap->map[type->proto] != type))
-		err = -ENOENT;
-	else
-		typemap->map[type->proto] = NULL;
-	write_unlock(&typemap->lock);
-	xfrm_policy_put_afinfo(afinfo);
-	return err;
-}
-
-struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	struct xfrm_type_map *typemap;
-	struct xfrm_type *type;
-
-	if (unlikely(afinfo == NULL))
-		return NULL;
-	typemap = afinfo->type_map;
-
-	read_lock(&typemap->lock);
-	type = typemap->map[proto];
-	if (unlikely(type && !try_module_get(type->owner)))
-		type = NULL;
-	read_unlock(&typemap->lock);
-	xfrm_policy_put_afinfo(afinfo);
-	return type;
-}
-
-int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
-		    unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	int err = 0;
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	if (likely(afinfo->dst_lookup != NULL))
-		err = afinfo->dst_lookup(dst, fl);
-	else
-		err = -EINVAL;
-	xfrm_policy_put_afinfo(afinfo);
-	return err;
-}
-
-void xfrm_put_type(struct xfrm_type *type)
-{
-	module_put(type->owner);
-}
-
-static inline unsigned long make_jiffies(long secs)
-{
-	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
-		return MAX_SCHEDULE_TIMEOUT-1;
-	else
-	        return secs*HZ;
-}
-
-static void xfrm_policy_timer(unsigned long data)
-{
-	struct xfrm_policy *xp = (struct xfrm_policy*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
-	long next = LONG_MAX;
-	u32 index;
-
-	if (xp->dead)
-		goto out;
-
-	if (xp->lft.hard_add_expires_seconds) {
-		long tmo = xp->lft.hard_add_expires_seconds +
-			xp->curlft.add_time - now;
-		if (tmo <= 0)
-			goto expired;
-		if (tmo < next)
-			next = tmo;
-	}
-	if (next != LONG_MAX &&
-	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
-		atomic_inc(&xp->refcnt);
-
-out:
-	xfrm_pol_put(xp);
-	return;
-
-expired:
-	index = xp->index;
-	xfrm_pol_put(xp);
-
-	/* Not 100% correct. id can be recycled in theory */
-	xp = xfrm_policy_byid(0, index, 1);
-	if (xp) {
-		xfrm_policy_kill(xp);
-		xfrm_pol_put(xp);
-	}
-}
-
-
-/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
- * SPD calls.
- */
-
-struct xfrm_policy *xfrm_policy_alloc(int gfp)
-{
-	struct xfrm_policy *policy;
-
-	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
-
-	if (policy) {
-		memset(policy, 0, sizeof(struct xfrm_policy));
-		atomic_set(&policy->refcnt, 1);
-		policy->lock = RW_LOCK_UNLOCKED;
-		init_timer(&policy->timer);
-		policy->timer.data = (unsigned long)policy;
-		policy->timer.function = xfrm_policy_timer;
-	}
-	return policy;
-}
-
-/* Destroy xfrm_policy: descendant resources must be released to this moment. */
-
-void __xfrm_policy_destroy(struct xfrm_policy *policy)
-{
-	if (!policy->dead)
-		BUG();
-
-	if (policy->bundles)
-		BUG();
-
-	if (del_timer(&policy->timer))
-		BUG();
-
-	kfree(policy);
-}
-
-/* Rule must be locked. Release descentant resources, announce
- * entry dead. The rule must be unlinked from lists to the moment.
- */
-
-void xfrm_policy_kill(struct xfrm_policy *policy)
-{
-	struct dst_entry *dst;
-
-	write_lock_bh(&policy->lock);
-	if (policy->dead)
-		goto out;
-
-	policy->dead = 1;
-
-	while ((dst = policy->bundles) != NULL) {
-		policy->bundles = dst->next;
-		dst_free(dst);
-	}
-
-	if (del_timer(&policy->timer))
-		atomic_dec(&policy->refcnt);
-
-out:
-	write_unlock_bh(&policy->lock);
-}
-
-/* Generate new index... KAME seems to generate them ordered by cost
- * of an absolute inpredictability of ordering of rules. This will not pass. */
-static u32 xfrm_gen_index(int dir)
-{
-	u32 idx;
-	struct xfrm_policy *p;
-	static u32 idx_generator;
-
-	for (;;) {
-		idx = (idx_generator | dir);
-		idx_generator += 8;
-		if (idx == 0)
-			idx = 8;
-		for (p = xfrm_policy_list[dir]; p; p = p->next) {
-			if (p->index == idx)
-				break;
-		}
-		if (!p)
-			return idx;
-	}
-}
-
-int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
-{
-	struct xfrm_policy *pol, **p;
-
-	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if (memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
-			if (excl) {
-				write_unlock_bh(&xfrm_policy_lock);
-				return -EEXIST;
-			}
-			break;
-		}
-	}
-	atomic_inc(&policy->refcnt);
-	policy->next = pol ? pol->next : NULL;
-	*p = policy;
-	xfrm_policy_genid++;
-	policy->index = pol ? pol->index : xfrm_gen_index(dir);
-	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
-	policy->curlft.use_time = 0;
-	if (policy->lft.hard_add_expires_seconds &&
-	    !mod_timer(&policy->timer, jiffies + HZ))
-		atomic_inc(&policy->refcnt);
-	write_unlock_bh(&xfrm_policy_lock);
-
-	if (pol) {
-		atomic_dec(&pol->refcnt);
-		xfrm_policy_kill(pol);
-		xfrm_pol_put(pol);
-	}
-	return 0;
-}
-
-struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel)
-{
-	struct xfrm_policy *pol, **p;
-
-	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
-			*p = pol->next;
-			break;
-		}
-	}
-	if (pol)
-		xfrm_policy_genid++;
-	write_unlock_bh(&xfrm_policy_lock);
-	return pol;
-}
-
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
-{
-	struct xfrm_policy *pol, **p;
-
-	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
-		if (pol->index == id) {
-			if (delete)
-				*p = pol->next;
-			break;
-		}
-	}
-	if (pol) {
-		if (delete)
-			xfrm_policy_genid++;
-		else
-			atomic_inc(&pol->refcnt);
-	}
-	write_unlock_bh(&xfrm_policy_lock);
-	return pol;
-}
-
-void xfrm_policy_flush()
-{
-	struct xfrm_policy *xp;
-	int dir;
-
-	write_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		while ((xp = xfrm_policy_list[dir]) != NULL) {
-			xfrm_policy_list[dir] = xp->next;
-			write_unlock_bh(&xfrm_policy_lock);
-
-			xfrm_policy_kill(xp);
-			xfrm_pol_put(xp);
-
-			write_lock_bh(&xfrm_policy_lock);
-		}
-	}
-	xfrm_policy_genid++;
-	write_unlock_bh(&xfrm_policy_lock);
-}
-
-int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
-		     void *data)
-{
-	struct xfrm_policy *xp;
-	int dir;
-	int count = 0;
-	int error = 0;
-
-	read_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
-			count++;
-	}
-
-	if (count == 0) {
-		error = -ENOENT;
-		goto out;
-	}
-
-	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
-			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
-			if (error)
-				goto out;
-		}
-	}
-
-out:
-	read_unlock_bh(&xfrm_policy_lock);
-	return error;
-}
-
-
-/* Find policy to apply to this flow. */
-
-struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl, 
-				       unsigned short family)
-{
-	struct xfrm_policy *pol;
-
-	read_lock_bh(&xfrm_policy_lock);
-	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
-		struct xfrm_selector *sel = &pol->selector;
-		int match;
-
-		if (pol->family != family)
-			continue;
-
-		match = xfrm_selector_match(sel, fl, family);
-		if (match) {
-			atomic_inc(&pol->refcnt);
-			break;
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-	return pol;
-}
-
-struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
-{
-	struct xfrm_policy *pol;
-
-	read_lock_bh(&xfrm_policy_lock);
-	if ((pol = sk->policy[dir]) != NULL) {
-		int match;
-
-		match = xfrm_selector_match(&pol->selector, fl, sk->family);
-		if (match)
-			atomic_inc(&pol->refcnt);
-		else
-			pol = NULL;
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-	return pol;
-}
-
-void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
-{
-	pol->next = xfrm_policy_list[XFRM_POLICY_MAX+dir];
-	xfrm_policy_list[XFRM_POLICY_MAX+dir] = pol;
-	atomic_inc(&pol->refcnt);
-}
-
-void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
-{
-	struct xfrm_policy **polp;
-
-	for (polp = &xfrm_policy_list[XFRM_POLICY_MAX+dir];
-	     *polp != NULL; polp = &(*polp)->next) {
-		if (*polp == pol) {
-			*polp = pol->next;
-			atomic_dec(&pol->refcnt);
-			return;
-		}
-	}
-}
-
-int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
-{
-	struct xfrm_policy *old_pol;
-
-	write_lock_bh(&xfrm_policy_lock);
-	old_pol = sk->policy[dir];
-	sk->policy[dir] = pol;
-	if (pol) {
-		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
-		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
-		xfrm_sk_policy_link(pol, dir);
-	}
-	if (old_pol)
-		xfrm_sk_policy_unlink(old_pol, dir);
-	write_unlock_bh(&xfrm_policy_lock);
-
-	if (old_pol) {
-		xfrm_policy_kill(old_pol);
-		xfrm_pol_put(old_pol);
-	}
-	return 0;
-}
-
-static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
-{
-	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
-
-	if (newp) {
-		newp->selector = old->selector;
-		newp->lft = old->lft;
-		newp->curlft = old->curlft;
-		newp->action = old->action;
-		newp->flags = old->flags;
-		newp->xfrm_nr = old->xfrm_nr;
-		newp->index = old->index;
-		memcpy(newp->xfrm_vec, old->xfrm_vec,
-		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
-		write_lock_bh(&xfrm_policy_lock);
-		xfrm_sk_policy_link(newp, dir);
-		write_unlock_bh(&xfrm_policy_lock);
-	}
-	return newp;
-}
-
-int __xfrm_sk_clone_policy(struct sock *sk)
-{
-	struct xfrm_policy *p0, *p1;
-	p0 = sk->policy[0];
-	p1 = sk->policy[1];
-	sk->policy[0] = NULL;
-	sk->policy[1] = NULL;
-	if (p0 && (sk->policy[0] = clone_policy(p0, 0)) == NULL)
-		return -ENOMEM;
-	if (p1 && (sk->policy[1] = clone_policy(p1, 1)) == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-void __xfrm_sk_free_policy(struct xfrm_policy *pol, int dir)
-{
-	write_lock_bh(&xfrm_policy_lock);
-	xfrm_sk_policy_unlink(pol, dir);
-	write_unlock_bh(&xfrm_policy_lock);
-
-	xfrm_policy_kill(pol);
-	xfrm_pol_put(pol);
-}
-
-/* Resolve list of templates for the flow, given policy. */
-
-static int
-xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
-		  struct xfrm_state **xfrm,
-		  unsigned short family)
-{
-	int nx;
-	int i, error;
-	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
-	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
-
-	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
-		struct xfrm_state *x;
-		xfrm_address_t *remote = daddr;
-		xfrm_address_t *local  = saddr;
-		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
-
-		if (tmpl->mode) {
-			remote = &tmpl->id.daddr;
-			local = &tmpl->saddr;
-		}
-
-		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
-
-		if (x && x->km.state == XFRM_STATE_VALID) {
-			xfrm[nx++] = x;
-			daddr = remote;
-			saddr = local;
-			continue;
-		}
-		if (x) {
-			error = (x->km.state == XFRM_STATE_ERROR ?
-				 -EINVAL : -EAGAIN);
-			xfrm_state_put(x);
-		}
-
-		if (!tmpl->optional)
-			goto fail;
-	}
-	return nx;
-
-fail:
-	for (nx--; nx>=0; nx--)
-		xfrm_state_put(xfrm[nx]);
-	return error;
-}
-
-/* Check that the bundle accepts the flow and its components are
- * still valid.
- */
-
-static struct dst_entry *
-xfrm_find_bundle(struct flowi *fl, struct rtable *rt, struct xfrm_policy *policy, unsigned short family)
-{
-	struct dst_entry *x;
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return ERR_PTR(-EINVAL);
-	x = afinfo->find_bundle(fl, rt, policy);
-	xfrm_policy_put_afinfo(afinfo);
-	return x;
-}
-
-/* Allocate chain of dst_entry's, attach known xfrm's, calculate
- * all the metrics... Shortly, bundle a bundle.
- */
-
-static int
-xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
-		   struct flowi *fl, struct dst_entry **dst_p,
-		   unsigned short family)
-{
-	int err;
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
-	xfrm_policy_put_afinfo(afinfo);
-	return err;
-}
-
-/* Main function: finds/creates a bundle for given flow.
- *
- * At the moment we eat a raw IP route. Mostly to speed up lookups
- * on interfaces with disabled IPsec.
- */
-int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
-		struct sock *sk, int flags)
-{
-	struct xfrm_policy *policy;
-	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct rtable *rt = (struct rtable*)*dst_p;
-	struct dst_entry *dst;
-	int nx = 0;
-	int err;
-	u32 genid;
-	u16 family = (*dst_p)->ops->family;
-
-	switch (family) {
-	case AF_INET:
-		if (!fl->fl4_src)
-			fl->fl4_src = rt->rt_src;
-		if (!fl->fl4_dst)
-			fl->fl4_dst = rt->rt_dst;
-	case AF_INET6:
-		/* Still not clear... */
-	default:
-		/* nothing */;
-	}
-
-restart:
-	genid = xfrm_policy_genid;
-	policy = NULL;
-	if (sk && sk->policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
-
-	if (!policy) {
-		/* To accelerate a bit...  */
-		if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
-			return 0;
-
-		policy = flow_lookup(XFRM_POLICY_OUT, fl, family);
-	}
-
-	if (!policy)
-		return 0;
-
-	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
-
-	switch (policy->action) {
-	case XFRM_POLICY_BLOCK:
-		/* Prohibit the flow */
-		xfrm_pol_put(policy);
-		return -EPERM;
-
-	case XFRM_POLICY_ALLOW:
-		if (policy->xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-
-		/* Try to find matching bundle.
-		 *
-		 * LATER: help from flow cache. It is optional, this
-		 * is required only for output policy.
-		 */
-		dst = xfrm_find_bundle(fl, rt, policy, family);
-		if (IS_ERR(dst)) {
-			xfrm_pol_put(policy);
-			return PTR_ERR(dst);
-		}
-
-		if (dst)
-			break;
-
-		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
-
-		if (unlikely(nx<0)) {
-			err = nx;
-			if (err == -EAGAIN) {
-				struct task_struct *tsk = current;
-				DECLARE_WAITQUEUE(wait, tsk);
-				if (!flags)
-					goto error;
-
-				__set_task_state(tsk, TASK_INTERRUPTIBLE);
-				add_wait_queue(&km_waitq, &wait);
-				err = xfrm_tmpl_resolve(policy, fl, xfrm, family);
-				if (err == -EAGAIN)
-					schedule();
-				__set_task_state(tsk, TASK_RUNNING);
-				remove_wait_queue(&km_waitq, &wait);
-
-				if (err == -EAGAIN && signal_pending(current)) {
-					err = -ERESTART;
-					goto error;
-				}
-				if (err == -EAGAIN ||
-				    genid != xfrm_policy_genid)
-					goto restart;
-			}
-			if (err)
-				goto error;
-		} else if (nx == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-
-		dst = &rt->u.dst;
-		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
-
-		if (unlikely(err)) {
-			int i;
-			for (i=0; i<nx; i++)
-				xfrm_state_put(xfrm[i]);
-			goto error;
-		}
-
-		write_lock_bh(&policy->lock);
-		if (unlikely(policy->dead)) {
-			/* Wow! While we worked on resolving, this
-			 * policy has gone. Retry. It is not paranoia,
-			 * we just cannot enlist new bundle to dead object.
-			 */
-			write_unlock_bh(&policy->lock);
-
-			xfrm_pol_put(policy);
-			if (dst)
-				dst_free(dst);
-			goto restart;
-		}
-		dst->next = policy->bundles;
-		policy->bundles = dst;
-		dst_hold(dst);
-		write_unlock_bh(&policy->lock);
-	}
-	*dst_p = dst;
-	ip_rt_put(rt);
-	xfrm_pol_put(policy);
-	return 0;
-
-error:
-	ip_rt_put(rt);
-	xfrm_pol_put(policy);
-	*dst_p = NULL;
-	return err;
-}
-
-/* When skb is transformed back to its "native" form, we have to
- * check policy restrictions. At the moment we make this in maximally
- * stupid way. Shame on me. :-) Of course, connected sockets must
- * have policy cached at them.
- */
-
-static inline int
-xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
-	      unsigned short family)
-{
-	return	x->id.proto == tmpl->id.proto &&
-		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
-		x->props.mode == tmpl->mode &&
-		(tmpl->aalgos & (1<<x->props.aalgo)) &&
-		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
-}
-
-static inline int
-xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int idx,
-	       unsigned short family)
-{
-	for (; idx < sp->len; idx++) {
-		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
-			return ++idx;
-	}
-	return -1;
-}
-
-static int
-_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	afinfo->decode_session(skb, fl);
-	xfrm_policy_put_afinfo(afinfo);
-	return 0;
-}
-
-int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
-			unsigned short family)
-{
-	struct xfrm_policy *pol;
-	struct flowi fl;
-
-	if (_decode_session(skb, &fl, family) < 0)
-		return 0;
-
-	/* First, check used SA against their selectors. */
-	if (skb->sp) {
-		int i;
-
-		for (i=skb->sp->len-1; i>=0; i--) {
-			if (!xfrm_selector_match(&skb->sp->xvec[i]->sel, &fl, family))
-				return 0;
-		}
-	}
-
-	pol = NULL;
-	if (sk && sk->policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
-
-	if (!pol)
-		pol = flow_lookup(dir, &fl, family);
-
-	if (!pol)
-		return 1;
-
-	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
-
-	if (pol->action == XFRM_POLICY_ALLOW) {
-		if (pol->xfrm_nr != 0) {
-			struct sec_path *sp;
-			static struct sec_path dummy;
-			int i, k;
-
-			if ((sp = skb->sp) == NULL)
-				sp = &dummy;
-
-			/* For each tmpl search corresponding xfrm.
-			 * Order is _important_. Later we will implement
-			 * some barriers, but at the moment barriers
-			 * are implied between each two transformations.
-			 */
-			for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
-				k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
-				if (k < 0)
-					goto reject;
-			}
-		}
-		xfrm_pol_put(pol);
-		return 1;
-	}
-
-reject:
-	xfrm_pol_put(pol);
-	return 0;
-}
-
-int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
-{
-	struct flowi fl;
-
-	if (_decode_session(skb, &fl, family) < 0)
-		return 0;
-
-	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
-}
-
-/* Optimize later using cookies and generation ids. */
-
-static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
-{
-	struct dst_entry *child = dst;
-
-	while (child) {
-		if (child->obsolete > 0 ||
-		    (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
-			dst_release(dst);
-			return NULL;
-		}
-		child = child->child;
-	}
-
-	return dst;
-}
-
-static void xfrm_dst_destroy(struct dst_entry *dst)
-{
-	xfrm_state_put(dst->xfrm);
-	dst->xfrm = NULL;
-}
-
-static void xfrm_link_failure(struct sk_buff *skb)
-{
-	/* Impossible. Such dst must be popped before reaches point of failure. */
-	return;
-}
-
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
-{
-	if (dst) {
-		if (dst->obsolete) {
-			dst_release(dst);
-			dst = NULL;
-		}
-	}
-	return dst;
-}
-
-static void __xfrm_garbage_collect(void)
-{
-	int i;
-	struct xfrm_policy *pol;
-	struct dst_entry *dst, **dstp, *gc_list = NULL;
-
-	read_lock_bh(&xfrm_policy_lock);
-	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
-		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (atomic_read(&dst->__refcnt) == 0) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
-	}
-}
-
-static int bundle_depends_on(struct dst_entry *dst, struct xfrm_state *x)
-{
-	do {
-		if (dst->xfrm == x)
-			return 1;
-	} while ((dst = dst->child) != NULL);
-	return 0;
-}
-
-int xfrm_flush_bundles(struct xfrm_state *x)
-{
-	int i;
-	struct xfrm_policy *pol;
-	struct dst_entry *dst, **dstp, *gc_list = NULL;
-
-	read_lock_bh(&xfrm_policy_lock);
-	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
-		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (bundle_depends_on(dst, x)) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
-	}
-
-	return 0;
-}
-
-/* Well... that's _TASK_. We need to scan through transformation
- * list and figure out what mss tcp should generate in order to
- * final datagram fit to mtu. Mama mia... :-)
- *
- * Apparently, some easy way exists, but we used to choose the most
- * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
- *
- * Consider this function as something like dark humour. :-)
- */
-static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
-{
-	int res = mtu - dst->header_len;
-
-	for (;;) {
-		struct dst_entry *d = dst;
-		int m = res;
-
-		do {
-			struct xfrm_state *x = d->xfrm;
-			if (x) {
-				spin_lock_bh(&x->lock);
-				if (x->km.state == XFRM_STATE_VALID &&
-				    x->type && x->type->get_max_size)
-					m = x->type->get_max_size(d->xfrm, m);
-				else
-					m += x->props.header_len;
-				spin_unlock_bh(&x->lock);
-			}
-		} while ((d = d->child) != NULL);
-
-		if (m <= mtu)
-			break;
-		res -= (m - mtu);
-		if (res < 88)
-			return mtu;
-	}
-
-	return res + dst->header_len;
-}
-
-int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
-{
-	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
-		return -EAFNOSUPPORT;
-	write_lock(&xfrm_policy_afinfo_lock);
-	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
-		err = -ENOBUFS;
-	else {
-		struct dst_ops *dst_ops = afinfo->dst_ops;
-		if (likely(dst_ops->kmem_cachep == NULL))
-			dst_ops->kmem_cachep = xfrm_dst_cache;
-		if (likely(dst_ops->check == NULL))
-			dst_ops->check = xfrm_dst_check;
-		if (likely(dst_ops->destroy == NULL))
-			dst_ops->destroy = xfrm_dst_destroy;
-		if (likely(dst_ops->negative_advice == NULL))
-			dst_ops->negative_advice = xfrm_negative_advice;
-		if (likely(dst_ops->link_failure == NULL))
-			dst_ops->link_failure = xfrm_link_failure;
-		if (likely(dst_ops->get_mss == NULL))
-			dst_ops->get_mss = xfrm_get_mss;
-		if (likely(afinfo->garbage_collect == NULL))
-			afinfo->garbage_collect = __xfrm_garbage_collect;
-		xfrm_policy_afinfo[afinfo->family] = afinfo;
-	}
-	write_unlock(&xfrm_policy_afinfo_lock);
-	return err;
-}
-
-int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
-{
-	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
-		return -EAFNOSUPPORT;
-	write_lock(&xfrm_policy_afinfo_lock);
-	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
-		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
-			err = -EINVAL;
-		else {
-			struct dst_ops *dst_ops = afinfo->dst_ops;
-			xfrm_policy_afinfo[afinfo->family] = NULL;
-			dst_ops->kmem_cachep = NULL;
-			dst_ops->check = NULL;
-			dst_ops->destroy = NULL;
-			dst_ops->negative_advice = NULL;
-			dst_ops->link_failure = NULL;
-			dst_ops->get_mss = NULL;
-			afinfo->garbage_collect = NULL;
-		}
-	}
-	write_unlock(&xfrm_policy_afinfo_lock);
-	return err;
-}
-
-struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	if (unlikely(family >= NPROTO))
-		return NULL;
-	read_lock(&xfrm_policy_afinfo_lock);
-	afinfo = xfrm_policy_afinfo[family];
-	if (likely(afinfo != NULL))
-		read_lock(&afinfo->lock);
-	read_unlock(&xfrm_policy_afinfo_lock);
-	return afinfo;
-}
-
-void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
-{
-	if (unlikely(afinfo == NULL))
-		return;
-	read_unlock(&afinfo->lock);
-}
-
-void __init xfrm_policy_init(void)
-{
-	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
-					   sizeof(struct xfrm_dst),
-					   0, SLAB_HWCACHE_ALIGN,
-					   NULL, NULL);
-	if (!xfrm_dst_cache)
-		panic("XFRM: failed to allocate xfrm_dst_cache\n");
-}
-
-void __init xfrm_init(void)
-{
-	xfrm_state_init();
-	flow_cache_init();
-	xfrm_policy_init();
-}
-
diff -Nru a/net/ipv4/xfrm_state.c b/net/ipv4/xfrm_state.c
--- a/net/ipv4/xfrm_state.c	Mon Mar 31 13:41:08 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,728 +0,0 @@
-/*
- * xfrm_state.c
- *
- * Changes:
- *	Mitsuru KANDA @USAGI
- * 	Kazunori MIYAZAWA @USAGI
- * 	Kunihiro Ishiguro
- * 		IPv6 support
- * 	YOSHIFUJI Hideaki @USAGI
- * 		Split up af-specific functions
- * 	
- */
-
-#include <net/xfrm.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-
-/* Each xfrm_state may be linked to two tables:
-
-   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
-   2. Hash table by daddr to find what SAs exist for given
-      destination/tunnel endpoint. (output)
- */
-
-static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
-
-/* Hash table to find appropriate SA towards given target (endpoint
- * of tunnel or destination of transport mode) allowed by selector.
- *
- * Main use is finding SA after policy selected tunnel or transport mode.
- * Also, it can be used by ah/esp icmp error handler to find offending SA.
- */
-static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
-
-DECLARE_WAIT_QUEUE_HEAD(km_waitq);
-
-static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
-static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
-
-static void __xfrm_state_delete(struct xfrm_state *x);
-
-static inline unsigned long make_jiffies(long secs)
-{
-	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
-		return MAX_SCHEDULE_TIMEOUT-1;
-	else
-	        return secs*HZ;
-}
-
-static void xfrm_timer_handler(unsigned long data)
-{
-	struct xfrm_state *x = (struct xfrm_state*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
-	long next = LONG_MAX;
-	int warn = 0;
-
-	spin_lock(&x->lock);
-	if (x->km.state == XFRM_STATE_DEAD)
-		goto out;
-	if (x->km.state == XFRM_STATE_EXPIRED)
-		goto expired;
-	if (x->lft.hard_add_expires_seconds) {
-		long tmo = x->lft.hard_add_expires_seconds +
-			x->curlft.add_time - now;
-		if (tmo <= 0)
-			goto expired;
-		if (tmo < next)
-			next = tmo;
-	}
-	if (x->lft.hard_use_expires_seconds && x->curlft.use_time) {
-		long tmo = x->lft.hard_use_expires_seconds +
-			x->curlft.use_time - now;
-		if (tmo <= 0)
-			goto expired;
-		if (tmo < next)
-			next = tmo;
-	}
-	if (x->km.dying)
-		goto resched;
-	if (x->lft.soft_add_expires_seconds) {
-		long tmo = x->lft.soft_add_expires_seconds +
-			x->curlft.add_time - now;
-		if (tmo <= 0)
-			warn = 1;
-		else if (tmo < next)
-			next = tmo;
-	}
-	if (x->lft.soft_use_expires_seconds && x->curlft.use_time) {
-		long tmo = x->lft.soft_use_expires_seconds +
-			x->curlft.use_time - now;
-		if (tmo <= 0)
-			warn = 1;
-		else if (tmo < next)
-			next = tmo;
-	}
-
-	if (warn)
-		km_warn_expired(x);
-resched:
-	if (next != LONG_MAX &&
-	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
-		atomic_inc(&x->refcnt);
-	goto out;
-
-expired:
-	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
-		x->km.state = XFRM_STATE_EXPIRED;
-		wake_up(&km_waitq);
-		next = 2;
-		goto resched;
-	}
-	if (x->id.spi != 0)
-		km_expired(x);
-	__xfrm_state_delete(x);
-
-out:
-	spin_unlock(&x->lock);
-	xfrm_state_put(x);
-}
-
-struct xfrm_state *xfrm_state_alloc(void)
-{
-	struct xfrm_state *x;
-
-	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
-
-	if (x) {
-		memset(x, 0, sizeof(struct xfrm_state));
-		atomic_set(&x->refcnt, 1);
-		INIT_LIST_HEAD(&x->bydst);
-		INIT_LIST_HEAD(&x->byspi);
-		init_timer(&x->timer);
-		x->timer.function = xfrm_timer_handler;
-		x->timer.data	  = (unsigned long)x;
-		x->curlft.add_time = (unsigned long)xtime.tv_sec;
-		x->lft.soft_byte_limit = XFRM_INF;
-		x->lft.soft_packet_limit = XFRM_INF;
-		x->lft.hard_byte_limit = XFRM_INF;
-		x->lft.hard_packet_limit = XFRM_INF;
-		x->lock = SPIN_LOCK_UNLOCKED;
-	}
-	return x;
-}
-
-void __xfrm_state_destroy(struct xfrm_state *x)
-{
-	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
-	if (del_timer(&x->timer))
-		BUG();
-	if (x->aalg)
-		kfree(x->aalg);
-	if (x->ealg)
-		kfree(x->ealg);
-	if (x->calg)
-		kfree(x->calg);
-	if (x->type)
-		xfrm_put_type(x->type);
-	kfree(x);
-}
-
-static void __xfrm_state_delete(struct xfrm_state *x)
-{
-	int kill = 0;
-
-	if (x->km.state != XFRM_STATE_DEAD) {
-		x->km.state = XFRM_STATE_DEAD;
-		kill = 1;
-		spin_lock(&xfrm_state_lock);
-		list_del(&x->bydst);
-		atomic_dec(&x->refcnt);
-		if (x->id.spi) {
-			list_del(&x->byspi);
-			atomic_dec(&x->refcnt);
-		}
-		spin_unlock(&xfrm_state_lock);
-		if (del_timer(&x->timer))
-			atomic_dec(&x->refcnt);
-		if (atomic_read(&x->refcnt) != 1)
-			xfrm_flush_bundles(x);
-	}
-
-	if (kill && x->type)
-		x->type->destructor(x);
-	wake_up(&km_waitq);
-}
-
-void xfrm_state_delete(struct xfrm_state *x)
-{
-	spin_lock_bh(&x->lock);
-	__xfrm_state_delete(x);
-	spin_unlock_bh(&x->lock);
-}
-
-void xfrm_state_flush(u8 proto)
-{
-	int i;
-	struct xfrm_state *x;
-
-	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-restart:
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) {
-				atomic_inc(&x->refcnt);
-				spin_unlock_bh(&xfrm_state_lock);
-
-				xfrm_state_delete(x);
-				xfrm_state_put(x);
-
-				spin_lock_bh(&xfrm_state_lock);
-				goto restart;
-			}
-		}
-	}
-	spin_unlock_bh(&xfrm_state_lock);
-	wake_up(&km_waitq);
-}
-
-static int
-xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		  struct xfrm_tmpl *tmpl,
-		  xfrm_address_t *daddr, xfrm_address_t *saddr,
-		  unsigned short family)
-{
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return -1;
-	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
-	xfrm_state_put_afinfo(afinfo);
-	return 0;
-}
-
-struct xfrm_state *
-xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		struct flowi *fl, struct xfrm_tmpl *tmpl,
-		struct xfrm_policy *pol, int *err,
-		unsigned short family)
-{
-	unsigned h = xfrm_dst_hash(daddr, family);
-	struct xfrm_state *x;
-	int acquire_in_progress = 0;
-	int error = 0;
-	struct xfrm_state *best = NULL;
-
-	spin_lock_bh(&xfrm_state_lock);
-	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
-		if (x->props.family == family &&
-		    x->props.reqid == tmpl->reqid &&
-		    xfrm_state_addr_check(x, daddr, saddr, family) &&
-		    tmpl->mode == x->props.mode &&
-		    tmpl->id.proto == x->id.proto) {
-			/* Resolution logic:
-			   1. There is a valid state with matching selector.
-			      Done.
-			   2. Valid state with inappropriate selector. Skip.
-
-			   Entering area of "sysdeps".
-
-			   3. If state is not valid, selector is temporary,
-			      it selects only session which triggered
-			      previous resolution. Key manager will do
-			      something to install a state with proper
-			      selector.
-			 */
-			if (x->km.state == XFRM_STATE_VALID) {
-				if (!xfrm_selector_match(&x->sel, fl, family))
-					continue;
-				if (!best ||
-				    best->km.dying > x->km.dying ||
-				    (best->km.dying == x->km.dying &&
-				     best->curlft.add_time < x->curlft.add_time))
-					best = x;
-			} else if (x->km.state == XFRM_STATE_ACQ) {
-				acquire_in_progress = 1;
-			} else if (x->km.state == XFRM_STATE_ERROR ||
-				   x->km.state == XFRM_STATE_EXPIRED) {
-				if (xfrm_selector_match(&x->sel, fl, family))
-					error = 1;
-			}
-		}
-	}
-
-	if (best) {
-		atomic_inc(&best->refcnt);
-		spin_unlock_bh(&xfrm_state_lock);
-		return best;
-	}
-
-	x = NULL;
-	if (!error && !acquire_in_progress &&
-	    ((x = xfrm_state_alloc()) != NULL)) {
-		/* Initialize temporary selector matching only
-		 * to current session. */
-		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
-
-		if (km_query(x, tmpl, pol) == 0) {
-			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->bydst, xfrm_state_bydst+h);
-			atomic_inc(&x->refcnt);
-			if (x->id.spi) {
-				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-				list_add(&x->byspi, xfrm_state_byspi+h);
-				atomic_inc(&x->refcnt);
-			}
-			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-			atomic_inc(&x->refcnt);
-			mod_timer(&x->timer, XFRM_ACQ_EXPIRES*HZ);
-		} else {
-			x->km.state = XFRM_STATE_DEAD;
-			xfrm_state_put(x);
-			x = NULL;
-			error = 1;
-		}
-	}
-	spin_unlock_bh(&xfrm_state_lock);
-	if (!x)
-		*err = acquire_in_progress ? -EAGAIN :
-			(error ? -ESRCH : -ENOMEM);
-	return x;
-}
-
-void xfrm_state_insert(struct xfrm_state *x)
-{
-	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
-
-	spin_lock_bh(&xfrm_state_lock);
-	list_add(&x->bydst, xfrm_state_bydst+h);
-	atomic_inc(&x->refcnt);
-
-	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-
-	list_add(&x->byspi, xfrm_state_byspi+h);
-	atomic_inc(&x->refcnt);
-
-	if (!mod_timer(&x->timer, jiffies + HZ))
-		atomic_inc(&x->refcnt);
-
-	spin_unlock_bh(&xfrm_state_lock);
-	wake_up(&km_waitq);
-}
-
-int xfrm_state_check_expire(struct xfrm_state *x)
-{
-	if (!x->curlft.use_time)
-		x->curlft.use_time = (unsigned long)xtime.tv_sec;
-
-	if (x->km.state != XFRM_STATE_VALID)
-		return -EINVAL;
-
-	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
-	    x->curlft.packets >= x->lft.hard_packet_limit) {
-		km_expired(x);
-		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
-			atomic_inc(&x->refcnt);
-		return -EINVAL;
-	}
-
-	if (!x->km.dying &&
-	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
-	     x->curlft.packets >= x->lft.soft_packet_limit))
-		km_warn_expired(x);
-	return 0;
-}
-
-int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
-		- skb_headroom(skb);
-
-	if (nhead > 0)
-		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
-
-	/* Check tail too... */
-	return 0;
-}
-
-struct xfrm_state *
-xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
-		  unsigned short family)
-{
-	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
-
-	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->state_lookup(daddr, spi, proto);
-	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
-	return x;
-}
-
-struct xfrm_state *
-xfrm_find_acq(u8 mode, u16 reqid, u8 proto, 
-	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
-	      int create, unsigned short family)
-{
-	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
-
-	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
-	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
-	return x;
-}
-
-/* Silly enough, but I'm lazy to build resolution list */
-
-struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
-{
-	int i;
-	struct xfrm_state *x;
-
-	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (x->km.seq == seq) {
-				atomic_inc(&x->refcnt);
-				spin_unlock_bh(&xfrm_state_lock);
-				return x;
-			}
-		}
-	}
-	spin_unlock_bh(&xfrm_state_lock);
-	return NULL;
-}
-
-
-void
-xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
-{
-	u32 h;
-	struct xfrm_state *x0;
-
-	if (x->id.spi)
-		return;
-
-	if (minspi == maxspi) {
-		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
-		if (x0) {
-			xfrm_state_put(x0);
-			return;
-		}
-		x->id.spi = minspi;
-	} else {
-		u32 spi = 0;
-		minspi = ntohl(minspi);
-		maxspi = ntohl(maxspi);
-		for (h=0; h<maxspi-minspi+1; h++) {
-			spi = minspi + net_random()%(maxspi-minspi+1);
-			x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
-			if (x0 == NULL)
-				break;
-			xfrm_state_put(x0);
-		}
-		x->id.spi = htonl(spi);
-	}
-	if (x->id.spi) {
-		spin_lock_bh(&xfrm_state_lock);
-		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		list_add(&x->byspi, xfrm_state_byspi+h);
-		atomic_inc(&x->refcnt);
-		spin_unlock_bh(&xfrm_state_lock);
-		wake_up(&km_waitq);
-	}
-}
-
-int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
-		    void *data)
-{
-	int i;
-	struct xfrm_state *x;
-	int count = 0;
-	int err = 0;
-
-	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
-				count++;
-		}
-	}
-	if (count == 0) {
-		err = -ENOENT;
-		goto out;
-	}
-
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
-				continue;
-			err = func(x, --count, data);
-			if (err)
-				goto out;
-		}
-	}
-out:
-	spin_unlock_bh(&xfrm_state_lock);
-	return err;
-}
-
-
-int xfrm_replay_check(struct xfrm_state *x, u32 seq)
-{
-	u32 diff;
-
-	seq = ntohl(seq);
-
-	if (unlikely(seq == 0))
-		return -EINVAL;
-
-	if (likely(seq > x->replay.seq))
-		return 0;
-
-	diff = x->replay.seq - seq;
-	if (diff >= x->props.replay_window) {
-		x->stats.replay_window++;
-		return -EINVAL;
-	}
-
-	if (x->replay.bitmap & (1U << diff)) {
-		x->stats.replay++;
-		return -EINVAL;
-	}
-	return 0;
-}
-
-void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
-{
-	u32 diff;
-
-	seq = ntohl(seq);
-
-	if (seq > x->replay.seq) {
-		diff = seq - x->replay.seq;
-		if (diff < x->props.replay_window)
-			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
-		else
-			x->replay.bitmap = 1;
-		x->replay.seq = seq;
-	} else {
-		diff = x->replay.seq - seq;
-		x->replay.bitmap |= (1U << diff);
-	}
-}
-
-int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
-{
-	int i;
-
-	for (i=0; i<n; i++) {
-		int match;
-		match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
-		if (!match)
-			return -EINVAL;
-	}
-	return 0;
-}
-
-static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
-static rwlock_t		xfrm_km_lock = RW_LOCK_UNLOCKED;
-
-void km_warn_expired(struct xfrm_state *x)
-{
-	struct xfrm_mgr *km;
-
-	x->km.dying = 1;
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list)
-		km->notify(x, 0);
-	read_unlock(&xfrm_km_lock);
-}
-
-void km_expired(struct xfrm_state *x)
-{
-	struct xfrm_mgr *km;
-
-	x->km.state = XFRM_STATE_EXPIRED;
-
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list)
-		km->notify(x, 1);
-	read_unlock(&xfrm_km_lock);
-	wake_up(&km_waitq);
-}
-
-int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
-{
-	int err = -EINVAL;
-	struct xfrm_mgr *km;
-
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
-		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
-		if (!err)
-			break;
-	}
-	read_unlock(&xfrm_km_lock);
-	return err;
-}
-
-int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
-{
-	int err;
-	u8 *data;
-	struct xfrm_mgr *km;
-	struct xfrm_policy *pol = NULL;
-
-	if (optlen <= 0 || optlen > PAGE_SIZE)
-		return -EMSGSIZE;
-
-	data = kmalloc(optlen, GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	err = -EFAULT;
-	if (copy_from_user(data, optval, optlen))
-		goto out;
-
-	err = -EINVAL;
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
-		pol = km->compile_policy(sk->family, optname, data, optlen, &err);
-		if (err >= 0)
-			break;
-	}
-	read_unlock(&xfrm_km_lock);
-
-	if (err >= 0) {
-		xfrm_sk_policy_insert(sk, err, pol);
-		err = 0;
-	}
-
-out:
-	kfree(data);
-	return err;
-}
-
-int xfrm_register_km(struct xfrm_mgr *km)
-{
-	write_lock_bh(&xfrm_km_lock);
-	list_add_tail(&km->list, &xfrm_km_list);
-	write_unlock_bh(&xfrm_km_lock);
-	return 0;
-}
-
-int xfrm_unregister_km(struct xfrm_mgr *km)
-{
-	write_lock_bh(&xfrm_km_lock);
-	list_del(&km->list);
-	write_unlock_bh(&xfrm_km_lock);
-	return 0;
-}
-
-int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
-{
-	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
-		return -EAFNOSUPPORT;
-	write_lock(&xfrm_state_afinfo_lock);
-	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
-		err = -ENOBUFS;
-	else {
-		afinfo->state_bydst = xfrm_state_bydst;
-		afinfo->state_byspi = xfrm_state_byspi;
-		xfrm_state_afinfo[afinfo->family] = afinfo;
-	}
-	write_unlock(&xfrm_state_afinfo_lock);
-	return err;
-}
-
-int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
-{
-	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
-		return -EAFNOSUPPORT;
-	write_lock(&xfrm_state_afinfo_lock);
-	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
-		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
-			err = -EINVAL;
-		else {
-			xfrm_state_afinfo[afinfo->family] = NULL;
-			afinfo->state_byspi = NULL;
-			afinfo->state_bydst = NULL;
-		}
-	}
-	write_unlock(&xfrm_state_afinfo_lock);
-	return err;
-}
-
-struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
-{
-	struct xfrm_state_afinfo *afinfo;
-	if (unlikely(family >= NPROTO))
-		return NULL;
-	read_lock(&xfrm_state_afinfo_lock);
-	afinfo = xfrm_state_afinfo[family];
-	if (likely(afinfo != NULL))
-		read_lock(&afinfo->lock);
-	read_unlock(&xfrm_state_afinfo_lock);
-	return afinfo;
-}
-
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
-{
-	if (unlikely(afinfo == NULL))
-		return;
-	read_unlock(&afinfo->lock);
-}
-
-void __init xfrm_state_init(void)
-{
-	int i;
-
-	for (i=0; i<XFRM_DST_HSIZE; i++) {
-		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
-		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
-	}
-}
-
diff -Nru a/net/ipv4/xfrm_user.c b/net/ipv4/xfrm_user.c
--- a/net/ipv4/xfrm_user.c	Mon Mar 31 13:41:07 2003
+++ /dev/null	Wed Dec 31 16:00:00 1969
@@ -1,1095 +0,0 @@
-/* xfrm_user.c: User interface to configure xfrm engine.
- *
- * Copyright (C) 2002 David S. Miller (davem@redhat.com)
- *
- * Changes:
- *	Mitsuru KANDA @USAGI
- * 	Kazunori MIYAZAWA @USAGI
- * 	Kunihiro Ishiguro
- * 		IPv6 support
- * 	
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-#include <linux/init.h>
-#include <linux/security.h>
-#include <net/sock.h>
-#include <net/xfrm.h>
-
-static struct sock *xfrm_nl;
-
-static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
-{
-	struct rtattr *rt = xfrma[type - 1];
-	struct xfrm_algo *algp;
-
-	if (!rt)
-		return 0;
-
-	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
-		return -EINVAL;
-
-	algp = RTA_DATA(rt);
-	switch (type) {
-	case XFRMA_ALG_AUTH:
-		if (!algp->alg_key_len &&
-		    strcmp(algp->alg_name, "digest_null") != 0)
-			return -EINVAL;
-		break;
-
-	case XFRMA_ALG_CRYPT:
-		if (!algp->alg_key_len &&
-		    strcmp(algp->alg_name, "cipher_null") != 0)
-			return -EINVAL;
-		break;
-
-	case XFRMA_ALG_COMP:
-		/* Zero length keys are legal.  */
-		break;
-
-	default:
-		return -EINVAL;
-	};
-
-	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
-	return 0;
-}
-
-static int verify_newsa_info(struct xfrm_usersa_info *p,
-			     struct rtattr **xfrma)
-{
-	int err;
-
-	err = -EINVAL;
-	switch (p->family) {
-	case AF_INET:
-		break;
-
-	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		break;
-#else
-		err = -EAFNOSUPPORT;
-		goto out;
-#endif
-
-	default:
-		goto out;
-	};
-
-	err = -EINVAL;
-	switch (p->id.proto) {
-	case IPPROTO_AH:
-		if (!xfrma[XFRMA_ALG_AUTH-1]	||
-		    xfrma[XFRMA_ALG_CRYPT-1]	||
-		    xfrma[XFRMA_ALG_COMP-1])
-			goto out;
-		break;
-
-	case IPPROTO_ESP:
-		if ((!xfrma[XFRMA_ALG_AUTH-1] &&
-		     !xfrma[XFRMA_ALG_CRYPT-1])	||
-		    xfrma[XFRMA_ALG_COMP-1])
-			goto out;
-		break;
-
-	case IPPROTO_COMP:
-		if (!xfrma[XFRMA_ALG_COMP-1]	||
-		    xfrma[XFRMA_ALG_AUTH-1]	||
-		    xfrma[XFRMA_ALG_CRYPT-1])
-			goto out;
-		break;
-
-	default:
-		goto out;
-	};
-
-	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
-		goto out;
-	if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT)))
-		goto out;
-	if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP)))
-		goto out;
-
-	err = -EINVAL;
-	switch (p->mode) {
-	case 0:
-	case 1:
-		break;
-
-	default:
-		goto out;
-	};
-
-	err = 0;
-
-out:
-	return err;
-}
-
-static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg)
-{
-	struct rtattr *rta = u_arg;
-	struct xfrm_algo *p, *ualg;
-
-	if (!rta)
-		return 0;
-
-	ualg = RTA_DATA(rta);
-	p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
-	*algpp = p;
-	return 0;
-}
-
-static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
-{
-	memcpy(&x->id, &p->id, sizeof(x->id));
-	memcpy(&x->sel, &p->sel, sizeof(x->sel));
-	memcpy(&x->lft, &p->lft, sizeof(x->lft));
-	x->props.mode = p->mode;
-	x->props.replay_window = p->replay_window;
-	x->props.reqid = p->reqid;
-	x->props.family = p->family;
-	x->props.saddr = x->sel.saddr;
-}
-
-static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
-					       struct rtattr **xfrma,
-					       int *errp)
-{
-	struct xfrm_state *x = xfrm_state_alloc();
-	int err = -ENOMEM;
-
-	if (!x)
-		goto error_no_put;
-
-	copy_from_user_state(x, p);
-
-	if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1])))
-		goto error;
-	if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1])))
-		goto error;
-	if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1])))
-		goto error;
-
-	err = -ENOENT;
-	x->type = xfrm_get_type(x->id.proto, x->props.family);
-	if (x->type == NULL)
-		goto error;
-
-	err = x->type->init_state(x, NULL);
-	if (err)
-		goto error;
-
-	x->curlft.add_time = (unsigned long) xtime.tv_sec;
-	x->km.state = XFRM_STATE_VALID;
-	x->km.seq = p->seq;
-
-	return x;
-
-error:
-	xfrm_state_put(x);
-error_no_put:
-	*errp = err;
-	return NULL;
-}
-
-static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
-	struct xfrm_state *x, *x1;
-	int err;
-
-	err = verify_newsa_info(p, (struct rtattr **) xfrma);
-	if (err)
-		return err;
-
-	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
-	if (!x)
-		return err;
-
-	x1 = xfrm_state_lookup(&x->props.saddr, x->id.spi, x->id.proto, x->props.family);
-	if (x1) {
-		xfrm_state_put(x);
-		xfrm_state_put(x1);
-		return -EEXIST;
-	}
-
-	xfrm_state_insert(x);
-
-	return 0;
-}
-
-static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_state *x;
-	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
-
-	x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family);
-	if (x == NULL)
-		return -ESRCH;
-
-	xfrm_state_delete(x);
-	xfrm_state_put(x);
-
-	return 0;
-}
-
-static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
-{
-	memcpy(&p->id, &x->id, sizeof(p->id));
-	memcpy(&p->sel, &x->sel, sizeof(p->sel));
-	memcpy(&p->lft, &x->lft, sizeof(p->lft));
-	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
-	memcpy(&p->stats, &x->stats, sizeof(p->stats));
-	p->mode = x->props.mode;
-	p->replay_window = x->props.replay_window;
-	p->reqid = x->props.reqid;
-	p->family = x->props.family;
-	p->seq = x->km.seq;
-}
-
-struct xfrm_dump_info {
-	struct sk_buff *in_skb;
-	struct sk_buff *out_skb;
-	u32 nlmsg_seq;
-	int start_idx;
-	int this_idx;
-};
-
-static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
-{
-	struct xfrm_dump_info *sp = ptr;
-	struct sk_buff *in_skb = sp->in_skb;
-	struct sk_buff *skb = sp->out_skb;
-	struct xfrm_usersa_info *p;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-	if (sp->this_idx < sp->start_idx)
-		goto out;
-
-	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
-			sp->nlmsg_seq,
-			XFRM_MSG_NEWSA, sizeof(*p));
-	nlh->nlmsg_flags = 0;
-
-	p = NLMSG_DATA(nlh);
-	copy_to_user_state(x, p);
-
-	if (x->aalg)
-		RTA_PUT(skb, XFRMA_ALG_AUTH,
-			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
-	if (x->ealg)
-		RTA_PUT(skb, XFRMA_ALG_CRYPT,
-			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
-	if (x->calg)
-		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
-
-	nlh->nlmsg_len = skb->tail - b;
-out:
-	sp->this_idx++;
-	return 0;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct xfrm_dump_info info;
-
-	info.in_skb = cb->skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = cb->nlh->nlmsg_seq;
-	info.this_idx = 0;
-	info.start_idx = cb->args[0];
-	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
-	cb->args[0] = info.this_idx;
-
-	return skb->len;
-}
-
-static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
-					  struct xfrm_state *x, u32 seq)
-{
-	struct xfrm_dump_info info;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	info.in_skb = in_skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = seq;
-	info.this_idx = info.start_idx = 0;
-
-	if (dump_one_state(x, 0, &info)) {
-		kfree_skb(skb);
-		return NULL;
-	}
-
-	return skb;
-}
-
-static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
-	struct xfrm_state *x;
-	struct sk_buff *resp_skb;
-	int err;
-
-	x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family);
-	err = -ESRCH;
-	if (x == NULL)
-		goto out_noput;
-
-	resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
-	if (IS_ERR(resp_skb)) {
-		err = PTR_ERR(resp_skb);
-	} else {
-		err = netlink_unicast(xfrm_nl, resp_skb,
-				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	}
-	xfrm_state_put(x);
-out_noput:
-	return err;
-}
-
-static int verify_userspi_info(struct xfrm_userspi_info *p)
-{
-	switch (p->info.id.proto) {
-	case IPPROTO_AH:
-	case IPPROTO_ESP:
-		break;
-
-	case IPPROTO_COMP:
-		/* IPCOMP spi is 16-bits. */
-		if (p->min >= 0x10000 ||
-		    p->max >= 0x10000)
-			return -EINVAL;
-
-	default:
-		return -EINVAL;
-	};
-
-	if (p->min > p->max)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_state *x;
-	struct xfrm_userspi_info *p;
-	struct sk_buff *resp_skb;
-	int err;
-
-	p = NLMSG_DATA(nlh);
-	err = verify_userspi_info(p);
-	if (err)
-		goto out_noput;
-	x = xfrm_find_acq(p->info.mode, p->info.reqid, p->info.id.proto,
-			  &p->info.sel.daddr,
-			  &p->info.sel.saddr, 1,
-			  p->info.family);
-	err = -ENOENT;
-	if (x == NULL)
-		goto out_noput;
-
-	resp_skb = ERR_PTR(-ENOENT);
-
-	spin_lock_bh(&x->lock);
-	if (x->km.state != XFRM_STATE_DEAD) {
-		xfrm_alloc_spi(x, p->min, p->max);
-		if (x->id.spi)
-			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
-	}
-	spin_unlock_bh(&x->lock);
-
-	if (IS_ERR(resp_skb)) {
-		err = PTR_ERR(resp_skb);
-		goto out;
-	}
-
-	err = netlink_unicast(xfrm_nl, resp_skb,
-			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-out:
-	xfrm_state_put(x);
-out_noput:
-	return err;
-}
-
-static int verify_policy_dir(__u8 dir)
-{
-	switch (dir) {
-	case XFRM_POLICY_IN:
-	case XFRM_POLICY_OUT:
-	case XFRM_POLICY_FWD:
-		break;
-
-	default:
-		return -EINVAL;
-	};
-
-	return 0;
-}
-
-static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
-{
-	switch (p->share) {
-	case XFRM_SHARE_ANY:
-	case XFRM_SHARE_SESSION:
-	case XFRM_SHARE_USER:
-	case XFRM_SHARE_UNIQUE:
-		break;
-
-	default:
-		return -EINVAL;
-	};
-
-	switch (p->action) {
-	case XFRM_POLICY_ALLOW:
-	case XFRM_POLICY_BLOCK:
-		break;
-
-	default:
-		return -EINVAL;
-	};
-
-	switch (p->family) {
-	case AF_INET:
-		break;
-
-	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		break;
-#else
-		return  -EAFNOSUPPORT;
-#endif
-
-	default:
-		return -EINVAL;
-	};
-
-	return verify_policy_dir(p->dir);
-}
-
-static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
-			   int nr)
-{
-	int i;
-
-	xp->xfrm_nr = nr;
-	for (i = 0; i < nr; i++, ut++) {
-		struct xfrm_tmpl *t = &xp->xfrm_vec[i];
-
-		memcpy(&t->id, &ut->id, sizeof(struct xfrm_id));
-		memcpy(&t->saddr, &ut->saddr,
-		       sizeof(xfrm_address_t));
-		t->reqid = ut->reqid;
-		t->mode = ut->mode;
-		t->share = ut->share;
-		t->optional = ut->optional;
-		t->aalgos = ut->aalgos;
-		t->ealgos = ut->ealgos;
-		t->calgos = ut->calgos;
-	}
-}
-
-static int copy_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
-{
-	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
-	struct xfrm_user_tmpl *utmpl;
-	int nr;
-
-	if (!rt) {
-		pol->xfrm_nr = 0;
-	} else {
-		nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
-
-		if (nr > XFRM_MAX_DEPTH)
-			return -EINVAL;
-
-		copy_templates(pol, RTA_DATA(rt), nr);
-	}
-	return 0;
-}
-
-static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
-{
-	xp->priority = p->priority;
-	xp->index = p->index;
-	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
-	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
-	xp->action = p->action;
-	xp->flags = p->flags;
-	xp->family = p->family;
-	/* XXX xp->share = p->share; */
-}
-
-static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
-{
-	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
-	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
-	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
-	p->priority = xp->priority;
-	p->index = xp->index;
-	p->family = xp->family;
-	p->dir = dir;
-	p->action = xp->action;
-	p->flags = xp->flags;
-	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
-}
-
-static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
-{
-	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
-	int err;
-
-	if (!xp) {
-		*errp = -ENOMEM;
-		return NULL;
-	}
-
-	copy_from_user_policy(xp, p);
-	err = copy_user_tmpl(xp, xfrma);
-	if (err) {
-		*errp = err;
-		kfree(xp);
-		xp = NULL;
-	}
-
-	return xp;
-}
-
-static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
-	struct xfrm_policy *xp;
-	int err;
-
-	err = verify_newpolicy_info(p);
-	if (err)
-		return err;
-
-	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
-	if (!xp)
-		return err;
-
-	err = xfrm_policy_insert(p->dir, xp, 1);
-	if (err) {
-		kfree(xp);
-		return err;
-	}
-
-	xfrm_pol_put(xp);
-
-	return 0;
-}
-
-static int xfrm_del_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_policy *xp;
-	struct xfrm_userpolicy_id *p;
-	int err;
-
-	p = NLMSG_DATA(nlh);
-
-	err = verify_policy_dir(p->dir);
-	if (err)
-		return err;
-
-	xp = xfrm_policy_delete(p->dir, &p->sel);
-	if (xp == NULL)
-		return -ENOENT;
-	xfrm_policy_kill(xp);
-	xfrm_pol_put(xp);
-	return 0;
-}
-
-static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
-{
-	struct xfrm_dump_info *sp = ptr;
-	struct xfrm_userpolicy_info *p;
-	struct sk_buff *in_skb = sp->in_skb;
-	struct sk_buff *skb = sp->out_skb;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-	if (sp->this_idx < sp->start_idx)
-		goto out;
-
-	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
-			sp->nlmsg_seq,
-			XFRM_MSG_NEWPOLICY, sizeof(*p));
-	p = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = 0;
-
-	copy_to_user_policy(xp, p, dir);
-
-	if (xp->xfrm_nr) {
-		struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
-		int i;
-
-		for (i = 0; i < xp->xfrm_nr; i++) {
-			struct xfrm_user_tmpl *up = &vec[i];
-			struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
-
-			memcpy(&up->id, &kp->id, sizeof(up->id));
-			memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
-			up->reqid = kp->reqid;
-			up->mode = kp->mode;
-			up->share = kp->share;
-			up->optional = kp->optional;
-			up->aalgos = kp->aalgos;
-			up->ealgos = kp->ealgos;
-			up->calgos = kp->calgos;
-		}
-		RTA_PUT(skb, XFRMA_TMPL,
-			(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr),
-			vec);
-	}
-
-	nlh->nlmsg_len = skb->tail - b;
-out:
-	sp->this_idx++;
-	return 0;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct xfrm_dump_info info;
-
-	info.in_skb = cb->skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = cb->nlh->nlmsg_seq;
-	info.this_idx = 0;
-	info.start_idx = cb->args[0];
-	(void) xfrm_policy_walk(dump_one_policy, &info);
-	cb->args[0] = info.this_idx;
-
-	return skb->len;
-}
-
-static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
-					  struct xfrm_policy *xp,
-					  int dir, u32 seq)
-{
-	struct xfrm_dump_info info;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	info.in_skb = in_skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = seq;
-	info.this_idx = info.start_idx = 0;
-
-	if (dump_one_policy(xp, dir, 0, &info) < 0) {
-		kfree_skb(skb);
-		return NULL;
-	}
-
-	return skb;
-}
-
-static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
-{
-	struct xfrm_policy *xp;
-	struct xfrm_userpolicy_id *p;
-	struct sk_buff *resp_skb;
-	int err;
-
-	p = NLMSG_DATA(nlh);
-	xp = xfrm_policy_byid(p->dir, p->index, 0);
-	if (xp == NULL)
-		return -ENOENT;
-
-	resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq);
-	if (IS_ERR(resp_skb)) {
-		err = PTR_ERR(resp_skb);
-	} else {
-		err = netlink_unicast(xfrm_nl, resp_skb,
-				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	}
-
-	xfrm_pol_put(xp);
-
-	return err;
-}
-
-static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
-	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
-	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
-	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
-};
-
-static struct xfrm_link {
-	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
-	int (*dump)(struct sk_buff *, struct netlink_callback *);
-} xfrm_dispatch[] = {
-	{	.doit	=	xfrm_add_sa, 		},
-	{	.doit	=	xfrm_del_sa, 		},
-	{
-		.doit	=	xfrm_get_sa,
-		.dump	=	xfrm_dump_sa,
-	},
-	{	.doit	=	xfrm_add_policy 	},
-	{	.doit	=	xfrm_del_policy 	},
-	{
-		.doit	=	xfrm_get_policy,
-		.dump	=	xfrm_dump_policy,
-	},
-	{	.doit	=	xfrm_alloc_userspi	},
-};
-
-static int xfrm_done(struct netlink_callback *cb)
-{
-	return 0;
-}
-
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
-{
-	struct rtattr *xfrma[XFRMA_MAX];
-	struct xfrm_link *link;
-	int type, min_len;
-
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		return 0;
-
-	type = nlh->nlmsg_type;
-
-	/* A control message: ignore them */
-	if (type < XFRM_MSG_BASE)
-		return 0;
-
-	/* Unknown message: reply with EINVAL */
-	if (type > XFRM_MSG_MAX)
-		goto err_einval;
-
-	type -= XFRM_MSG_BASE;
-	link = &xfrm_dispatch[type];
-
-	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb)) {
-		*errp = -EPERM;
-		return -1;
-	}
-
-	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
-		u32 rlen;
-
-		if (link->dump == NULL)
-			goto err_einval;
-
-		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
-						link->dump,
-						xfrm_done)) != 0) {
-			return -1;
-		}
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return -1;
-	}
-
-	memset(xfrma, 0, sizeof(xfrma));
-
-	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
-		goto err_einval;
-
-	if (nlh->nlmsg_len > min_len) {
-		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned short flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > XFRMA_MAX)
-					goto err_einval;
-				xfrma[flavor - 1] = attr;
-			}
-			attr = RTA_NEXT(attr, attrlen);
-		}
-	}
-
-	if (link->doit == NULL)
-		goto err_einval;
-	*errp = link->doit(skb, nlh, (void **) &xfrma);
-
-	return *errp;
-
-err_einval:
-	*errp = -EINVAL;
-	return -1;
-}
-
-static int xfrm_user_rcv_skb(struct sk_buff *skb)
-{
-	int err;
-	struct nlmsghdr *nlh;
-
-	while (skb->len >= NLMSG_SPACE(0)) {
-		u32 rlen;
-
-		nlh = (struct nlmsghdr *) skb->data;
-		if (nlh->nlmsg_len < sizeof(*nlh) ||
-		    skb->len < nlh->nlmsg_len)
-			return 0;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		if (xfrm_user_rcv_msg(skb, nlh, &err)) {
-			if (err == 0)
-				return -1;
-			netlink_ack(skb, nlh, err);
-		} else if (nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, 0);
-		skb_pull(skb, rlen);
-	}
-
-	return 0;
-}
-
-static void xfrm_netlink_rcv(struct sock *sk, int len)
-{
-	do {
-		struct sk_buff *skb;
-
-		down(&xfrm_cfg_sem);
-
-		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
-			if (xfrm_user_rcv_skb(skb)) {
-				if (skb->len)
-					skb_queue_head(&sk->receive_queue, skb);
-				else
-					kfree_skb(skb);
-				break;
-			}
-			kfree_skb(skb);
-		}
-
-		up(&xfrm_cfg_sem);
-
-	} while (xfrm_nl && xfrm_nl->receive_queue.qlen);
-}
-
-static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
-{
-	struct xfrm_user_expire *ue;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
-			sizeof(*ue));
-	ue = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = 0;
-
-	copy_to_user_state(x, &ue->state);
-	ue->hard = (hard != 0) ? 1 : 0;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-static int xfrm_send_notify(struct xfrm_state *x, int hard)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	if (build_expire(skb, x, hard) < 0)
-		BUG();
-
-	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
-
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
-}
-
-/* XXX Make this xfrm_state.c:xfrm_get_acqseq() */
-static u32 get_acqseq(void)
-{
-	u32 res;
-	static u32 acqseq;
-	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
-
-	spin_lock_bh(&acqseq_lock);
-	res = (++acqseq ? : ++acqseq);
-	spin_unlock_bh(&acqseq_lock);
-	return res;
-}
-
-static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
-			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
-			 int dir)
-{
-	struct xfrm_user_acquire *ua;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-	__u32 seq = get_acqseq();
-
-	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
-			sizeof(*ua));
-	ua = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = 0;
-
-	memcpy(&ua->id, &x->id, sizeof(ua->id));
-	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
-	copy_to_user_policy(xp, &ua->policy, dir);
-	ua->aalgos = xt->aalgos;
-	ua->ealgos = xt->ealgos;
-	ua->calgos = xt->calgos;
-	ua->seq = x->km.seq = seq;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
-			     struct xfrm_policy *xp, int dir)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(sizeof(struct xfrm_user_acquire) + 16, GFP_ATOMIC);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	if (build_acquire(skb, x, xt, xp, dir) < 0)
-		BUG();
-
-	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
-
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
-}
-
-/* User gives us xfrm_user_policy_info followed by an array of 0
- * or more templates.
- */
-struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
-                                        u8 *data, int len, int *dir)
-{
-	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
-	struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1);
-	struct xfrm_policy *xp;
-	int nr;
-
-	switch (family) {
-	case AF_INET:
-		if (opt != IP_XFRM_POLICY) {
-			*dir = -EOPNOTSUPP;
-			return NULL;
-		}
-		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-		if (opt != IPV6_XFRM_POLICY) {
-			*dir = -EOPNOTSUPP;
-			return NULL;
-		}
-		break;
-#endif
-	default:
-		*dir = -EINVAL;
-		return NULL;
-	}
-
-	*dir = -EINVAL;
-
-	if (len < sizeof(*p) ||
-	    verify_newpolicy_info(p))
-		return NULL;
-
-	nr = ((len - sizeof(*p)) / sizeof(*ut));
-	if (nr > XFRM_MAX_DEPTH)
-		return NULL;
-
-	xp = xfrm_policy_alloc(GFP_KERNEL);
-	if (xp == NULL) {
-		*dir = -ENOBUFS;
-		return NULL;
-	}
-
-	copy_from_user_policy(xp, p);
-	copy_templates(xp, ut, nr);
-
-	*dir = p->dir;
-
-	return xp;
-}
-
-static struct xfrm_mgr netlink_mgr = {
-	.id		= "netlink",
-	.notify		= xfrm_send_notify,
-	.acquire	= xfrm_send_acquire,
-	.compile_policy	= xfrm_compile_policy,
-};
-
-static int __init xfrm_user_init(void)
-{
-	printk(KERN_INFO "Initializing IPsec netlink socket\n");
-
-	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
-	if (xfrm_nl == NULL)
-		panic("xfrm_user_init: cannot initialize xfrm_nl\n");
-
-
-	xfrm_register_km(&netlink_mgr);
-
-	return 0;
-}
-
-static void __exit xfrm_user_exit(void)
-{
-	xfrm_unregister_km(&netlink_mgr);
-	sock_release(xfrm_nl->socket);
-}
-
-module_init(xfrm_user_init);
-module_exit(xfrm_user_exit);
diff -Nru a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
--- a/net/ipv6/af_inet6.c	Mon Mar 31 13:41:07 2003
+++ b/net/ipv6/af_inet6.c	Mon Mar 31 13:41:07 2003
@@ -62,14 +62,15 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-#ifdef MODULE
+#if 0 /*def MODULE*/
 static int unloadable = 0; /* XX: Turn to one when all is ok within the
 			      module for allowing unload */
+MODULE_PARM(unloadable, "i");
 #endif
 
 MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
-MODULE_PARM(unloadable, "i");
+MODULE_LICENSE("GPL");
 
 /* IPv6 procfs goodies... */
 
@@ -871,4 +872,3 @@
 }
 module_exit(inet6_exit);
 #endif /* MODULE */
-MODULE_LICENSE("GPL");
diff -Nru a/net/ipv6/ah6.c b/net/ipv6/ah6.c
--- a/net/ipv6/ah6.c	Mon Mar 31 13:41:07 2003
+++ b/net/ipv6/ah6.c	Mon Mar 31 13:41:07 2003
@@ -199,7 +199,7 @@
 		}
 	}
 
-	nexthdr = ah->nexthdr;
+	nexthdr = ((struct ipv6hdr*)tmp_hdr)->nexthdr = ah->nexthdr;
 	skb->nh.raw = skb_pull(skb, (ah->hdrlen+2)<<2);
 	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
 	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
@@ -287,7 +287,7 @@
 	
 	x->props.header_len = XFRM_ALIGN8(ahp->icv_trunc_len + AH_HLEN_NOICV);
 	if (x->props.mode)
-		x->props.header_len += 20;
+		x->props.header_len += 40;
 	x->data = ahp;
 
 	return 0;
@@ -330,6 +330,7 @@
 static struct inet6_protocol ah6_protocol = {
 	.handler	=	xfrm6_rcv,
 	.err_handler	=	ah6_err,
+	.no_policy	=	1,
 };
 
 int __init ah6_init(void)
diff -Nru a/net/ipv6/esp6.c b/net/ipv6/esp6.c
--- a/net/ipv6/esp6.c	Mon Mar 31 13:41:07 2003
+++ b/net/ipv6/esp6.c	Mon Mar 31 13:41:07 2003
@@ -108,7 +108,7 @@
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x  = dst->xfrm;
 	struct ipv6hdr *iph = NULL, *top_iph;
-	struct ip_esp_hdr *esph;
+	struct ipv6_esp_hdr *esph;
 	struct crypto_tfm *tfm;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
@@ -154,7 +154,7 @@
 	esp = x->data;
 	alen = esp->auth.icv_trunc_len;
 	tfm = esp->conf.tfm;
-	blksize = crypto_tfm_alg_blocksize(tfm);
+	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
 	clen = (clen + 2 + blksize-1)&~(blksize-1);
 	if (esp->conf.padlen)
 		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
@@ -176,7 +176,7 @@
 	if (x->props.mode) {
 		iph = skb->nh.ipv6h;
 		top_iph = (struct ipv6hdr*)skb_push(skb, x->props.header_len);
-		esph = (struct ip_esp_hdr*)(top_iph+1);
+		esph = (struct ipv6_esp_hdr*)(top_iph+1);
 		*(u8*)(trailer->tail - 1) = IPPROTO_IPV6;
 		top_iph->version = 6;
 		top_iph->priority = iph->priority;
@@ -184,13 +184,13 @@
 		top_iph->flow_lbl[1] = iph->flow_lbl[1];
 		top_iph->flow_lbl[2] = iph->flow_lbl[2];
 		top_iph->nexthdr = IPPROTO_ESP;
-		top_iph->payload_len = htons(skb->len + alen);
+		top_iph->payload_len = htons(skb->len + alen - sizeof(struct ipv6hdr));
 		top_iph->hop_limit = iph->hop_limit;
-		memcpy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr, sizeof(struct ipv6hdr));
-		memcpy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr, sizeof(struct ipv6hdr));
+		memcpy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr, sizeof(struct in6_addr));
+		memcpy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr, sizeof(struct in6_addr));
 	} else { 
 		/* XXX exthdr */
-		esph = (struct ip_esp_hdr*)skb_push(skb, x->props.header_len);
+		esph = (struct ipv6_esp_hdr*)skb_push(skb, x->props.header_len);
 		skb->h.raw = (unsigned char*)esph;
 		top_iph = (struct ipv6hdr*)skb_push(skb, hdr_len);
 		memcpy(top_iph, iph, hdr_len);
@@ -257,7 +257,7 @@
 int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *iph;
-	struct ip_esp_hdr *esph;
+	struct ipv6_esp_hdr *esph;
 	struct esp_data *esp = x->data;
 	struct sk_buff *trailer;
 	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
@@ -269,7 +269,7 @@
 	u8 ret_nexthdr = 0;
 	unsigned char *tmp_hdr = NULL;
 
-	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
+	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr)))
 		goto out;
 
 	if (elen <= 0 || (elen & (blksize-1)))
@@ -301,7 +301,7 @@
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	esph = (struct ip_esp_hdr*)skb->data;
+	esph = (struct ipv6_esp_hdr*)skb->data;
 	iph = skb->nh.ipv6h;
 
 	/* Get ivec. This can be wrong, check against another impls. */
@@ -336,7 +336,7 @@
 		}
 		/* ... check padding bits here. Silly. :-) */ 
 
-		ret_nexthdr = nexthdr[1];
+		ret_nexthdr = ((struct ipv6hdr*)tmp_hdr)->nexthdr = nexthdr[1];
 		pskb_trim(skb, skb->len - alen - padlen - 2);
 		skb->h.raw = skb_pull(skb, 8 + esp->conf.ivlen);
 		skb->nh.raw += 8 + esp->conf.ivlen;
@@ -370,7 +370,7 @@
 		int type, int code, int offset, __u32 info)
 {
 	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
-	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+offset);
+	struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
 	struct xfrm_state *x;
 
 	if (type != ICMPV6_DEST_UNREACH ||
@@ -416,7 +416,7 @@
 		if (x->aalg->alg_key_len == 0 || x->aalg->alg_key_len > 512)
 			goto error;
 	}
-	if (x->ealg == NULL || x->ealg->alg_key_len == 0)
+	if (x->ealg == NULL)
 		goto error;
 
 	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
@@ -499,6 +499,7 @@
 static struct inet6_protocol esp6_protocol = {
 	.handler 	=	xfrm6_rcv,
 	.err_handler	=	esp6_err,
+	.no_policy	=	1,
 };
 
 int __init esp6_init(void)
diff -Nru a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
--- a/net/ipv6/ip6_input.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv6/ip6_input.c	Mon Mar 31 13:41:08 2003
@@ -43,6 +43,7 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/xfrm.h>
 
 
 
@@ -149,7 +150,14 @@
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
 	if ((ipprot = inet6_protos[hash]) != NULL) {
-		int ret = ipprot->handler(&skb);
+		int ret;
+		
+		if (!ipprot->no_policy &&
+		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			kfree_skb(skb);
+			return 0;
+		}
+		ret = ipprot->handler(&skb);
 		if (ret < 0) {
 			nexthdr = -ret;
 			goto resubmit;
@@ -157,9 +165,11 @@
 		IP6_INC_STATS_BH(Ip6InDelivers);
 	} else {
 		if (!raw_sk) {
-			IP6_INC_STATS_BH(Ip6InUnknownProtos);
-			icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR,
-					  offsetof(struct ipv6hdr, nexthdr));
+			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				IP6_INC_STATS_BH(Ip6InUnknownProtos);
+				icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR,
+						  offsetof(struct ipv6hdr, nexthdr));
+			}
 		} else {
 			IP6_INC_STATS_BH(Ip6InDelivers);
 			kfree_skb(skb);
diff -Nru a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
--- a/net/ipv6/ip6_output.c	Mon Mar 31 13:41:06 2003
+++ b/net/ipv6/ip6_output.c	Mon Mar 31 13:41:06 2003
@@ -50,6 +50,7 @@
 #include <net/addrconf.h>
 #include <net/rawv6.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>
 
 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
 {
@@ -747,6 +748,9 @@
 	if (ipv6_devconf.forwarding == 0)
 		goto error;
 
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb))
+		goto drop;
+
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/*
@@ -780,6 +784,9 @@
 		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
+
+	if (!xfrm6_route_forward(skb))
+		goto drop;
 
 	/* IPv6 specs say nothing about it, but it is clear that we cannot
 	   send redirects to source routed frames.
diff -Nru a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
--- a/net/ipv6/ipv6_syms.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv6/ipv6_syms.c	Mon Mar 31 13:41:08 2003
@@ -9,6 +9,8 @@
 
 EXPORT_SYMBOL(ipv6_addr_type);
 EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(icmpv6_statistics);
+EXPORT_SYMBOL(icmpv6_err_convert);
 EXPORT_SYMBOL(ndisc_mc_map);
 EXPORT_SYMBOL(register_inet6addr_notifier);
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
diff -Nru a/net/ipv6/raw.c b/net/ipv6/raw.c
--- a/net/ipv6/raw.c	Mon Mar 31 13:41:07 2003
+++ b/net/ipv6/raw.c	Mon Mar 31 13:41:07 2003
@@ -273,7 +273,6 @@
 
 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
-#if defined(CONFIG_FILTER)
 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
 		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
 			/* FIXME: increment a raw6 drops counter here */
@@ -282,7 +281,7 @@
 		}
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-#endif
+
 	/* Charge it to the socket. */
 	if (sock_queue_rcv_skb(sk,skb)<0) {
 		/* FIXME: increment a raw6 drops counter here */
diff -Nru a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
--- a/net/ipv6/tcp_ipv6.c	Mon Mar 31 13:41:07 2003
+++ b/net/ipv6/tcp_ipv6.c	Mon Mar 31 13:41:07 2003
@@ -2193,6 +2193,7 @@
 static struct inet6_protocol tcpv6_protocol = {
 	.handler	=	tcp_v6_rcv,
 	.err_handler	=	tcp_v6_err,
+	.no_policy	=	1,
 };
 
 extern struct proto_ops inet6_stream_ops;
diff -Nru a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv6/udp.c	Mon Mar 31 13:41:08 2003
@@ -547,7 +547,6 @@
 		return -1;
 	}
 
-#if defined(CONFIG_FILTER)
 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
 		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
 			UDP6_INC_STATS_BH(UdpInErrors);
@@ -556,7 +555,7 @@
 		}
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-#endif
+
 	if (sock_queue_rcv_skb(sk,skb)<0) {
 		UDP6_INC_STATS_BH(UdpInErrors);
 		kfree_skb(skb);
@@ -653,9 +652,6 @@
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
 
-	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
-                goto discard;
-
 	saddr = &skb->nh.ipv6h->saddr;
 	daddr = &skb->nh.ipv6h->daddr;
 	uh = skb->h.uh;
@@ -713,6 +709,9 @@
 	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
 
 	if (sk == NULL) {
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto discard;
+
 		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
 		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
 			goto discard;
@@ -955,6 +954,7 @@
 static struct inet6_protocol udpv6_protocol = {
 	.handler	=	udpv6_rcv,
 	.err_handler	=	udpv6_err,
+	.no_policy	=	1,
 };
 
 #define LINE_LEN 190
diff -Nru a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
--- a/net/ipv6/xfrm6_input.c	Mon Mar 31 13:41:08 2003
+++ b/net/ipv6/xfrm6_input.c	Mon Mar 31 13:41:08 2003
@@ -186,6 +186,8 @@
 
 		xfrm_vec[xfrm_nr++] = x;
 
+		iph = skb->nh.ipv6h;
+
 		if (x->props.mode) { /* XXX */
 			if (iph->nexthdr != IPPROTO_IPV6)
 				goto drop;
@@ -199,9 +201,11 @@
 			goto drop;
 	} while (!err);
 
-	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-	skb->nh.raw[nh_offset] = nexthdr;
-	skb->nh.ipv6h->payload_len = htons(hdr_len + skb->len - sizeof(struct ipv6hdr));
+	if (!decaps) {
+		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+		skb->nh.raw[nh_offset] = nexthdr;
+		skb->nh.ipv6h->payload_len = htons(hdr_len + skb->len - sizeof(struct ipv6hdr));
+	}
 
 	/* Allocate new secpath or COW existing one. */
 	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
diff -Nru a/net/irda/af_irda.c b/net/irda/af_irda.c
--- a/net/irda/af_irda.c	Mon Mar 31 13:41:06 2003
+++ b/net/irda/af_irda.c	Mon Mar 31 13:41:06 2003
@@ -401,11 +401,10 @@
  *
  *    Got a selective discovery indication from IrLMP.
  *
- * IrLMP is telling us that this node is matching our hint bit
- * filter. Check if it's a newly discovered node (or if node changed its
- * hint bits), and then wake up any process waiting for answer...
+ * IrLMP is telling us that this node is new and matching our hint bit
+ * filter. Wake up any process waiting for answer...
  */
-static void irda_selective_discovery_indication(discovery_t *discovery,
+static void irda_selective_discovery_indication(discinfo_t *discovery,
 						DISCOVERY_MODE mode,
 						void *priv)
 {
@@ -419,18 +418,8 @@
 		return;
 	}
 
-	/* Check if node is discovered is a new one or an old one.
-	 * We check when how long ago this node was discovered, with a
-	 * coarse timeout (we may miss some discovery events or be delayed).
-	 * Note : by doing this test here, we avoid waking up a process ;-)
-	 */
-	if((jiffies - discovery->first_timestamp) >
-	   (sysctl_discovery_timeout * HZ)) {
-		return;		/* Too old, not interesting -> goodbye */
-	}
-
 	/* Pass parameter to the caller */
-	self->cachediscovery = discovery;
+	self->cachedaddr = discovery->daddr;
 
 	/* Wake up process if its waiting for device to be discovered */
 	wake_up_interruptible(&self->query_wait);
@@ -455,7 +444,7 @@
 
 	/* Nothing for the caller */
 	self->cachelog = NULL;
-	self->cachediscovery = NULL;
+	self->cachedaddr = 0;
 	self->errno = -ETIME;
 
 	/* Wake up process if its still waiting... */
@@ -627,7 +616,7 @@
  */
 static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
 {
-	struct irda_device_info *discoveries;	/* Copy of the discovery log */
+	discinfo_t *discoveries;	/* Copy of the discovery log */
 	int	number;			/* Number of nodes in the log */
 	int	i;
 	int	err = -ENETUNREACH;
@@ -642,7 +631,8 @@
 	 * Note : we have to use irlmp_get_discoveries(), as opposed
 	 * to play with the cachelog directly, because while we are
 	 * making our ias query, le log might change... */
-	discoveries = irlmp_get_discoveries(&number, self->mask, self->nslots);
+	discoveries = irlmp_get_discoveries(&number, self->mask.word,
+					    self->nslots);
 	/* Check if the we got some results */
 	if (discoveries == NULL)
 		return -ENETUNREACH;	/* No nodes discovered */
@@ -1137,7 +1127,7 @@
 
 	/* Register as a client with IrLMP */
 	self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
-	self->mask = 0xffff;
+	self->mask.word = 0xffff;
 	self->rx_flow = self->tx_flow = FLOW_START;
 	self->nslots = DISCOVERY_DEFAULT_SLOTS;
 	self->daddr = DEV_ADDR_ANY;	/* Until we get connected */
@@ -1997,6 +1987,7 @@
 		if (optlen < sizeof(int))
 			return -EINVAL;
 
+		/* The input is really a (__u8 hints[2]), easier as an int */
 		if (get_user(opt, (int *)optval))
 			return -EFAULT;
 
@@ -2015,16 +2006,17 @@
 		if (optlen < sizeof(int))
 			return -EINVAL;
 
+		/* The input is really a (__u8 hints[2]), easier as an int */
 		if (get_user(opt, (int *)optval))
 			return -EFAULT;
 
 		/* Set the new hint mask */
-		self->mask = (__u16) opt;
+		self->mask.word = (__u16) opt;
 		/* Mask out extension bits */
-		self->mask &= 0x7f7f;
+		self->mask.word &= 0x7f7f;
 		/* Check if no bits */
-		if(!self->mask)
-			self->mask = 0xFFFF;
+		if(!self->mask.word)
+			self->mask.word = 0xFFFF;
 
 		break;
 	default:
@@ -2115,7 +2107,7 @@
 	switch (optname) {
 	case IRLMP_ENUMDEVICES:
 		/* Ask lmp for the current discovery log */
-		discoveries = irlmp_get_discoveries(&list.len, self->mask,
+		discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
 						    self->nslots);
 		/* Check if the we got some results */
 		if (discoveries == NULL)
@@ -2347,7 +2339,7 @@
 			return -EFAULT;
 
 		/* Tell IrLMP we want to be notified */
-		irlmp_update_client(self->ckey, self->mask,
+		irlmp_update_client(self->ckey, self->mask.word,
 				    irda_selective_discovery_indication,
 				    NULL, (void *) self);
 
@@ -2355,7 +2347,7 @@
 		irlmp_discovery_request(self->nslots);
 
 		/* Wait until a node is discovered */
-		if (!self->cachediscovery) {
+		if (!self->cachedaddr) {
 			int ret = 0;
 
 			IRDA_DEBUG(1, "%s(), nothing discovered yet, going to sleep...\n", __FUNCTION__);
@@ -2370,7 +2362,7 @@
 
 			/* Wait for IR-LMP to call us back */
 			__wait_event_interruptible(self->query_wait,
-			   (self->cachediscovery!=NULL || self->errno==-ETIME),
+			      (self->cachedaddr != 0 || self->errno == -ETIME),
 						   ret);
 
 			/* If watchdog is still activated, kill it! */
@@ -2387,19 +2379,25 @@
 				   __FUNCTION__);
 
 		/* Tell IrLMP that we have been notified */
-		irlmp_update_client(self->ckey, self->mask, NULL, NULL, NULL);
+		irlmp_update_client(self->ckey, self->mask.word,
+				    NULL, NULL, NULL);
 
 		/* Check if the we got some results */
-		if (!self->cachediscovery)
+		if (!self->cachedaddr)
 			return -EAGAIN;		/* Didn't find any devices */
+		daddr = self->cachedaddr;
 		/* Cleanup */
-		self->cachediscovery = NULL;
+		self->cachedaddr = 0;
 
-		/* Note : We don't return anything to the user.
-		 * We could return the device that triggered the wake up,
-		 * but it's probably better to force the user to query
-		 * the whole discovery log and let him pick one device...
+		/* We return the daddr of the device that triggered the
+		 * wakeup. As irlmp passes us only the new devices, we
+		 * are sure that it's not an old device.
+		 * If the user wants more details, he should query
+		 * the whole discovery log and pick one device...
 		 */
+		if (put_user(daddr, (int *)optval))
+			return -EFAULT;
+
 		break;
 	default:
 		return -ENOPROTOOPT;
diff -Nru a/net/irda/discovery.c b/net/irda/discovery.c
--- a/net/irda/discovery.c	Mon Mar 31 13:41:06 2003
+++ b/net/irda/discovery.c	Mon Mar 31 13:41:06 2003
@@ -59,7 +59,7 @@
 	unsigned long flags;
 
 	/* Set time of first discovery if node is new (see below) */
-	new->first_timestamp = new->timestamp;
+	new->firststamp = new->timestamp;
 
 	spin_lock_irqsave(&cachelog->hb_spinlock, flags);
 
@@ -76,24 +76,24 @@
 		/* Be sure to stay one item ahead */
 		discovery = (discovery_t *) hashbin_get_next(cachelog);
 
-		if ((node->saddr == new->saddr) &&
-		    ((node->daddr == new->daddr) || 
-		     (strcmp(node->nickname, new->nickname) == 0)))
+		if ((node->data.saddr == new->data.saddr) &&
+		    ((node->data.daddr == new->data.daddr) || 
+		     (strcmp(node->data.info, new->data.info) == 0)))
 		{
 			/* This discovery is a previous discovery 
 			 * from the same device, so just remove it
 			 */
 			hashbin_remove_this(cachelog, (irda_queue_t *) node);
-			/* Check if hints bits have changed */
-			if(node->hints.word == new->hints.word)
+			/* Check if hints bits are unchanged */
+			if(u16ho(node->data.hints) == u16ho(new->data.hints))
 				/* Set time of first discovery for this node */
-				new->first_timestamp = node->first_timestamp;
+				new->firststamp = node->firststamp;
 			kfree(node);
 		}
 	}
 
 	/* Insert the new and updated version */
-	hashbin_insert(cachelog, (irda_queue_t *) new, new->daddr, NULL);
+	hashbin_insert(cachelog, (irda_queue_t *) new, new->data.daddr, NULL);
 
 	spin_unlock_irqrestore(&cachelog->hb_spinlock, flags);
 }
@@ -147,27 +147,50 @@
  */
 void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force)
 {
-	discovery_t *discovery, *curr;
-	unsigned long flags;
+	discovery_t *		discovery;
+	discovery_t *		curr;
+	unsigned long		flags;
+	discinfo_t *		buffer = NULL;
+	int			n;		/* Size of the full log */
+	int			i = 0;		/* How many we expired */
 
+	ASSERT(log != NULL, return;);
 	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
 
 	spin_lock_irqsave(&log->hb_spinlock, flags);
 
 	discovery = (discovery_t *) hashbin_get_first(log);
 	while (discovery != NULL) {
-		curr = discovery;
-
 		/* Be sure to be one item ahead */
+		curr = discovery;
 		discovery = (discovery_t *) hashbin_get_next(log);
 
 		/* Test if it's time to expire this discovery */
-		if ((curr->saddr == saddr) &&
+		if ((curr->data.saddr == saddr) &&
 		    (force ||
 		     ((jiffies - curr->timestamp) > DISCOVERY_EXPIRE_TIMEOUT)))
 		{
-			/* Tell IrLMP and registered clients about it */
-			irlmp_discovery_expiry(curr);
+			/* Create buffer as needed.
+			 * As this function gets called a lot and most of the time
+			 * we don't have anything to put in the log (we are
+			 * quite picky), we can save a lot of overhead
+			 * by not calling kmalloc. Jean II */
+			if(buffer == NULL) {
+				/* Create the client specific buffer */
+				n = HASHBIN_GET_SIZE(log);
+				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
+				if (buffer == NULL) {
+					spin_unlock_irqrestore(&log->hb_spinlock, flags);
+					return;
+				}
+
+			}
+
+			/* Copy discovery information */
+			memcpy(&(buffer[i]), &(curr->data),
+			       sizeof(discinfo_t));
+			i++;
+
 			/* Remove it from the log */
 			curr = hashbin_remove_this(log, (irda_queue_t *) curr);
 			if (curr)
@@ -175,9 +198,23 @@
 		}
 	}
 
+	/* Drop the spinlock before calling the higher layers, as
+	 * we can't guarantee they won't call us back and create a
+	 * deadlock. We will work on our own private data, so we
+	 * don't care to be interrupted. - Jean II */
 	spin_unlock_irqrestore(&log->hb_spinlock, flags);
+
+	if(buffer == NULL)
+		return;
+
+	/* Tell IrLMP and registered clients about it */
+	irlmp_discovery_expiry(buffer, i);
+
+	/* Free up our buffer */
+	kfree(buffer);
 }
 
+#if 0
 /*
  * Function irlmp_dump_discoveries (log)
  *
@@ -193,13 +230,14 @@
 	discovery = (discovery_t *) hashbin_get_first(log);
 	while (discovery != NULL) {
 		IRDA_DEBUG(0, "Discovery:\n");
-		IRDA_DEBUG(0, "  daddr=%08x\n", discovery->daddr);
-		IRDA_DEBUG(0, "  saddr=%08x\n", discovery->saddr); 
-		IRDA_DEBUG(0, "  nickname=%s\n", discovery->nickname);
+		IRDA_DEBUG(0, "  daddr=%08x\n", discovery->data.daddr);
+		IRDA_DEBUG(0, "  saddr=%08x\n", discovery->data.saddr); 
+		IRDA_DEBUG(0, "  nickname=%s\n", discovery->data.info);
 
 		discovery = (discovery_t *) hashbin_get_next(log);
 	}
 }
+#endif
 
 /*
  * Function irlmp_copy_discoveries (log, pn, mask)
@@ -221,43 +259,49 @@
  * Note : the client must kfree himself() the log...
  * Jean II
  */
-struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, __u16 mask)
+struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
+						__u16 mask, int old_entries)
 {
-	discovery_t *			discovery;
-	unsigned long			flags;
-	struct irda_device_info *	buffer;
-	int				i = 0;
-	int				n;
+	discovery_t *		discovery;
+	unsigned long		flags;
+	discinfo_t *		buffer = NULL;
+	int			j_timeout = (sysctl_discovery_timeout * HZ);
+	int			n;		/* Size of the full log */
+	int			i = 0;		/* How many we picked */
 
 	ASSERT(pn != NULL, return NULL;);
+	ASSERT(log != NULL, return NULL;);
 
-	/* Check if log is empty */
-	if(log == NULL)
-		return NULL;
-
-	/* Save spin lock - spinlock should be discovery specific */
+	/* Save spin lock */
 	spin_lock_irqsave(&log->hb_spinlock, flags);
 
-	/* Create the client specific buffer */
-	n = HASHBIN_GET_SIZE(log);
-	buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
-	if (buffer == NULL) {
-		spin_unlock_irqrestore(&log->hb_spinlock, flags);
-		return NULL;
-	}
-
 	discovery = (discovery_t *) hashbin_get_first(log);
-	while ((discovery != NULL) && (i < n)) {
-		/* Mask out the ones we don't want */
-		if (discovery->hints.word & mask) {
+	while (discovery != NULL) {
+		/* Mask out the ones we don't want :
+		 * We want to match the discovery mask, and to get only
+		 * the most recent one (unless we want old ones) */
+		if ((u16ho(discovery->data.hints) & mask) &&
+		    ((old_entries) ||
+		     ((jiffies - discovery->firststamp) < j_timeout)) ) {
+			/* Create buffer as needed.
+			 * As this function gets called a lot and most of the time
+			 * we don't have anything to put in the log (we are
+			 * quite picky), we can save a lot of overhead
+			 * by not calling kmalloc. Jean II */
+			if(buffer == NULL) {
+				/* Create the client specific buffer */
+				n = HASHBIN_GET_SIZE(log);
+				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
+				if (buffer == NULL) {
+					spin_unlock_irqrestore(&log->hb_spinlock, flags);
+					return NULL;
+				}
+
+			}
+
 			/* Copy discovery information */
-			buffer[i].saddr = discovery->saddr;
-			buffer[i].daddr = discovery->daddr;
-			buffer[i].charset = discovery->charset;
-			buffer[i].hints[0] = discovery->hints.byte[0];
-			buffer[i].hints[1] = discovery->hints.byte[1];
-			strncpy(buffer[i].info, discovery->nickname,
-				NICKNAME_MAX_LEN);
+			memcpy(&(buffer[i]), &(discovery->data),
+			       sizeof(discinfo_t));
 			i++;
 		}
 		discovery = (discovery_t *) hashbin_get_next(log);
@@ -288,14 +332,14 @@
 	d = (discovery_t *) hashbin_get_first(cachelog);
 	while (d != NULL) {
 		IRDA_DEBUG(1, "Discovery:\n");
-		IRDA_DEBUG(1, "  daddr=%08x\n", d->daddr);
-		IRDA_DEBUG(1, "  nickname=%s\n", d->nickname);
-		
-		if (strcmp(name, d->nickname) == 0) {
-			*saddr = d->saddr;
+		IRDA_DEBUG(1, "  daddr=%08x\n", d->data.daddr);
+		IRDA_DEBUG(1, "  nickname=%s\n", d->data.info);
+
+		if (strcmp(name, d->data.info) == 0) {
+			*saddr = d->data.saddr;
 			
 			spin_unlock_irqrestore(&cachelog->hb_spinlock, flags);
-			return d->daddr;
+			return d->data.daddr;
 		}
 		d = (discovery_t *) hashbin_get_next(cachelog);
 	}
@@ -328,41 +372,41 @@
 
 	discovery = (discovery_t *) hashbin_get_first(cachelog);
 	while (( discovery != NULL) && (len < length)) {
-		len += sprintf(buf+len, "nickname: %s,", discovery->nickname);
+		len += sprintf(buf+len, "nickname: %s,", discovery->data.info);
 		
 		len += sprintf(buf+len, " hint: 0x%02x%02x", 
-			       discovery->hints.byte[0], 
-			       discovery->hints.byte[1]);
+			       discovery->data.hints[0], 
+			       discovery->data.hints[1]);
 #if 0
-		if ( discovery->hints.byte[0] & HINT_PNP)
+		if ( discovery->data.hints[0] & HINT_PNP)
 			len += sprintf( buf+len, "PnP Compatible ");
-		if ( discovery->hints.byte[0] & HINT_PDA)
+		if ( discovery->data.hints[0] & HINT_PDA)
 			len += sprintf( buf+len, "PDA/Palmtop ");
-		if ( discovery->hints.byte[0] & HINT_COMPUTER)
+		if ( discovery->data.hints[0] & HINT_COMPUTER)
 			len += sprintf( buf+len, "Computer ");
-		if ( discovery->hints.byte[0] & HINT_PRINTER)
+		if ( discovery->data.hints[0] & HINT_PRINTER)
 			len += sprintf( buf+len, "Printer ");
-		if ( discovery->hints.byte[0] & HINT_MODEM)
+		if ( discovery->data.hints[0] & HINT_MODEM)
 			len += sprintf( buf+len, "Modem ");
-		if ( discovery->hints.byte[0] & HINT_FAX)
+		if ( discovery->data.hints[0] & HINT_FAX)
 			len += sprintf( buf+len, "Fax ");
-		if ( discovery->hints.byte[0] & HINT_LAN)
+		if ( discovery->data.hints[0] & HINT_LAN)
 			len += sprintf( buf+len, "LAN Access ");
 		
-		if ( discovery->hints.byte[1] & HINT_TELEPHONY)
+		if ( discovery->data.hints[1] & HINT_TELEPHONY)
 			len += sprintf( buf+len, "Telephony ");
-		if ( discovery->hints.byte[1] & HINT_FILE_SERVER)
+		if ( discovery->data.hints[1] & HINT_FILE_SERVER)
 			len += sprintf( buf+len, "File Server ");       
-		if ( discovery->hints.byte[1] & HINT_COMM)
+		if ( discovery->data.hints[1] & HINT_COMM)
 			len += sprintf( buf+len, "IrCOMM ");
-		if ( discovery->hints.byte[1] & HINT_OBEX)
+		if ( discovery->data.hints[1] & HINT_OBEX)
 			len += sprintf( buf+len, "IrOBEX ");
 #endif		
 		len += sprintf(buf+len, ", saddr: 0x%08x", 
-			       discovery->saddr);
+			       discovery->data.saddr);
 
 		len += sprintf(buf+len, ", daddr: 0x%08x\n", 
-			       discovery->daddr);
+			       discovery->data.daddr);
 		
 		len += sprintf(buf+len, "\n");
 		
diff -Nru a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
--- a/net/irda/ircomm/ircomm_tty_attach.c	Mon Mar 31 13:41:08 2003
+++ b/net/irda/ircomm/ircomm_tty_attach.c	Mon Mar 31 13:41:08 2003
@@ -46,7 +46,7 @@
 #include <net/irda/ircomm_tty_attach.h>
 
 static void ircomm_tty_ias_register(struct ircomm_tty_cb *self);
-static void ircomm_tty_discovery_indication(discovery_t *discovery,
+static void ircomm_tty_discovery_indication(discinfo_t *discovery,
 					    DISCOVERY_MODE mode,
 					    void *priv);
 static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, 
@@ -305,7 +305,7 @@
  *    device it is, and which services it has.
  *
  */
-static void ircomm_tty_discovery_indication(discovery_t *discovery,
+static void ircomm_tty_discovery_indication(discinfo_t *discovery,
 					    DISCOVERY_MODE mode,
 					    void *priv)
 {
diff -Nru a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
--- a/net/irda/irlan/irlan_client.c	Mon Mar 31 13:41:07 2003
+++ b/net/irda/irlan/irlan_client.c	Mon Mar 31 13:41:07 2003
@@ -145,7 +145,7 @@
  *    Remote device with IrLAN server support discovered
  *
  */
-void irlan_client_discovery_indication(discovery_t *discovery,
+void irlan_client_discovery_indication(discinfo_t *discovery,
 				       DISCOVERY_MODE mode,
 				       void *priv) 
 {
diff -Nru a/net/irda/irlap_event.c b/net/irda/irlap_event.c
--- a/net/irda/irlap_event.c	Mon Mar 31 13:41:07 2003
+++ b/net/irda/irlap_event.c	Mon Mar 31 13:41:07 2003
@@ -419,7 +419,7 @@
 								   info->s);
 			if (self->slot == info->s) {
 				discovery_rsp = irlmp_get_discovery_response();
-				discovery_rsp->daddr = info->daddr;
+				discovery_rsp->data.daddr = info->daddr;
 
 				irlap_send_discovery_xid_frame(self, info->S,
 							       self->slot,
@@ -576,7 +576,7 @@
 		ASSERT(info->discovery != NULL, return -1;);
 
 		IRDA_DEBUG(4, "%s(), daddr=%08x\n", __FUNCTION__,
-			   info->discovery->daddr);
+			   info->discovery->data.daddr);
 
 		if (!self->discovery_log) {
 			WARNING("%s: discovery log is gone! "
@@ -586,7 +586,7 @@
 		}
 		hashbin_insert(self->discovery_log,
 			       (irda_queue_t *) info->discovery,
-			       info->discovery->daddr, NULL);
+			       info->discovery->data.daddr, NULL);
 
 		/* Keep state */
 		/* irlap_next_state(self, LAP_QUERY);  */
@@ -704,7 +704,7 @@
 			irlap_discovery_indication(self, info->discovery);
 		} else if ((info->s >= self->slot) && (!self->frame_sent)) {
 			discovery_rsp = irlmp_get_discovery_response();
-			discovery_rsp->daddr = info->daddr;
+			discovery_rsp->data.daddr = info->daddr;
 
 			irlap_send_discovery_xid_frame(self, info->S,
 						       self->slot, FALSE,
@@ -982,15 +982,48 @@
 		 *  Only send frame if send-window > 0.
 		 */
 		if ((self->window > 0) && (!self->remote_busy)) {
+			int nextfit;
 #ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+			struct sk_buff *skb_next;
+
+			/* With DYNAMIC_WINDOW, we keep the window size
+			 * maximum, and adapt on the packets we are sending.
+			 * At 115k, we can send only 2 packets of 2048 bytes
+			 * in a 500 ms turnaround. Without this option, we
+			 * would always limit the window to 2. With this
+			 * option, if we send smaller packets, we can send
+			 * up to 7 of them (always depending on QoS).
+			 * Jean II */
+
+			/* Look at the next skb. This is safe, as we are
+			 * the only consumer of the Tx queue (if we are not,
+			 * we have other problems) - Jean II */
+			skb_next = skb_peek(&self->txq);
+
+			/* Check if a subsequent skb exists and would fit in
+			 * the current window (with respect to turnaround
+			 * time).
+			 * This allows us to properly mark the current packet
+			 * with the pf bit, to avoid falling back on the
+			 * second test below, and avoid waiting for the
+			 * end of the window and sending an extra RR.
+			 * Note : (skb_next != NULL) <=> (skb_queue_len() > 0)
+			 * Jean II */
+			nextfit = ((skb_next != NULL) &&
+				   ((skb_next->len + skb->len) <=
+				    self->bytes_left));
+
 			/*
+			 * The current packet may not fit ! Because of test
+			 * above, this should not happen any more !!!
 			 *  Test if we have transmitted more bytes over the
 			 *  link than its possible to do with the current
 			 *  speed and turn-around-time.
 			 */
-			if (skb->len > self->bytes_left) {
-				IRDA_DEBUG(4, "%s(), Not allowed to transmit"
+			if((!nextfit) && (skb->len > self->bytes_left)) {
+				IRDA_DEBUG(0, "%s(), Not allowed to transmit"
 					   " more bytes!\n", __FUNCTION__);
+				/* Requeue the skb */
 				skb_queue_head(&self->txq, skb_get(skb));
 				/*
 				 *  We should switch state to LAP_NRM_P, but
@@ -1000,20 +1033,33 @@
 				 *  trigger anyway now, so we just wait for it
 				 *  DB
 				 */
+				/*
+				 * Sorry, but that's not totally true. If
+				 * we send 2000B packets, we may wait another
+				 * 1000B until our turnaround expires. That's
+				 * why we need to be proactive in avoiding
+				 * coming here. - Jean II
+				 */
 				return -EPROTO;
 			}
+
+			/* Subtract space used by this skb */
 			self->bytes_left -= skb->len;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/* Window has been adjusted for the max packet
+			 * size, so much simpler... - Jean II */
+			nextfit = (skb_queue_len(&self->txq) > 0);
+#endif	/* CONFIG_IRDA_DYNAMIC_WINDOW */
 			/*
 			 *  Send data with poll bit cleared only if window > 1
 			 *  and there is more frames after this one to be sent
 			 */
-			if ((self->window > 1) &&
-			    skb_queue_len( &self->txq) > 0)
-			{
+			if ((self->window > 1) && (nextfit)) {
+				/* More packet to send in current window */
 				irlap_send_data_primary(self, skb);
 				irlap_next_state(self, LAP_XMIT_P);
 			} else {
+				/* Final packet of window */
 				irlap_send_data_primary_poll(self, skb);
 				irlap_next_state(self, LAP_NRM_P);
 
@@ -1683,16 +1729,37 @@
 	switch (event) {
 	case SEND_I_CMD:
 		/*
-		 *  Send frame only if send window > 1
+		 *  Send frame only if send window > 0
 		 */
 		if ((self->window > 0) && (!self->remote_busy)) {
+			int nextfit;
 #ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+			struct sk_buff *skb_next;
+
+			/*
+			 * Same deal as in irlap_state_xmit_p(), so see
+			 * the comments at that point.
+			 * We are the secondary, so there are only subtle
+			 * differences. - Jean II
+			 */
+
+			/* Check if a subsequent skb exists and would fit in
+			 * the current window (with respect to turnaround
+			 * time). - Jean II */
+			skb_next = skb_peek(&self->txq);
+			nextfit = ((skb_next != NULL) &&
+				   ((skb_next->len + skb->len) <=
+				    self->bytes_left));
+
 			/*
 			 *  Test if we have transmitted more bytes over the
 			 *  link than its possible to do with the current
 			 *  speed and turn-around-time.
 			 */
-			if (skb->len > self->bytes_left) {
+			if((!nextfit) && (skb->len > self->bytes_left)) {
+				IRDA_DEBUG(0, "%s(), Not allowed to transmit"
+					   " more bytes!\n", __FUNCTION__);
+				/* Requeue the skb */
 				skb_queue_head(&self->txq, skb_get(skb));
 
 				/*
@@ -1706,18 +1773,24 @@
 				irlap_start_wd_timer(self, self->wd_timeout);
 
 				irlap_next_state(self, LAP_NRM_S);
+				/* Slight difference with primary :
+				 * here we would wait for the other side to
+				 * expire the turnaround. - Jean II */
 
 				return -EPROTO; /* Try again later */
 			}
+			/* Subtract space used by this skb */
 			self->bytes_left -= skb->len;
+#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/* Window has been adjusted for the max packet
+			 * size, so much simpler... - Jean II */
+			nextfit = (skb_queue_len(&self->txq) > 0);
 #endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
 			/*
 			 *  Send data with final bit cleared only if window > 1
 			 *  and there is more frames to be sent
 			 */
-			if ((self->window > 1) &&
-			    skb_queue_len(&self->txq) > 0)
-			{
+			if ((self->window > 1) && (nextfit)) {
 				irlap_send_data_secondary(self, skb);
 				irlap_next_state(self, LAP_XMIT_S);
 			} else {
diff -Nru a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
--- a/net/irda/irlap_frame.c	Mon Mar 31 13:41:06 2003
+++ b/net/irda/irlap_frame.c	Mon Mar 31 13:41:06 2003
@@ -335,7 +335,7 @@
 	if (command)
 		frame->daddr = cpu_to_le32(bcast);
 	else
-		frame->daddr = cpu_to_le32(discovery->daddr);
+		frame->daddr = cpu_to_le32(discovery->data.daddr);
 
 	switch (S) {
 	case 1:
@@ -366,20 +366,20 @@
 	if (!command || (frame->slotnr == 0xff)) {
 		int len;
 
-		if (discovery->hints.byte[0] & HINT_EXTENSION) {
+		if (discovery->data.hints[0] & HINT_EXTENSION) {
 			info = skb_put(skb, 2);
-			info[0] = discovery->hints.byte[0];
-			info[1] = discovery->hints.byte[1];
+			info[0] = discovery->data.hints[0];
+			info[1] = discovery->data.hints[1];
 		} else {
 			info = skb_put(skb, 1);
-			info[0] = discovery->hints.byte[0];
+			info[0] = discovery->data.hints[0];
 		}
 		info = skb_put(skb, 1);
-		info[0] = discovery->charset;
+		info[0] = discovery->data.charset;
 
 		len = IRDA_MIN(discovery->name_len, skb_tailroom(skb));
 		info = skb_put(skb, len);
-		memcpy(info, discovery->nickname, len);
+		memcpy(info, discovery->data.info, len);
 	}
 	irlap_queue_xmit(self, skb);
 }
@@ -422,24 +422,25 @@
 	}
 	memset(discovery, 0, sizeof(discovery_t));
 
-	discovery->daddr = info->daddr;
-	discovery->saddr = self->saddr;
+	discovery->data.daddr = info->daddr;
+	discovery->data.saddr = self->saddr;
 	discovery->timestamp = jiffies;
 
-	IRDA_DEBUG(4, "%s(), daddr=%08x\n", __FUNCTION__, discovery->daddr);
+	IRDA_DEBUG(4, "%s(), daddr=%08x\n", __FUNCTION__,
+		   discovery->data.daddr);
 
 	discovery_info = skb_pull(skb, sizeof(struct xid_frame));
 
 	/* Get info returned from peer */
-	discovery->hints.byte[0] = discovery_info[0];
+	discovery->data.hints[0] = discovery_info[0];
 	if (discovery_info[0] & HINT_EXTENSION) {
 		IRDA_DEBUG(4, "EXTENSION\n");
-		discovery->hints.byte[1] = discovery_info[1];
-		discovery->charset = discovery_info[2];
+		discovery->data.hints[1] = discovery_info[1];
+		discovery->data.charset = discovery_info[2];
 		text = (char *) &discovery_info[3];
 	} else {
-		discovery->hints.byte[1] = 0;
-		discovery->charset = discovery_info[1];
+		discovery->data.hints[1] = 0;
+		discovery->data.charset = discovery_info[1];
 		text = (char *) &discovery_info[2];
 	}
 	/*
@@ -447,8 +448,8 @@
 	 *  FCS bytes resides.
 	 */
 	skb->data[skb->len] = '\0';
-	strncpy(discovery->nickname, text, NICKNAME_MAX_LEN);
-	discovery->name_len = strlen(discovery->nickname);
+	strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
+	discovery->name_len = strlen(discovery->data.info);
 
 	info->discovery = discovery;
 
@@ -523,18 +524,18 @@
 			return;
 		}
 
-		discovery->daddr = info->daddr;
-		discovery->saddr = self->saddr;
+		discovery->data.daddr = info->daddr;
+		discovery->data.saddr = self->saddr;
 		discovery->timestamp = jiffies;
 
-		discovery->hints.byte[0] = discovery_info[0];
+		discovery->data.hints[0] = discovery_info[0];
 		if (discovery_info[0] & HINT_EXTENSION) {
-			discovery->hints.byte[1] = discovery_info[1];
-			discovery->charset = discovery_info[2];
+			discovery->data.hints[1] = discovery_info[1];
+			discovery->data.charset = discovery_info[2];
 			text = (char *) &discovery_info[3];
 		} else {
-			discovery->hints.byte[1] = 0;
-			discovery->charset = discovery_info[1];
+			discovery->data.hints[1] = 0;
+			discovery->data.charset = discovery_info[1];
 			text = (char *) &discovery_info[2];
 		}
 		/*
@@ -542,8 +543,8 @@
 		 *  FCS bytes resides.
 		 */
 		skb->data[skb->len] = '\0';
-		strncpy(discovery->nickname, text, NICKNAME_MAX_LEN);
-		discovery->name_len = strlen(discovery->nickname);
+		strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
+		discovery->name_len = strlen(discovery->data.info);
 
 		info->discovery = discovery;
 	} else
diff -Nru a/net/irda/irlmp.c b/net/irda/irlmp.c
--- a/net/irda/irlmp.c	Mon Mar 31 13:41:08 2003
+++ b/net/irda/irlmp.c	Mon Mar 31 13:41:08 2003
@@ -401,8 +401,8 @@
 		}
 
 		if (discovery) {
-			saddr = discovery->saddr;
-			daddr = discovery->daddr;
+			saddr = discovery->data.saddr;
+			daddr = discovery->data.daddr;
 		}
 		spin_unlock_irqrestore(&irlmp->cachelog->hb_spinlock, flags);
 	}
@@ -793,17 +793,17 @@
 	}
 
 	/* Construct new discovery info to be used by IrLAP, */
-	irlmp->discovery_cmd.hints.word = irlmp->hints.word;
+	u16ho(irlmp->discovery_cmd.data.hints) = irlmp->hints.word;
 
 	/*
 	 *  Set character set for device name (we use ASCII), and
 	 *  copy device name. Remember to make room for a \0 at the
 	 *  end
 	 */
-	irlmp->discovery_cmd.charset = CS_ASCII;
-	strncpy(irlmp->discovery_cmd.nickname, sysctl_devname,
+	irlmp->discovery_cmd.data.charset = CS_ASCII;
+	strncpy(irlmp->discovery_cmd.data.info, sysctl_devname,
 		NICKNAME_MAX_LEN);
-	irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.nickname);
+	irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.data.info);
 	irlmp->discovery_cmd.nslots = nslots;
 
 	/*
@@ -827,10 +827,13 @@
  *
  *    Do a discovery of devices in front of the computer
  *
+ * If the caller has registered a client discovery callback, this
+ * allows him to receive the full content of the discovery log through
+ * this callback (as normally he will receive only new discoveries).
  */
 void irlmp_discovery_request(int nslots)
 {
-	/* Return current cached discovery log */
+	/* Return current cached discovery log (in full) */
 	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_LOG);
 
 	/*
@@ -854,6 +857,8 @@
  *
  *    Return the current discovery log
  *
+ * If discovery is not enabled, you should call this function again
+ * after 1 or 2 seconds (i.e. after discovery has been done).
  */
 struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
 {
@@ -875,50 +880,9 @@
 	}
 
 	/* Return current cached discovery log */
-	return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask));
+	return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE));
 }
 
-#if 0
-/*
- * Function irlmp_check_services (discovery)
- */
-void irlmp_check_services(discovery_t *discovery)
-{
-	struct irlmp_client *client;
-	__u8 *service_log;
-	__u8 service;
-	int i = 0;
-
-	IRDA_DEBUG(1, "IrDA Discovered: %s\n", discovery->info);
-	IRDA_DEBUG(1, "    Services: ");
-
-	service_log = irlmp_hint_to_service(discovery->hints.byte);
-	if (!service_log)
-		return;
-
-	/*
-	 *  Check all services on the device
-	 */
-	while ((service = service_log[i++]) != S_END) {
-		IRDA_DEBUG( 4, "service=%02x\n", service);
-		client = hashbin_lock_find(irlmp->registry, service, NULL);
-		if (entry && entry->discovery_callback) {
-			IRDA_DEBUG( 4, "discovery_callback!\n");
-
-			entry->discovery_callback(discovery);
-		} else {
-			/* Don't notify about the ANY service */
-			if (service == S_ANY)
-				continue;
-			/*
-			 * Found no clients for dealing with this service,
-			 */
-		}
-	}
-	kfree(service_log);
-}
-#endif
-
 /*
  * Function irlmp_notify_client (log)
  *
@@ -935,7 +899,9 @@
 irlmp_notify_client(irlmp_client_t *client,
 		    hashbin_t *log, DISCOVERY_MODE mode)
 {
-	discovery_t *discovery;
+	discinfo_t *discoveries;	/* Copy of the discovery log */
+	int	number;			/* Number of nodes in the log */
+	int	i;
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
@@ -944,28 +910,36 @@
 		return;
 
 	/*
+	 * Locking notes :
+	 * the old code was manipulating the log directly, which was
+	 * very racy. Now, we use copy_discoveries, that protects
+	 * itself while dumping the log for us.
+	 * The overhead of the copy is compensated by the fact that
+	 * we only pass new discoveries in normal mode and don't
+	 * pass the same old entry every 3s to the caller as we used
+	 * to do (virtual function calling is expensive).
+	 * Jean II
+	 */
+
+	/*
 	 * Now, check all discovered devices (if any), and notify client
 	 * only about the services that the client is interested in
-	 * Note : most often, we will get called immediately following
-	 * a discovery, so the log is not going to expire.
-	 * On the other hand, comming here through irlmp_discovery_request()
-	 * is *very* problematic - Jean II
-	 * Can't use hashbin_find_next(), key is not unique. I'm running
-	 * out of options :-( - Jean II
+	 * We also notify only about the new devices unless the caller
+	 * explicitly requests a dump of the log. Jean II
 	 */
-	discovery = (discovery_t *) hashbin_get_first(log);
-	while (discovery != NULL) {
-		IRDA_DEBUG(3, "discovery->daddr = 0x%08x\n", discovery->daddr);
-
-		/*
-		 * Any common hint bits? Remember to mask away the extension
-		 * bits ;-)
-		 */
-		if (client->hint_mask & discovery->hints.word & 0x7f7f)
-			client->disco_callback(discovery, mode, client->priv);
+	discoveries = irlmp_copy_discoveries(log, &number,
+					     client->hint_mask.word,
+					     (mode == DISCOVERY_LOG));
+	/* Check if we got some results */
+	if (discoveries == NULL)
+		return;	/* No nodes discovered */
+
+	/* Pass all entries to the listener */
+	for(i = 0; i < number; i++)
+		client->disco_callback(&(discoveries[i]), mode, client->priv);
 
-		discovery = (discovery_t *) hashbin_get_next(log);
-	}
+	/* Free up our buffer */
+	kfree(discoveries);
 }
 
 /*
@@ -987,6 +961,7 @@
 	if (!(HASHBIN_GET_SIZE(log)))
 		return;
 
+	/* For each client - notify callback may touch client list */
 	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
 	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
 					 (void *) &client_next) ) {
@@ -1005,26 +980,34 @@
  *	registered for this event...
  *
  *	Note : called exclusively from discovery.c
- *	Note : as we are currently processing the log, the clients callback
- *	should *NOT* attempt to touch the log now.
+ *	Note : this is no longer called under discovery spinlock, so the
+ *		client can do whatever he wants in the callback.
  */
-void irlmp_discovery_expiry(discovery_t *expiry)
+void irlmp_discovery_expiry(discinfo_t *expiries, int number)
 {
 	irlmp_client_t *client;
 	irlmp_client_t *client_next;
+	int		i;
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	ASSERT(expiry != NULL, return;);
+	ASSERT(expiries != NULL, return;);
 
+	/* For each client - notify callback may touch client list */
 	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
 	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
 					 (void *) &client_next) ) {
-		/* Check if we should notify client */
-		if ((client->expir_callback) &&
-		    (client->hint_mask & expiry->hints.word & 0x7f7f))
-			client->expir_callback(expiry, EXPIRY_TIMEOUT,
-					       client->priv);
+
+		/* Pass all entries to the listener */
+		for(i = 0; i < number; i++) {
+			/* Check if we should notify client */
+			if ((client->expir_callback) &&
+			    (client->hint_mask.word & u16ho(expiries[i].hints)
+			     & 0x7f7f) )
+				client->expir_callback(&(expiries[i]),
+						       EXPIRY_TIMEOUT,
+						       client->priv);
+		}
 
 		/* Next client */
 		client = client_next;
@@ -1043,18 +1026,18 @@
 
 	ASSERT(irlmp != NULL, return NULL;);
 
-	irlmp->discovery_rsp.hints.word = irlmp->hints.word;
+	u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word;
 
 	/*
 	 *  Set character set for device name (we use ASCII), and
 	 *  copy device name. Remember to make room for a \0 at the
 	 *  end
 	 */
-	irlmp->discovery_rsp.charset = CS_ASCII;
+	irlmp->discovery_rsp.data.charset = CS_ASCII;
 
-	strncpy(irlmp->discovery_rsp.nickname, sysctl_devname,
+	strncpy(irlmp->discovery_rsp.data.info, sysctl_devname,
 		NICKNAME_MAX_LEN);
-	irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.nickname);
+	irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.data.info);
 
 	return &irlmp->discovery_rsp;
 }
@@ -1291,6 +1274,7 @@
 	}
 }
 
+#if 0
 /*
  * Function irlmp_hint_to_service (hint)
  *
@@ -1365,6 +1349,21 @@
 
 	return service;
 }
+#endif
+
+const __u16 service_hint_mapping[S_END][2] = {
+	{ HINT_PNP,		0 },			/* S_PNP */
+	{ HINT_PDA,		0 },			/* S_PDA */
+	{ HINT_COMPUTER,	0 },			/* S_COMPUTER */
+	{ HINT_PRINTER,		0 },			/* S_PRINTER */
+	{ HINT_MODEM,		0 },			/* S_MODEM */
+	{ HINT_FAX,		0 },			/* S_FAX */
+	{ HINT_LAN,		0 },			/* S_LAN */
+	{ HINT_EXTENSION,	HINT_TELEPHONY },	/* S_TELEPHONY */
+	{ HINT_EXTENSION,	HINT_COMM },		/* S_COMM */
+	{ HINT_EXTENSION,	HINT_OBEX },		/* S_OBEX */
+	{ 0xFF,			0xFF },			/* S_ANY */
+};
 
 /*
  * Function irlmp_service_to_hint (service)
@@ -1377,46 +1376,9 @@
 {
 	__u16_host_order hint;
 
-	hint.word = 0;
+	hint.byte[0] = service_hint_mapping[service][0];
+	hint.byte[1] = service_hint_mapping[service][1];
 
-	switch (service) {
-	case S_PNP:
-		hint.byte[0] |= HINT_PNP;
-		break;
-	case S_PDA:
-		hint.byte[0] |= HINT_PDA;
-		break;
-	case S_COMPUTER:
-		hint.byte[0] |= HINT_COMPUTER;
-		break;
-	case S_PRINTER:
-		hint.byte[0] |= HINT_PRINTER;
-		break;
-	case S_MODEM:
-		hint.byte[0] |= HINT_PRINTER;
-		break;
-	case S_LAN:
-		hint.byte[0] |= HINT_LAN;
-		break;
-	case S_COMM:
-		hint.byte[0] |= HINT_EXTENSION;
-		hint.byte[1] |= HINT_COMM;
-		break;
-	case S_OBEX:
-		hint.byte[0] |= HINT_EXTENSION;
-		hint.byte[1] |= HINT_OBEX;
-		break;
-	case S_TELEPHONY:
-		hint.byte[0] |= HINT_EXTENSION;
-		hint.byte[1] |= HINT_TELEPHONY;
-		break;
-	case S_ANY:
-		hint.word = 0xffff;
-		break;
-	default:
-		IRDA_DEBUG( 1, "%s(), Unknown service!\n", __FUNCTION__);
-		break;
-	}
 	return hint.word;
 }
 
@@ -1438,7 +1400,7 @@
 		IRDA_DEBUG(1, "%s(), Unable to kmalloc!\n", __FUNCTION__);
 		return 0;
 	}
-	service->hints = hints;
+	service->hints.word = hints;
 	hashbin_insert(irlmp->services, (irda_queue_t *) service,
 		       (long) service, NULL);
 
@@ -1481,7 +1443,7 @@
 	spin_lock_irqsave(&irlmp->services->hb_spinlock, flags);
         service = (irlmp_service_t *) hashbin_get_first(irlmp->services);
         while (service) {
-		irlmp->hints.word |= service->hints;
+		irlmp->hints.word |= service->hints.word;
 
                 service = (irlmp_service_t *)hashbin_get_next(irlmp->services);
         }
@@ -1499,7 +1461,7 @@
  *    Returns: handle > 0 on success, 0 on error
  */
 void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
-			    DISCOVERY_CALLBACK1 expir_clb, void *priv)
+			    DISCOVERY_CALLBACK2 expir_clb, void *priv)
 {
 	irlmp_client_t *client;
 
@@ -1514,7 +1476,7 @@
 	}
 
 	/* Register the details */
-	client->hint_mask = hint_mask;
+	client->hint_mask.word = hint_mask;
 	client->disco_callback = disco_clb;
 	client->expir_callback = expir_clb;
 	client->priv = priv;
@@ -1535,7 +1497,7 @@
  */
 int irlmp_update_client(void *handle, __u16 hint_mask,
 			DISCOVERY_CALLBACK1 disco_clb,
-			DISCOVERY_CALLBACK1 expir_clb, void *priv)
+			DISCOVERY_CALLBACK2 expir_clb, void *priv)
 {
 	irlmp_client_t *client;
 
@@ -1548,7 +1510,7 @@
 		return -1;
 	}
 
-	client->hint_mask = hint_mask;
+	client->hint_mask.word = hint_mask;
 	client->disco_callback = disco_clb;
 	client->expir_callback = expir_clb;
 	client->priv = priv;
diff -Nru a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
--- a/net/irda/irnet/irnet.h	Mon Mar 31 13:41:07 2003
+++ b/net/irda/irnet/irnet.h	Mon Mar 31 13:41:07 2003
@@ -225,6 +225,10 @@
  *
  * v13 - 30.5.02 - Jean II
  *	o Update module init code
+ *
+ * v14 - 20.2.03 - Jean II
+ *	o Add discovery hint bits in the control channel.
+ *	o Remove obsolete MOD_INC/DEC_USE_COUNT in favor of .owner
  */
 
 /***************************** INCLUDES *****************************/
@@ -476,6 +480,7 @@
   __u32		saddr;
   __u32		daddr;
   char		name[NICKNAME_MAX_LEN + 1];	/* 21 + 1 */
+  __u16_host_order hints;			/* Discovery hint bits */
 } irnet_log;
 
 /*
diff -Nru a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
--- a/net/irda/irnet/irnet_irda.c	Mon Mar 31 13:41:06 2003
+++ b/net/irda/irnet/irnet_irda.c	Mon Mar 31 13:41:06 2003
@@ -28,7 +28,8 @@
 		 irnet_event	event,
 		 __u32		saddr,
 		 __u32		daddr,
-		 char *		name)
+		 char *		name,
+		 __u16		hints)
 {
   unsigned long		flags;		/* For spinlock */
   int			index;		/* In the log */
@@ -52,6 +53,8 @@
     strcpy(irnet_events.log[index].name, name);
   else
     irnet_events.log[index].name[0] = '\0';
+  /* Copy hints */
+  irnet_events.log[index].hints.word = hints;
   /* Try to get ppp unit number */
   if((ap != (irnet_socket *) NULL) && (ap->ppp_open))
     irnet_events.log[index].unit = ppp_unit_number(&ap->chan);
@@ -609,7 +612,7 @@
        * doesn't exist anymore when we post the event, so we need to pass
        * NULL as the first arg... */
       irnet_post_event(NULL, IRNET_DISCONNECT_TO,
-		       self->saddr, self->daddr, self->rname);
+		       self->saddr, self->daddr, self->rname, 0);
     }
 
   /* Prevent various IrDA callbacks from messing up things
@@ -862,7 +865,7 @@
 
   /* Notify the control channel */
   irnet_post_event(new, IRNET_CONNECT_FROM,
-		   new->saddr, new->daddr, server->rname);
+		   new->saddr, new->daddr, server->rname, 0);
 
   DEXIT(IRDA_SERV_TRACE, "\n");
   return 0;
@@ -893,7 +896,7 @@
 
   /* Notify the control channel (see irnet_find_socket()) */
   irnet_post_event(NULL, IRNET_REQUEST_FROM,
-		   self->saddr, self->daddr, self->rname);
+		   self->saddr, self->daddr, self->rname, 0);
 
   /* Clean up the server to keep it in listen state */
   irttp_listen(self->tsap);
@@ -1108,12 +1111,12 @@
   /* If we were active, notify the control channel */
   if(test_open)
     irnet_post_event(self, IRNET_DISCONNECT_FROM,
-		     self->saddr, self->daddr, self->rname);
+		     self->saddr, self->daddr, self->rname, 0);
   else
     /* If we were trying to connect, notify the control channel */
     if((self->tsap) && (self != &irnet_server.s))
       irnet_post_event(self, IRNET_NOANSWER_FROM,
-		       self->saddr, self->daddr, self->rname);
+		       self->saddr, self->daddr, self->rname, 0);
 
   /* Close our IrTTP connection, cleanup tsap */
   if((self->tsap) && (self != &irnet_server.s))
@@ -1213,7 +1216,7 @@
 
   /* Notify the control channel */
   irnet_post_event(self, IRNET_CONNECT_TO,
-		   self->saddr, self->daddr, self->rname);
+		   self->saddr, self->daddr, self->rname, 0);
 
   DEXIT(IRDA_TCB_TRACE, "\n");
 }
@@ -1282,7 +1285,7 @@
     {
     case STATUS_NO_ACTIVITY:
       irnet_post_event(self, IRNET_BLOCKED_LINK,
-		       self->saddr, self->daddr, self->rname);
+		       self->saddr, self->daddr, self->rname, 0);
       break;
     default:
       DEBUG(IRDA_CB_INFO, "Unknown status...\n");
@@ -1616,8 +1619,8 @@
  *
  *    Got a discovery indication from IrLMP, post an event
  *
- * Note : IrLMP take care of matching the hint mask for us, we only
- * check if it is a "new" node...
+ * Note : IrLMP takes care of matching the hint mask for us, and also
+ * checks if it is a "new" node for us...
  *
  * As IrLMP filter on the IrLAN hint bit, we get both IrLAN and IrNET
  * nodes, so it's only at connection time that we will know if the
@@ -1633,7 +1636,7 @@
  * is to messy, so we leave that to user space...
  */
 static void
-irnet_discovery_indication(discovery_t *	discovery,
+irnet_discovery_indication(discinfo_t *		discovery,
 			   DISCOVERY_MODE	mode,
 			   void *		priv)
 {
@@ -1643,21 +1646,13 @@
   DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
 	  "Invalid instance (0x%X) !!!\n", (unsigned int) priv);
 
-  /* Check if node is discovered is a new one or an old one.
-   * We check when how long ago this node was discovered, with a
-   * coarse timeout (we may miss some discovery events or be delayed).
-   */
-  if((jiffies - discovery->first_timestamp) >= (sysctl_discovery_timeout * HZ))
-    {
-      return;		/* Too old, not interesting -> goodbye */
-    }
-
   DEBUG(IRDA_OCB_INFO, "Discovered new IrNET/IrLAN node %s...\n",
-	discovery->nickname);
+	discovery->info);
 
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_DISCOVER,
-		   discovery->saddr, discovery->daddr, discovery->nickname);
+		   discovery->saddr, discovery->daddr, discovery->info,
+		   u16ho(discovery->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
@@ -1672,7 +1667,7 @@
  * check if it is a "new" node...
  */
 static void
-irnet_expiry_indication(discovery_t *	expiry,
+irnet_expiry_indication(discinfo_t *	expiry,
 			DISCOVERY_MODE	mode,
 			void *		priv)
 {
@@ -1683,11 +1678,12 @@
 	  "Invalid instance (0x%X) !!!\n", (unsigned int) priv);
 
   DEBUG(IRDA_OCB_INFO, "IrNET/IrLAN node %s expired...\n",
-	expiry->nickname);
+	expiry->info);
 
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_EXPIRE,
-		   expiry->saddr, expiry->daddr, expiry->nickname);
+		   expiry->saddr, expiry->daddr, expiry->info,
+		   u16ho(expiry->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
diff -Nru a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h
--- a/net/irda/irnet/irnet_irda.h	Mon Mar 31 13:41:06 2003
+++ b/net/irda/irnet/irnet_irda.h	Mon Mar 31 13:41:06 2003
@@ -69,7 +69,8 @@
 			 irnet_event,
 			 __u32,
 			 __u32,
-			 char *);
+			 char *,
+			 __u16);
 /* ----------------------- IRDA SUBROUTINES ----------------------- */
 static inline int
 	irnet_open_tsap(irnet_socket *);
@@ -150,11 +151,11 @@
 				    void *);
 #ifdef DISCOVERY_EVENTS
 static void
-	irnet_discovery_indication(discovery_t *,
+	irnet_discovery_indication(discinfo_t *,
 				   DISCOVERY_MODE,
 				   void *);
 static void
-	irnet_expiry_indication(discovery_t *,
+	irnet_expiry_indication(discinfo_t *,
 				DISCOVERY_MODE,
 				void *);
 #endif
diff -Nru a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
--- a/net/irda/irnet/irnet_ppp.c	Mon Mar 31 13:41:08 2003
+++ b/net/irda/irnet/irnet_ppp.c	Mon Mar 31 13:41:08 2003
@@ -213,10 +213,12 @@
   if(ap->disco_index < ap->disco_number)
     {
       /* Write an event */
-      sprintf(event, "Found %08x (%s) behind %08x\n",
+      sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
 	      ap->discoveries[ap->disco_index].daddr,
 	      ap->discoveries[ap->disco_index].info,
-	      ap->discoveries[ap->disco_index].saddr);
+	      ap->discoveries[ap->disco_index].saddr,
+	      ap->discoveries[ap->disco_index].hints[0],
+	      ap->discoveries[ap->disco_index].hints[1]);
       DEBUG(CTRL_INFO, "Writing discovery %d : %s\n",
 	    ap->disco_index, ap->discoveries[ap->disco_index].info);
 
@@ -313,16 +315,20 @@
   switch(irnet_events.log[ap->event_index].event)
     {
     case IRNET_DISCOVER:
-      sprintf(event, "Discovered %08x (%s) behind %08x\n",
+      sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
 	      irnet_events.log[ap->event_index].daddr,
 	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].saddr);
+	      irnet_events.log[ap->event_index].saddr,
+	      irnet_events.log[ap->event_index].hints.byte[0],
+	      irnet_events.log[ap->event_index].hints.byte[1]);
       break;
     case IRNET_EXPIRE:
-      sprintf(event, "Expired %08x (%s) behind %08x\n",
+      sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
 	      irnet_events.log[ap->event_index].daddr,
 	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].saddr);
+	      irnet_events.log[ap->event_index].saddr,
+	      irnet_events.log[ap->event_index].hints.byte[0],
+	      irnet_events.log[ap->event_index].hints.byte[1]);
       break;
     case IRNET_CONNECT_TO:
       sprintf(event, "Connected to %08x (%s) on ppp%d\n",
@@ -445,8 +451,6 @@
   ap = kmalloc(sizeof(*ap), GFP_KERNEL);
   DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
 
-  MOD_INC_USE_COUNT;
-
   /* initialize the irnet structure */
   memset(ap, 0, sizeof(*ap));
   ap->file = file;
@@ -469,7 +473,6 @@
     {
       DERROR(FS_ERROR, "Can't setup IrDA link...\n");
       kfree(ap);
-      MOD_DEC_USE_COUNT;
       return err;
     }
 
@@ -514,7 +517,6 @@
     }
 
   kfree(ap);
-  MOD_DEC_USE_COUNT;
 
   DEXIT(FS_TRACE, "\n");
   return 0;
diff -Nru a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
--- a/net/irda/irnet/irnet_ppp.h	Mon Mar 31 13:41:08 2003
+++ b/net/irda/irnet/irnet_ppp.h	Mon Mar 31 13:41:08 2003
@@ -98,6 +98,7 @@
 /* Filesystem callbacks (to call us) */
 static struct file_operations irnet_device_fops =
 {
+	.owner		= THIS_MODULE,
 	.read		= dev_irnet_read,
 	.write		= dev_irnet_write,
 	.poll		= dev_irnet_poll,
diff -Nru a/net/irda/timer.c b/net/irda/timer.c
--- a/net/irda/timer.c	Mon Mar 31 13:41:07 2003
+++ b/net/irda/timer.c	Mon Mar 31 13:41:07 2003
@@ -39,8 +39,8 @@
 static void irlap_final_timer_expired(void* data);
 static void irlap_wd_timer_expired(void* data);
 static void irlap_backoff_timer_expired(void* data);
-
 static void irlap_media_busy_expired(void* data); 
+
 /*
  * Function irda_start_timer (timer, timeout)
  *
@@ -50,19 +50,18 @@
 void irda_start_timer(struct timer_list *ptimer, int timeout, void *data,
 		      TIMER_CALLBACK callback) 
 {
-	del_timer(ptimer);
- 
-	ptimer->data = (unsigned long) data;
-
 	/* 
 	 * For most architectures void * is the same as unsigned long, but
 	 * at least we try to use void * as long as possible. Since the 
 	 * timer functions use unsigned long, we cast the function here
 	 */
 	ptimer->function = (void (*)(unsigned long)) callback;
-	ptimer->expires = jiffies + timeout;
+	ptimer->data = (unsigned long) data;
 	
-	add_timer(ptimer);
+	/* Set new value for timer (update or add timer).
+	 * We use mod_timer() because it's more efficient and also
+	 * safer with respect to race conditions - Jean II */
+	mod_timer(ptimer, jiffies + timeout);
 }
 
 void irlap_start_slot_timer(struct irlap_cb *self, int timeout)
@@ -136,8 +135,7 @@
 void irlmp_stop_idle_timer(struct lap_cb *self) 
 {
 	/* If timer is activated, kill it! */
-	if(timer_pending(&self->idle_timer))
-		del_timer(&self->idle_timer);
+	del_timer(&self->idle_timer);
 }
 
 /*
diff -Nru a/net/netsyms.c b/net/netsyms.c
--- a/net/netsyms.c	Mon Mar 31 13:41:07 2003
+++ b/net/netsyms.c	Mon Mar 31 13:41:07 2003
@@ -171,10 +171,8 @@
 EXPORT_SYMBOL(sock_map_fd);
 EXPORT_SYMBOL(sockfd_lookup);
 
-#ifdef CONFIG_FILTER
 EXPORT_SYMBOL(sk_run_filter);
 EXPORT_SYMBOL(sk_chk_filter);
-#endif
 
 EXPORT_SYMBOL(neigh_table_init);
 EXPORT_SYMBOL(neigh_table_clear);
@@ -316,6 +314,7 @@
 EXPORT_SYMBOL(xfrm_replay_advance);
 EXPORT_SYMBOL(xfrm_check_selectors);
 EXPORT_SYMBOL(__secpath_destroy);
+EXPORT_SYMBOL(xfrm_get_acqseq);
 EXPORT_SYMBOL(xfrm_parse_spi);
 EXPORT_SYMBOL(xfrm4_rcv);
 EXPORT_SYMBOL(xfrm_register_type);
@@ -355,13 +354,10 @@
 EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
 EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
 EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
-#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
-EXPORT_SYMBOL_GPL(skb_ah_walk);
-#endif
+EXPORT_SYMBOL_GPL(skb_icv_walk);
 #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
 EXPORT_SYMBOL_GPL(skb_cow_data);
 EXPORT_SYMBOL_GPL(pskb_put);
-EXPORT_SYMBOL_GPL(skb_icv_walk);
 EXPORT_SYMBOL_GPL(skb_to_sgvec);
 #endif
 
diff -Nru a/net/packet/af_packet.c b/net/packet/af_packet.c
--- a/net/packet/af_packet.c	Mon Mar 31 13:41:07 2003
+++ b/net/packet/af_packet.c	Mon Mar 31 13:41:07 2003
@@ -401,9 +401,7 @@
 	struct packet_opt *po;
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
-#ifdef CONFIG_FILTER
 	unsigned snaplen;
-#endif
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -429,7 +427,6 @@
 		}
 	}
 
-#ifdef CONFIG_FILTER
 	snaplen = skb->len;
 
 	if (sk->filter) {
@@ -446,7 +443,6 @@
 		if (snaplen > res)
 			snaplen = res;
 	}
-#endif /* CONFIG_FILTER */
 
 	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
 		goto drop_n_acct;
@@ -475,10 +471,8 @@
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
 
-#ifdef CONFIG_FILTER
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
-#endif
 
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
@@ -494,9 +488,7 @@
 	po->stats.tp_drops++;
 	spin_unlock(&sk->receive_queue.lock);
 
-#ifdef CONFIG_FILTER
 drop_n_restore:
-#endif
 	if (skb_head != skb->data && skb_shared(skb)) {
 		skb->data = skb_head;
 		skb->len = skb_len;
@@ -539,7 +531,6 @@
 
 	snaplen = skb->len;
 
-#ifdef CONFIG_FILTER
 	if (sk->filter) {
 		unsigned res = snaplen;
 		struct sk_filter *filter;
@@ -554,7 +545,6 @@
 		if (snaplen > res)
 			snaplen = res;
 	}
-#endif
 
 	if (sk->type == SOCK_DGRAM) {
 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
diff -Nru a/net/sched/sch_csz.c b/net/sched/sch_csz.c
--- a/net/sched/sch_csz.c	Mon Mar 31 13:41:07 2003
+++ b/net/sched/sch_csz.c	Mon Mar 31 13:41:07 2003
@@ -56,7 +56,7 @@
 	CSZ presents a more precise but less flexible and less efficient
 	approach. As I understand it, the main idea is to create
 	WFQ flows for each guaranteed service and to allocate
-	the rest of bandwith to dummy flow-0. Flow-0 comprises
+	the rest of bandwidth to dummy flow-0. Flow-0 comprises
 	the predictive services and the best effort traffic;
 	it is handled by a priority scheduler with the highest
 	priority band allocated	for predictive services, and the rest ---
diff -Nru a/net/sctp/ipv6.c b/net/sctp/ipv6.c
--- a/net/sctp/ipv6.c	Mon Mar 31 13:41:07 2003
+++ b/net/sctp/ipv6.c	Mon Mar 31 13:41:07 2003
@@ -680,6 +680,7 @@
 static struct inet6_protocol sctpv6_protocol = {
 	.handler      = sctp6_rcv,
 	.err_handler  = sctp_v6_err,
+	.no_policy    = 1,
 };
 
 static struct sctp_af sctp_ipv6_specific = {
diff -Nru a/net/sctp/protocol.c b/net/sctp/protocol.c
--- a/net/sctp/protocol.c	Mon Mar 31 13:41:07 2003
+++ b/net/sctp/protocol.c	Mon Mar 31 13:41:07 2003
@@ -776,6 +776,7 @@
 static struct inet_protocol sctp_protocol = {
 	.handler     = sctp_rcv,
 	.err_handler = sctp_v4_err,
+	.no_policy   = 1,
 };
 
 /* IPv4 address related functions.  */
diff -Nru a/net/socket.c b/net/socket.c
--- a/net/socket.c	Mon Mar 31 13:41:06 2003
+++ b/net/socket.c	Mon Mar 31 13:41:06 2003
@@ -347,17 +347,17 @@
 /*
  *	Obtains the first available file descriptor and sets it up for use.
  *
- *	This functions creates file structure and maps it to fd space
+ *	This function creates file structure and maps it to fd space
  *	of current process. On success it returns file descriptor
  *	and file struct implicitly stored in sock->file.
  *	Note that another thread may close file descriptor before we return
  *	from this function. We use the fact that now we do not refer
  *	to socket after mapping. If one day we will need it, this
- *	function will inincrement ref. count on file by 1.
+ *	function will increment ref. count on file by 1.
  *
  *	In any case returned fd MAY BE not valid!
- *	This race condition is inavoidable
- *	with shared fd spaces, we cannot solve is inside kernel,
+ *	This race condition is unavoidable
+ *	with shared fd spaces, we cannot solve it inside kernel,
  *	but we take care of internal coherence yet.
  */
 
@@ -1692,6 +1692,8 @@
 
 	cmsg_ptr = (unsigned long)msg_sys.msg_control;
 	msg_sys.msg_flags = 0;
+	if (MSG_CMSG_COMPAT & flags)
+		msg_sys.msg_flags = MSG_CMSG_COMPAT;
 	
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
@@ -1709,7 +1711,8 @@
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)
-		err = put_compat_msg_controllen(&msg_sys, msg_compat, cmsg_ptr);
+		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+				 &msg_compat->msg_controllen);
 	else
 		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
 				 &msg->msg_controllen);
diff -Nru a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
--- a/net/sunrpc/auth_gss/auth_gss.c	Mon Mar 31 13:41:07 2003
+++ b/net/sunrpc/auth_gss/auth_gss.c	Mon Mar 31 13:41:07 2003
@@ -654,7 +654,6 @@
 	struct xdr_netobj bufin;
 	struct xdr_netobj bufout;
 	u32		flav,len;
-	int             code = 0;
 
 	dprintk("RPC: gss_validate\n");
 
@@ -675,8 +674,7 @@
 	bufout.data = (u8 *) p;
 	bufout.len = len;
 
-	if ((code = gss_verify_mic(ctx->gc_gss_ctx, 
-				   &bufin, &bufout, &qop_state) < 0))
+	if (gss_verify_mic(ctx->gc_gss_ctx, &bufin, &bufout, &qop_state) != 0)
 		return NULL;
 	task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2;
 	dprintk("RPC: GSS gss_validate: gss_verify_mic succeeded.\n");
diff -Nru a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
--- a/net/sunrpc/clnt.c	Mon Mar 31 13:41:07 2003
+++ b/net/sunrpc/clnt.c	Mon Mar 31 13:41:07 2003
@@ -654,7 +654,8 @@
 	if (task->tk_status < 0)
 		return;
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
-	call_encode(task);
+	if (!task->tk_rqstp->rq_bytes_sent)
+		call_encode(task);
 	if (task->tk_status < 0)
 		return;
 	xprt_transmit(task);
diff -Nru a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
--- a/net/sunrpc/rpc_pipe.c	Mon Mar 31 13:41:06 2003
+++ b/net/sunrpc/rpc_pipe.c	Mon Mar 31 13:41:06 2003
@@ -422,8 +422,10 @@
 }
 
 static int
-rpc_lookup_path(char *path, struct nameidata *nd, int flags)
+rpc_lookup_parent(char *path, struct nameidata *nd)
 {
+	if (path[0] == '\0')
+		return -ENOENT;
 	if (rpc_get_mount()) {
 		printk(KERN_WARNING "%s: %s failed to mount "
 			       "pseudofilesystem \n", __FILE__, __FUNCTION__);
@@ -432,7 +434,7 @@
 	nd->mnt = mntget(rpc_mount);
 	nd->dentry = dget(rpc_mount->mnt_root);
 	nd->last_type = LAST_ROOT;
-	nd->flags = flags;
+	nd->flags = LOOKUP_PARENT;
 
 	if (path_walk(path, nd)) {
 		printk(KERN_WARNING "%s: %s failed to find path %s\n",
@@ -594,7 +596,7 @@
 	struct inode *dir;
 	int error;
 
-	if ((error = rpc_lookup_path(path, nd, LOOKUP_PARENT)) != 0)
+	if ((error = rpc_lookup_parent(path, nd)) != 0)
 		return ERR_PTR(error);
 	dir = nd->dentry->d_inode;
 	down(&dir->i_sem);
@@ -656,7 +658,7 @@
 	struct inode *dir;
 	int error;
 
-	if ((error = rpc_lookup_path(path, &nd, LOOKUP_PARENT)) != 0)
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
 	down(&dir->i_sem);
@@ -716,7 +718,7 @@
 	struct inode *dir;
 	int error;
 
-	if ((error = rpc_lookup_path(path, &nd, LOOKUP_PARENT)) != 0)
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
 	down(&dir->i_sem);
diff -Nru a/net/sunrpc/sched.c b/net/sunrpc/sched.c
--- a/net/sunrpc/sched.c	Mon Mar 31 13:41:07 2003
+++ b/net/sunrpc/sched.c	Mon Mar 31 13:41:07 2003
@@ -1115,11 +1115,12 @@
 		"-rpcwait -action- --exit--\n");
 	alltask_for_each(t, le, &all_tasks)
 		printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
-			t->tk_pid, t->tk_msg.rpc_proc->p_proc,
+			t->tk_pid,	/* next column prints -1 when no procedure is attached */
+			(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
 			t->tk_flags, t->tk_status,
 			t->tk_client, t->tk_client->cl_prog,
 			t->tk_rqstp, t->tk_timeout,
-			t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
+			rpc_qname(t->tk_rpcwait),
 			t->tk_action, t->tk_exit);
 	spin_unlock(&rpc_sched_lock);
 }
diff -Nru a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
--- a/net/sunrpc/xprt.c	Mon Mar 31 13:41:07 2003
+++ b/net/sunrpc/xprt.c	Mon Mar 31 13:41:07 2003
@@ -175,10 +175,10 @@
 
 	if (xprt->snd_task)
 		return;
-	if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
-		return;
 	task = rpc_wake_up_next(&xprt->resend);
 	if (!task) {
+		if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
+			return;
 		task = rpc_wake_up_next(&xprt->sending);
 		if (!task)
 			return;
@@ -1071,7 +1071,6 @@
 		}
 		rpc_inc_timeo(&task->tk_client->cl_rtt);
 		xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
-		__xprt_put_cong(xprt, req);
 	}
 	req->rq_nresend++;
 
@@ -1211,10 +1210,7 @@
 		req->rq_bytes_sent = 0;
 	}
  out_release:
-	spin_lock_bh(&xprt->sock_lock);
-	__xprt_release_write(xprt, task);
-	__xprt_put_cong(xprt, req);
-	spin_unlock_bh(&xprt->sock_lock);
+	xprt_release_write(xprt, task);
 	return;
  out_receive:
 	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
diff -Nru a/net/xfrm/Kconfig b/net/xfrm/Kconfig
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/Kconfig	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,12 @@
+#
+# XFRM configuration
+#
+config XFRM_USER
+	tristate "IPsec user configuration interface"
+	depends on INET
+	---help---
+	  Support for IPsec user configuration interface used
+	  by native Linux tools.
+
+	  If unsure, say Y.
+
diff -Nru a/net/xfrm/Makefile b/net/xfrm/Makefile
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/Makefile	Mon Mar 31 13:41:09 2003
@@ -0,0 +1,7 @@
+#
+# Makefile for the XFRM subsystem.
+#
+
+obj-y := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+
diff -Nru a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/xfrm_algo.c	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,695 @@
+/* 
+ * xfrm algorithm interface
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pfkeyv2.h>
+#include <net/xfrm.h>
+#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
+#include <net/ah.h>
+#endif
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#include <net/esp.h>
+#endif
+#include <asm/scatterlist.h>
+
+/*
+ * Algorithms supported by IPsec.  These entries contain properties which
+ * are used in key negotiation and xfrm processing, and are used to verify
+ * that instantiated crypto transforms have correct parameters for IPsec
+ * purposes.
+ */
+static struct xfrm_algo_desc aalg_list[] = {
+{
+	.name = "digest_null",
+	
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 0,
+			.icv_fullbits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "md5",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_AALG_MD5HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "sha1",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_AALG_SHA1HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+{
+	.name = "sha256",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 128,
+			.icv_fullbits = 256,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 256,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+	.name = "ripemd160",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+};
+
+static struct xfrm_algo_desc ealg_list[] = {
+{
+	.name = "cipher_null",
+	
+	.uinfo = {
+		.encr = {
+			.blockbits = 8,
+			.defkeybits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id =	SADB_EALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "des",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 64,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 64,
+		.sadb_alg_maxbits = 64
+	}
+},
+{
+	.name = "des3_ede",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 192,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_3DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 192,
+		.sadb_alg_maxbits = 192
+	}
+},
+{
+	.name = "cast128",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_CASTCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "blowfish",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 448
+	}
+},
+{
+	.name = "aes",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 128,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_AESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
+};
+
+static struct xfrm_algo_desc calg_list[] = {
+{
+	.name = "deflate",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
+},
+{
+	.name = "lzs",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
+},
+{
+	.name = "lzjh",
+	.uinfo = {
+		.comp = {
+			.threshold = 50,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
+},
+};
+
+static inline int aalg_entries(void)
+{
+	return sizeof(aalg_list) / sizeof(aalg_list[0]);
+}
+
+static inline int ealg_entries(void)
+{
+	return sizeof(ealg_list) / sizeof(ealg_list[0]);
+}
+
+static inline int calg_entries(void)
+{
+	return sizeof(calg_list) / sizeof(calg_list[0]);
+}
+
+/* Todo: generic iterators */
+struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < aalg_entries(); i++) {
+		if (aalg_list[i].desc.sadb_alg_id == alg_id) {
+			if (aalg_list[i].available)
+				return &aalg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < ealg_entries(); i++) {
+		if (ealg_list[i].desc.sadb_alg_id == alg_id) {
+			if (ealg_list[i].available)
+				return &ealg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < calg_entries(); i++) {
+		if (calg_list[i].desc.sadb_alg_id == alg_id) {
+			if (calg_list[i].available)
+				return &calg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name)
+{
+	int i;
+
+	if (!name)
+		return NULL;
+
+	for (i=0; i < aalg_entries(); i++) {
+		if (strcmp(name, aalg_list[i].name) == 0) {
+			if (aalg_list[i].available)
+				return &aalg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name)
+{
+	int i;
+
+	if (!name)
+		return NULL;
+
+	for (i=0; i < ealg_entries(); i++) {
+		if (strcmp(name, ealg_list[i].name) == 0) {
+			if (ealg_list[i].available)
+				return &ealg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byname(char *name)
+{
+	int i;
+
+	if (!name)
+		return NULL;
+
+	for (i=0; i < calg_entries(); i++) {
+		if (strcmp(name, calg_list[i].name) == 0) {
+			if (calg_list[i].available)
+				return &calg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
+{
+	if (idx >= aalg_entries())
+		return NULL;
+
+	return &aalg_list[idx];
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
+{
+	if (idx >= ealg_entries())
+		return NULL;
+
+	return &ealg_list[idx];
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx)
+{
+	if (idx >= calg_entries())
+		return NULL;
+
+	return &calg_list[idx];
+}
+
+/*
+ * Probe for the availability of crypto algorithms, and set the available
+ * flag for any algorithms found on the system.  This is typically called by
+ * pfkey during userspace SA add, update or register.
+ */
+void xfrm_probe_algs(void)
+{
+#ifdef CONFIG_CRYPTO
+	int i, status;
+	
+	BUG_ON(in_softirq());
+
+	for (i = 0; i < aalg_entries(); i++) {
+		status = crypto_alg_available(aalg_list[i].name, 0);
+		if (aalg_list[i].available != status)
+			aalg_list[i].available = status;
+	}
+	
+	for (i = 0; i < ealg_entries(); i++) {
+		status = crypto_alg_available(ealg_list[i].name, 0);
+		if (ealg_list[i].available != status)
+			ealg_list[i].available = status;
+	}
+	
+	for (i = 0; i < calg_entries(); i++) {
+		status = crypto_alg_available(calg_list[i].name, 0);
+		if (calg_list[i].available != status)
+			calg_list[i].available = status;
+	}
+#endif
+}
+
+int xfrm_count_auth_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < aalg_entries(); i++)
+		if (aalg_list[i].available)
+			n++;
+	return n;
+}
+
+int xfrm_count_enc_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < ealg_entries(); i++)
+		if (ealg_list[i].available)
+			n++;
+	return n;
+}
+
+/* Move to common area: it is shared with AH. */
+
+void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+		  int offset, int len, icv_update_fn_t icv_update)
+{
+	int start = skb->len - skb->data_len;
+	int i, copy = start - offset;
+	struct scatterlist sg;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		
+		sg.page = virt_to_page(skb->data + offset);
+		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg.length = copy;
+		
+		icv_update(tfm, &sg, 1);
+		
+		if ((len -= copy) == 0)
+			return;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			
+			sg.page = frag->page;
+			sg.offset = frag->page_offset + offset-start;
+			sg.length = copy;
+			
+			icv_update(tfm, &sg, 1);
+
+			if (!(len -= copy))
+				return;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+				if ((len -= copy) == 0)
+					return;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+}
+
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+
+/* Although it looks generic, it is not used anywhere else. */
+
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb->len - skb->data_len;
+	int i, copy = start - offset;
+	int elt = 0;
+
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return elt;
+}
+
+/* Check that skb data bits are writable. If they are not, copy the data
+ * to a newly created private area. If "tailbits" is given, make sure that
+ * tailbits bytes beyond the current end of the skb are writable.
+ *
+ * Returns the number of scatterlist elements to load for subsequent
+ * transformations and, via *trailer, a pointer to the writable trailer skb.
+ */
+
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* Slight trouble: not enough space for the trailer.
+		 * This should not happen when the stack is tuned to generate
+		 * good frames. On a miss we reallocate and reserve even more
+		 * space; 128 extra bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery. We are in trouble: walk frag_list, copying where needed. */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is shared: it was partially pulled by someone,
+		 * which can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* This fragment must be replaced by a private copy. */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb, skb1->sk);	/* NOTE(review): owner is set on the head skb, not skb2 - confirm intended */
+
+			/* The copy succeeded: link the new skb in place of
+			 * the old one and drop the old one. */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+#endif
diff -Nru a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/xfrm_input.c	Mon Mar 31 13:41:06 2003
@@ -0,0 +1,52 @@
+/*
+ * xfrm_input.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+void __secpath_destroy(struct sec_path *sp)
+{
+	int i;
+	for (i = 0; i < sp->len; i++)
+		xfrm_state_put(sp->xvec[i]);
+	kmem_cache_free(sp->pool, sp);
+}
+
+/* Fetch spi and seq from ipsec header */
+
+int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{	/* returns 0 on success, 1 for an unhandled nexthdr, -EINVAL when pskb_may_pull() fails */
+	int offset, offset_seq;	/* byte offsets of the spi and seq_no fields from skb->h.raw */
+
+	switch (nexthdr) {
+	case IPPROTO_AH:
+		offset = offsetof(struct ip_auth_hdr, spi);
+		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
+		break;
+	case IPPROTO_ESP:
+		offset = offsetof(struct ip_esp_hdr, spi);
+		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
+		break;
+	case IPPROTO_COMP:
+		if (!pskb_may_pull(skb, 4))	/* the 16-bit value below is read at bytes 2..3 */
+			return -EINVAL;
+		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));	/* 16-bit field widened to a 32-bit spi */
+		*seq = 0;	/* no sequence number is read for this protocol */
+		return 0;
+	default:
+		return 1;	/* *spi and *seq are left untouched */
+	}
+
+	if (!pskb_may_pull(skb, 16))
+		return -EINVAL;
+
+	*spi = *(u32*)(skb->h.raw + offset);	/* copied as stored, no byte-order conversion */
+	*seq = *(u32*)(skb->h.raw + offset_seq);
+	return 0;
+}
diff -Nru a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/xfrm_policy.c	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,1232 @@
+/* 
+ * xfrm_policy.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro
+ * 		IPv6 support
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/config.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+DECLARE_MUTEX(xfrm_cfg_sem);
+
+static u32      xfrm_policy_genid;
+static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;
+
+struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+
+static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
+static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+kmem_cache_t *xfrm_dst_cache;
+
+/* Limited flow cache. Its function for now is to accelerate the search
+ * for policy rules.
+ *
+ * The flow cache is private to each cpu. At the moment this is important
+ * mostly for flows which do not match any rule, so that flow lookups
+ * are absolutely cpu-local. When a rule exists we do some updates
+ * to the rule (refcnt, stats), so locality is broken. Later this
+ * can be repaired.
+ */
+
+struct flow_entry
+{
+	struct flow_entry	*next;
+	struct flowi		fl;
+	u8			dir;
+	u32			genid;
+	struct xfrm_policy	*pol;
+};
+
+static kmem_cache_t *flow_cachep;
+
+struct flow_entry **flow_table;
+
+static int flow_lwm = 2*XFRM_FLOWCACHE_HASH_SIZE;
+static int flow_hwm = 4*XFRM_FLOWCACHE_HASH_SIZE;
+
+static int flow_number[NR_CPUS] __cacheline_aligned;
+
+#define flow_count(cpu)		(flow_number[cpu])
+
+/* Trim each hash chain of @cpu to flow_lwm/XFRM_FLOWCACHE_HASH_SIZE entries,
+ * dropping policy references and keeping flow_count(cpu) in sync. */
+static void flow_cache_shrink(int cpu)
+{
+	int i, k;
+	struct flow_entry *fle, **flp;
+	int shrink_to = flow_lwm/XFRM_FLOWCACHE_HASH_SIZE;
+
+	for (i=0; i<XFRM_FLOWCACHE_HASH_SIZE; i++) {
+		flp = &flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+i];
+		for (k = 0; (fle=*flp) != NULL && k<shrink_to; k++)
+			flp = &fle->next;	/* skip the entries that are kept */
+		while ((fle=*flp) != NULL) {
+			*flp = fle->next;
+			if (fle->pol)
+				xfrm_pol_put(fle->pol);
+			kmem_cache_free(flow_cachep, fle);
+			flow_count(cpu)--;	/* else the count stays above flow_hwm forever */
+		}
+	}
+}
+
+struct xfrm_policy *flow_lookup(int dir, struct flowi *fl, 
+				unsigned short family)
+{	/* per-cpu cached policy lookup; returns a referenced policy or NULL */
+	struct xfrm_policy *pol = NULL;
+	struct flow_entry *fle;
+	u32 hash;
+	int cpu;
+
+	hash = flow_hash(fl, family);
+
+	local_bh_disable();	/* re-enabled on both return paths below */
+	cpu = smp_processor_id();
+
+	for (fle = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];
+	     fle; fle = fle->next) {
+		if (memcmp(fl, &fle->fl, sizeof(fle->fl)) == 0 &&
+		    fle->dir == dir) {
+			if (fle->genid == xfrm_policy_genid) {	/* entry is current: cache hit */
+				if ((pol = fle->pol) != NULL)
+					atomic_inc(&pol->refcnt);
+				local_bh_enable();
+				return pol;
+			}
+			break;	/* same key but old genid: fle stays non-NULL */
+		}
+	}
+
+	pol = xfrm_policy_lookup(dir, fl, family);	/* miss or stale entry: search the policy list */
+
+	if (fle) {
+		/* Stale flow entry found. Update it. */
+		fle->genid = xfrm_policy_genid;
+
+		if (fle->pol)
+			xfrm_pol_put(fle->pol);
+		fle->pol = pol;
+		if (pol)
+			atomic_inc(&pol->refcnt);	/* second reference, for the cache entry */
+	} else {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {	/* on allocation failure the result is simply not cached */
+			flow_count(cpu)++;
+			fle->fl = *fl;
+			fle->genid = xfrm_policy_genid;
+			fle->dir = dir;
+			fle->pol = pol;
+			if (pol)
+				atomic_inc(&pol->refcnt);
+			fle->next = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];	/* insert at chain head */
+			flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash] = fle;
+		}
+	}
+	local_bh_enable();
+	return pol;
+}
+
+void __init flow_cache_init(void)
+{
+	int order;
+
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_entry),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+
+	if (!flow_cachep)
+		panic("NET: failed to allocate flow cache slab\n");
+
+	for (order = 0;
+	     (PAGE_SIZE<<order) < (NR_CPUS*sizeof(struct flow_entry *)*XFRM_FLOWCACHE_HASH_SIZE);
+	     order++)
+		/* NOTHING */;
+
+	flow_table = (struct flow_entry **)__get_free_pages(GFP_ATOMIC, order);
+
+	if (!flow_table)
+		panic("Failed to allocate flow cache hash table\n");
+
+	memset(flow_table, 0, PAGE_SIZE<<order);
+}
+
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (likely(typemap->map[type->proto] == NULL))
+		typemap->map[type->proto] = type;
+	else
+		err = -EEXIST;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (unlikely(typemap->map[type->proto] != type))
+		err = -ENOENT;
+	else
+		typemap->map[type->proto] = NULL;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
+struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	struct xfrm_type *type;
+
+	if (unlikely(afinfo == NULL))
+		return NULL;
+	typemap = afinfo->type_map;
+
+	read_lock(&typemap->lock);
+	type = typemap->map[proto];
+	if (unlikely(type && !try_module_get(type->owner)))
+		type = NULL;
+	read_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return type;
+}
+
+int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
+		    unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	if (likely(afinfo->dst_lookup != NULL))
+		err = afinfo->dst_lookup(dst, fl);
+	else
+		err = -EINVAL;
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
+void xfrm_put_type(struct xfrm_type *type)
+{
+	module_put(type->owner);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return MAX_SCHEDULE_TIMEOUT-1;
+	else
+	        return secs*HZ;
+}
+
+static void xfrm_policy_timer(unsigned long data)
+{	/* timer callback; data carries the struct xfrm_policy pointer */
+	struct xfrm_policy *xp = (struct xfrm_policy*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;	/* seconds until re-arm; LONG_MAX means do not re-arm */
+	u32 index;
+
+	if (xp->dead)
+		goto out;
+
+	if (xp->lft.hard_add_expires_seconds) {
+		long tmo = xp->lft.hard_add_expires_seconds +
+			xp->curlft.add_time - now;	/* seconds left before hard expiry */
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (next != LONG_MAX &&
+	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
+		atomic_inc(&xp->refcnt);	/* presumably the reference owned by the re-armed timer - confirm */
+
+out:
+	xfrm_pol_put(xp);
+	return;
+
+expired:
+	index = xp->index;	/* saved before the put; the policy is looked up again by index */
+	xfrm_pol_put(xp);
+
+	/* Not 100% correct. id can be recycled in theory */
+	xp = xfrm_policy_byid(0, index, 1);	/* delete=1 unlinks the policy from its list */
+	if (xp) {
+		xfrm_policy_kill(xp);
+		xfrm_pol_put(xp);
+	}
+}
+
+
+/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+ */
+
+struct xfrm_policy *xfrm_policy_alloc(int gfp)
+{
+	struct xfrm_policy *policy;
+
+	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+
+	if (policy) {
+		memset(policy, 0, sizeof(struct xfrm_policy));
+		atomic_set(&policy->refcnt, 1);
+		policy->lock = RW_LOCK_UNLOCKED;
+		init_timer(&policy->timer);
+		policy->timer.data = (unsigned long)policy;
+		policy->timer.function = xfrm_policy_timer;
+	}
+	return policy;
+}
+
+/* Destroy xfrm_policy: descendant resources must be released by this moment. */
+
+void __xfrm_policy_destroy(struct xfrm_policy *policy)
+{
+	if (!policy->dead)
+		BUG();
+
+	if (policy->bundles)
+		BUG();
+
+	if (del_timer(&policy->timer))
+		BUG();
+
+	kfree(policy);
+}
+
+/* Rule must be locked. Release descendant resources, announce
+ * entry dead. The rule must be unlinked from lists by this moment.
+ */
+
+void xfrm_policy_kill(struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	write_lock_bh(&policy->lock);
+	if (policy->dead)
+		goto out;
+
+	policy->dead = 1;
+
+	while ((dst = policy->bundles) != NULL) {
+		policy->bundles = dst->next;
+		dst_free(dst);
+	}
+
+	if (del_timer(&policy->timer))
+		atomic_dec(&policy->refcnt);
+
+out:
+	write_unlock_bh(&policy->lock);
+}
+
+/* Generate new index... KAME seems to generate them ordered, at the cost
+ * of absolute unpredictability of ordering of rules. This will not pass. */
+static u32 xfrm_gen_index(int dir)
+{	/* returns an index not used by any policy on xfrm_policy_list[dir] */
+	u32 idx;
+	struct xfrm_policy *p;
+	static u32 idx_generator;	/* advances by 8 on every attempt */
+
+	for (;;) {
+		idx = (idx_generator | dir);	/* dir lands in the low bits, assuming dir < 8 */
+		idx_generator += 8;
+		if (idx == 0)
+			idx = 8;	/* index 0 is never handed out */
+		for (p = xfrm_policy_list[dir]; p; p = p->next) {
+			if (p->index == idx)
+				break;	/* already in use: try the next candidate */
+		}
+		if (!p)
+			return idx;
+	}
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{	/* returns 0, or -EEXIST when excl is set and an identical selector is already listed */
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+		if (memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+			if (excl) {
+				write_unlock_bh(&xfrm_policy_lock);
+				return -EEXIST;
+			}
+			break;	/* pol is the existing policy that gets replaced */
+		}
+	}
+	atomic_inc(&policy->refcnt);	/* presumably the reference held by the list - confirm */
+	policy->next = pol ? pol->next : NULL;	/* take over pol's slot, or append at the tail */
+	*p = policy;
+	xfrm_policy_genid++;	/* flow_lookup() treats entries with an older genid as stale */
+	policy->index = pol ? pol->index : xfrm_gen_index(dir);
+	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = 0;
+	if (policy->lft.hard_add_expires_seconds &&
+	    !mod_timer(&policy->timer, jiffies + HZ))
+		atomic_inc(&policy->refcnt);	/* taken only when mod_timer() returns 0 */
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol) {	/* the replaced policy is killed outside xfrm_policy_lock */
+		atomic_dec(&pol->refcnt);
+		xfrm_policy_kill(pol);
+		xfrm_pol_put(pol);
+	}
+	return 0;
+}
+
+struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+			*p = pol->next;
+			break;
+		}
+	}
+	if (pol)
+		xfrm_policy_genid++;
+	write_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+		if (pol->index == id) {
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	if (pol) {
+		if (delete)
+			xfrm_policy_genid++;
+		else
+			atomic_inc(&pol->refcnt);
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+void xfrm_policy_flush(void)
+{	/* unlink, kill and release every policy on the first XFRM_POLICY_MAX lists */
+	struct xfrm_policy *xp;
+	int dir;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+		while ((xp = xfrm_policy_list[dir]) != NULL) {
+			xfrm_policy_list[dir] = xp->next;	/* pop the list head */
+			write_unlock_bh(&xfrm_policy_lock);	/* lock is dropped around kill/put */
+
+			xfrm_policy_kill(xp);
+			xfrm_pol_put(xp);
+
+			write_lock_bh(&xfrm_policy_lock);
+		}
+	}
+	xfrm_policy_genid++;	/* flow_lookup() treats entries with an older genid as stale */
+	write_unlock_bh(&xfrm_policy_lock);
+}
+
+int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+		     void *data)
+{
+	struct xfrm_policy *xp;
+	int dir;
+	int count = 0;
+	int error = 0;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
+			count++;
+	}
+
+	if (count == 0) {
+		error = -ENOENT;
+		goto out;
+	}
+
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
+			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+			if (error)
+				goto out;
+		}
+	}
+
+out:
+	read_unlock_bh(&xfrm_policy_lock);
+	return error;
+}
+
+
+/* Find policy to apply to this flow. */
+
+struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl, 
+				       unsigned short family)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
+		struct xfrm_selector *sel = &pol->selector;
+		int match;
+
+		if (pol->family != family)
+			continue;
+
+		match = xfrm_selector_match(sel, fl, family);
+		if (match) {
+			atomic_inc(&pol->refcnt);
+			break;
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	if ((pol = sk->policy[dir]) != NULL) {
+		int match;
+
+		match = xfrm_selector_match(&pol->selector, fl, sk->family);
+		if (match)
+			atomic_inc(&pol->refcnt);
+		else
+			pol = NULL;
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
+{
+	pol->next = xfrm_policy_list[XFRM_POLICY_MAX+dir];
+	xfrm_policy_list[XFRM_POLICY_MAX+dir] = pol;
+	atomic_inc(&pol->refcnt);
+}
+
+void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
+{
+	struct xfrm_policy **polp;
+
+	for (polp = &xfrm_policy_list[XFRM_POLICY_MAX+dir];
+	     *polp != NULL; polp = &(*polp)->next) {
+		if (*polp == pol) {
+			*polp = pol->next;
+			atomic_dec(&pol->refcnt);
+			return;
+		}
+	}
+}
+
+int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+{
+	struct xfrm_policy *old_pol;
+	/* Install pol (may be NULL) as sk's policy for dir; always returns 0. */
+	write_lock_bh(&xfrm_policy_lock);
+	old_pol = sk->policy[dir];
+	sk->policy[dir] = pol;
+	if (pol) {
+		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		xfrm_sk_policy_link(pol, dir);
+	}
+	if (old_pol)
+		xfrm_sk_policy_unlink(old_pol, dir);
+	write_unlock_bh(&xfrm_policy_lock);
+	/* The displaced policy is killed and released outside the lock. */
+	if (old_pol) {
+		xfrm_policy_kill(old_pol);
+		xfrm_pol_put(old_pol);
+	}
+	return 0;
+}
+
+static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+{
+	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+	/* Copy old into a fresh policy linked on the per-socket list; NULL if allocation fails. */
+	if (newp) {
+		newp->selector = old->selector;
+		newp->family = old->family;	/* without this the clone's family is never set */
+		newp->lft = old->lft;
+		newp->curlft = old->curlft;
+		newp->action = old->action;
+		newp->flags = old->flags;
+		newp->xfrm_nr = old->xfrm_nr;
+		newp->index = old->index;
+		memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl));
+		write_lock_bh(&xfrm_policy_lock);
+		xfrm_sk_policy_link(newp, dir);
+		write_unlock_bh(&xfrm_policy_lock);
+	}
+	return newp;
+}
+
+int __xfrm_sk_clone_policy(struct sock *sk)
+{
+	struct xfrm_policy *orig[2];	/* the policies sk carries on entry */
+	int dir;
+	for (dir = 0; dir < 2; dir++) {
+		orig[dir] = sk->policy[dir];
+		sk->policy[dir] = NULL;
+	}
+	for (dir = 0; dir < 2; dir++)
+		if (orig[dir] && (sk->policy[dir] = clone_policy(orig[dir], dir)) == NULL)
+			return -ENOMEM;	/* clone_policy() could not allocate */
+	return 0;
+}
+
+void __xfrm_sk_free_policy(struct xfrm_policy *pol, int dir)
+{
+	write_lock_bh(&xfrm_policy_lock);
+	xfrm_sk_policy_unlink(pol, dir);
+	write_unlock_bh(&xfrm_policy_lock);
+	/* Unlinked under the lock above; kill and release outside it. */
+	xfrm_policy_kill(pol);
+	xfrm_pol_put(pol);
+}
+
+/* Resolve list of templates for the flow, given policy: fills xfrm[] with a
+ * held state per resolved template; returns their count or a negative error. */
+static int
+xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	int nx;
+	int i, error;
+	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+
+	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
+		struct xfrm_state *x;
+		xfrm_address_t *remote = daddr;
+		xfrm_address_t *local  = saddr;
+		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
+		/* A non-zero tmpl->mode takes both endpoints from the template. */
+		if (tmpl->mode) {
+			remote = &tmpl->id.daddr;
+			local = &tmpl->saddr;
+		}
+
+		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+		/* Keep a VALID state; its endpoints become the base for the next template. */
+		if (x && x->km.state == XFRM_STATE_VALID) {
+			xfrm[nx++] = x;
+			daddr = remote;
+			saddr = local;
+			continue;
+		}
+		if (x) {
+			error = (x->km.state == XFRM_STATE_ERROR ?
+				 -EINVAL : -EAGAIN);
+			xfrm_state_put(x);
+		}
+		/* Unresolved: only a template marked optional may be skipped. */
+		if (!tmpl->optional)
+			goto fail;
+	}
+	return nx;
+
+fail:
+	for (nx--; nx>=0; nx--)
+		xfrm_state_put(xfrm[nx]);
+	return error;
+}
+
+/* Ask the per-family find_bundle hook for a bundle that accepts this flow
+ * and is still valid; ERR_PTR(-EINVAL) when the family has no afinfo.
+ */
+static struct dst_entry *
+xfrm_find_bundle(struct flowi *fl, struct rtable *rt, struct xfrm_policy *policy, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	struct dst_entry *bundle;
+
+	if (unlikely(ops == NULL))
+		return ERR_PTR(-EINVAL);
+	bundle = ops->find_bundle(fl, rt, policy);
+	xfrm_policy_put_afinfo(ops);
+	return bundle;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate all the
+ * metrics -- all delegated to the per-family bundle_create hook.
+ * Returns the hook's result, or -EINVAL when the family has no afinfo.
+ */
+static int
+xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		   struct flowi *fl, struct dst_entry **dst_p,
+		   unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	int rc;
+	if (unlikely(ops == NULL))
+		return -EINVAL;
+	rc = ops->bundle_create(policy, xfrm, nx, fl, dst_p);
+	xfrm_policy_put_afinfo(ops);
+	return rc;
+}
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec. Returns 0 on success, an error code
+ * otherwise; a non-zero flags lets us sleep while an SA is being acquired.
+ */
+int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		struct sock *sk, int flags)
+{
+	struct xfrm_policy *policy;
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct rtable *rt = (struct rtable*)*dst_p;
+	struct dst_entry *dst;
+	int nx = 0;
+	int err;
+	u32 genid;
+	u16 family = (*dst_p)->ops->family;
+
+	/* IPv4 only for now: fill unset flow addresses from the route. */
+	if (family == AF_INET) {
+		if (!fl->fl4_src)
+			fl->fl4_src = rt->rt_src;
+		if (!fl->fl4_dst)
+			fl->fl4_dst = rt->rt_dst;
+	}
+
+restart:
+	genid = xfrm_policy_genid;
+	policy = NULL;
+	if (sk && sk->policy[1])
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+
+	if (!policy) {
+		/* To accelerate a bit...  */
+		if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+			return 0;
+
+		policy = flow_lookup(XFRM_POLICY_OUT, fl, family);
+	}
+
+	if (!policy)
+		return 0;
+
+	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	switch (policy->action) {
+	case XFRM_POLICY_BLOCK:
+		/* Prohibit the flow */
+		xfrm_pol_put(policy);
+		return -EPERM;
+
+	case XFRM_POLICY_ALLOW:
+		if (policy->xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		/* Try to find matching bundle.
+		 *
+		 * LATER: help from flow cache. It is optional, this
+		 * is required only for output policy.
+		 */
+		dst = xfrm_find_bundle(fl, rt, policy, family);
+		if (IS_ERR(dst)) {
+			xfrm_pol_put(policy);
+			return PTR_ERR(dst);
+		}
+
+		if (dst)
+			break;
+
+		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+		if (unlikely(nx<0)) {
+			err = nx;
+			if (err == -EAGAIN) {
+				struct task_struct *tsk = current;
+				DECLARE_WAITQUEUE(wait, tsk);
+				if (!flags)
+					goto error;
+				/* Sleep on km_waitq unless the retry already resolved. */
+				__set_task_state(tsk, TASK_INTERRUPTIBLE);
+				add_wait_queue(&km_waitq, &wait);
+				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+				if (nx == -EAGAIN)
+					schedule();
+				__set_task_state(tsk, TASK_RUNNING);
+				remove_wait_queue(&km_waitq, &wait);
+				if (nx == -EAGAIN && signal_pending(current)) {
+					err = -ERESTART;
+					goto error;
+				}
+				if (nx == -EAGAIN || genid != xfrm_policy_genid) {
+					/* Starting over: drop what this pass still holds. */
+					while (nx > 0)
+						xfrm_state_put(xfrm[--nx]);
+					xfrm_pol_put(policy);
+					goto restart;
+				}
+				err = nx;
+			}
+			if (err < 0)
+				goto error;
+		}
+		if (nx == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		dst = &rt->u.dst;
+		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
+		if (unlikely(err)) {
+			int i;
+			for (i=0; i<nx; i++)
+				xfrm_state_put(xfrm[i]);
+			goto error;
+		}
+
+		write_lock_bh(&policy->lock);
+		if (unlikely(policy->dead)) {
+			/* Wow! While we worked on resolving, this
+			 * policy has gone. Retry. It is not paranoia,
+			 * we just cannot enlist new bundle to dead object.
+			 */
+			write_unlock_bh(&policy->lock);
+			xfrm_pol_put(policy);
+			if (dst)
+				dst_free(dst);
+			goto restart;
+		}
+		dst->next = policy->bundles;
+		policy->bundles = dst;
+		dst_hold(dst);
+		write_unlock_bh(&policy->lock);
+	}
+	*dst_p = dst;
+	ip_rt_put(rt);
+	xfrm_pol_put(policy);
+	return 0;
+
+error:
+	ip_rt_put(rt);
+	xfrm_pol_put(policy);
+	*dst_p = NULL;
+	return err;
+}
+
+/* When skb is transformed back to its "native" form, we have to
+ * check policy restrictions. At the moment we make this in maximally
+ * stupid way. Shame on me. :-) Of course, connected sockets must
+ * have policy cached at them.
+ */
+
+static inline int xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
+				unsigned short family)
+{
+	if (x->id.proto != tmpl->id.proto ||
+	    (tmpl->id.spi && x->id.spi != tmpl->id.spi) ||	/* template SPI 0 accepts any */
+	    x->props.mode != tmpl->mode ||
+	    !(tmpl->aalgos & (1<<x->props.aalgo)))
+		return 0;
+	return !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));	/* addresses count only when mode != 0 */
+}
+
+static inline int
+xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int idx,
+	       unsigned short family)
+{
+	/* First state at or after idx accepted by tmpl: returns its index + 1, else -1. */
+	while (idx < sp->len)
+		if (xfrm_state_ok(tmpl, sp->xvec[idx++], family))
+			return idx;
+	return -1;
+}
+
+static int
+_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	/* No afinfo registered for this family: nothing can decode the packet. */
+	if (unlikely(ops == NULL))
+		return -EAFNOSUPPORT;
+	/* Hand skb and fl to the per-family decode_session hook. */
+	ops->decode_session(skb, fl);
+	xfrm_policy_put_afinfo(ops);
+	return 0;
+}
+
+int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
+			unsigned short family)
+{
+	struct xfrm_policy *pol;
+	struct flowi fl;
+	/* Returns 1 when the packet is acceptable, 0 when it must be rejected. */
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;
+
+	/* First, check used SA against their selectors. */
+	if (skb->sp) {
+		int i;
+
+		for (i=skb->sp->len-1; i>=0; i--) {
+			if (!xfrm_selector_match(&skb->sp->xvec[i]->sel, &fl, family))
+				return 0;
+		}
+	}
+	/* A matching per-socket policy takes precedence over flow_lookup(). */
+	pol = NULL;
+	if (sk && sk->policy[dir])
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+
+	if (!pol)
+		pol = flow_lookup(dir, &fl, family);
+	/* No policy at all: nothing restricts this packet. */
+	if (!pol)
+		return 1;
+
+	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	if (pol->action == XFRM_POLICY_ALLOW) {
+		if (pol->xfrm_nr != 0) {
+			struct sec_path *sp;
+			static struct sec_path dummy;
+			int i, k;
+			/* A packet without a sec_path is checked against the static (zero-initialised) dummy. */
+			if ((sp = skb->sp) == NULL)
+				sp = &dummy;
+
+			/* For each tmpl search corresponding xfrm.
+			 * Order is _important_. Later we will implement
+			 * some barriers, but at the moment barriers
+			 * are implied between each two transformations.
+			 */
+			for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
+				k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+				if (k < 0)
+					goto reject;
+			}
+		}
+		xfrm_pol_put(pol);
+		return 1;
+	}
+	/* Any action other than ALLOW, or an unmatched template, rejects. */
+reject:
+	xfrm_pol_put(pol);
+	return 0;
+}
+
+int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+	struct flowi flow;
+	/* A packet whose session cannot be decoded is not forwarded. */
+	if (_decode_session(skb, &flow, family) < 0)
+		return 0;
+	/* 1 when xfrm_lookup() succeeds on the packet's route, 0 otherwise. */
+	return !xfrm_lookup(&skb->dst, &flow, NULL, 0);
+}
+
+/* Validate a bundle: every dst on the child chain must be non-obsolete and
+ * any attached xfrm VALID; otherwise the bundle is released and NULL returned.
+ * Optimize later using cookies and generation ids. */
+static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	struct dst_entry *d;
+
+	for (d = dst; d != NULL; d = d->child) {
+		if (d->obsolete <= 0 &&
+		    (!d->xfrm || d->xfrm->km.state == XFRM_STATE_VALID))
+			continue;
+		dst_release(dst);
+		return NULL;
+	}
+
+	return dst;
+}
+
+static void xfrm_dst_destroy(struct dst_entry *dst)
+{
+	xfrm_state_put(dst->xfrm);	/* release the state attached to this dst */
+	dst->xfrm = NULL;	/* and forget the pointer */
+}
+
+static void xfrm_link_failure(struct sk_buff *skb)
+{
+	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
+	return;
+}
+
+static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+{
+	/* A NULL or non-obsolete dst is handed back unchanged; an obsolete
+	 * one is released and NULL is returned in its place. */
+	if (dst == NULL || !dst->obsolete)
+		return dst;
+
+	dst_release(dst);
+	return NULL;
+}
+
+static void __xfrm_garbage_collect(void)
+{
+	int i;
+	struct xfrm_policy *pol;
+	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	/* Pass 1: under the locks, move every bundle with a zero refcount to gc_list. */
+	read_lock_bh(&xfrm_policy_lock);
+	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (atomic_read(&dst->__refcnt) == 0) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	/* Pass 2: free the collected bundles with no lock held. */
+	while (gc_list) {
+		dst = gc_list;
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+}
+
+static int bundle_depends_on(struct dst_entry *dst, struct xfrm_state *x)
+{
+	do {	/* dst itself is dereferenced unconditionally, so it must not be NULL */
+		if (dst->xfrm == x)
+			return 1;	/* some dst on the child chain uses x */
+	} while ((dst = dst->child) != NULL);
+	return 0;
+}
+
+int xfrm_flush_bundles(struct xfrm_state *x)
+{
+	int i;
+	struct xfrm_policy *pol;
+	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	/* Pass 1: under the locks, unhook every bundle whose chain uses state x. */
+	read_lock_bh(&xfrm_policy_lock);
+	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (bundle_depends_on(dst, x)) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	/* Pass 2: free the unhooked bundles with no lock held. */
+	while (gc_list) {
+		dst = gc_list;
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+	/* Always reports success. */
+	return 0;
+}
+
+/* Well... that's _TASK_. We need to scan through transformation
+ * list and figure out what mss tcp should generate in order to
+ * final datagram fit to mtu. Mama mia... :-)
+ *
+ * Apparently, some easy way exists, but we used to choose the most
+ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
+ *
+ * Consider this function as something like dark humour. :-)
+ */
+static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+{
+	int res = mtu - dst->header_len;
+	/* Shrink the candidate payload res until the transformed size m fits in mtu. */
+	for (;;) {
+		struct dst_entry *d = dst;
+		int m = res;
+		/* Run res through every transform on the bundle's child chain. */
+		do {
+			struct xfrm_state *x = d->xfrm;
+			if (x) {
+				spin_lock_bh(&x->lock);
+				if (x->km.state == XFRM_STATE_VALID &&
+				    x->type && x->type->get_max_size)
+					m = x->type->get_max_size(d->xfrm, m);
+				else
+					m += x->props.header_len;
+				spin_unlock_bh(&x->lock);
+			}
+		} while ((d = d->child) != NULL);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);	/* retry with the overshoot removed */
+		if (res < 88)
+			return mtu;	/* give up below 88 bytes and report mtu unchanged */
+	}
+
+	return res + dst->header_len;
+}
+
+int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;	/* the slot for this family is already taken */
+	else {
+		struct dst_ops *dst_ops = afinfo->dst_ops;	/* only hooks left NULL get the generic default */
+		if (likely(dst_ops->kmem_cachep == NULL))
+			dst_ops->kmem_cachep = xfrm_dst_cache;
+		if (likely(dst_ops->check == NULL))
+			dst_ops->check = xfrm_dst_check;
+		if (likely(dst_ops->destroy == NULL))
+			dst_ops->destroy = xfrm_dst_destroy;
+		if (likely(dst_ops->negative_advice == NULL))
+			dst_ops->negative_advice = xfrm_negative_advice;
+		if (likely(dst_ops->link_failure == NULL))
+			dst_ops->link_failure = xfrm_link_failure;
+		if (likely(dst_ops->get_mss == NULL))
+			dst_ops->get_mss = xfrm_get_mss;
+		if (likely(afinfo->garbage_collect == NULL))
+			afinfo->garbage_collect = __xfrm_garbage_collect;
+		xfrm_policy_afinfo[afinfo->family] = afinfo;	/* publish last, still under the write lock */
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+
+int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {	/* an empty slot is not an error: err stays 0 */
+		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;	/* someone else owns this family's slot */
+		else {
+			struct dst_ops *dst_ops = afinfo->dst_ops;
+			xfrm_policy_afinfo[afinfo->family] = NULL;
+			dst_ops->kmem_cachep = NULL;	/* every hook is cleared, whoever set it */
+			dst_ops->check = NULL;
+			dst_ops->destroy = NULL;
+			dst_ops->negative_advice = NULL;
+			dst_ops->link_failure = NULL;
+			dst_ops->get_mss = NULL;
+			afinfo->garbage_collect = NULL;
+		}
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+
+struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;	/* family is outside the table */
+	read_lock(&xfrm_policy_afinfo_lock);
+	afinfo = xfrm_policy_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);	/* stays held until xfrm_policy_put_afinfo() */
+	read_unlock(&xfrm_policy_afinfo_lock);
+	return afinfo;	/* NULL when nothing is registered for family */
+}
+
+void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;	/* tolerate a failed xfrm_policy_get_afinfo() */
+	read_unlock(&afinfo->lock);	/* pairs with the read_lock taken in xfrm_policy_get_afinfo() */
+}
+
+void __init xfrm_policy_init(void)
+{
+	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",	/* slab cache sized for struct xfrm_dst */
+					   sizeof(struct xfrm_dst),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!xfrm_dst_cache)
+		panic("XFRM: failed to allocate xfrm_dst_cache\n");	/* no fallback: boot stops here */
+}
+
+void __init xfrm_init(void)
+{
+	xfrm_state_init();	/* state hash tables */
+	flow_cache_init();
+	xfrm_policy_init();	/* xfrm_dst slab cache */
+}
+
diff -Nru a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/xfrm_state.c	Mon Mar 31 13:41:08 2003
@@ -0,0 +1,739 @@
+/*
+ * xfrm_state.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific functions
+ * 	
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+
+/* Each xfrm_state may be linked to two tables:
+
+   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
+   2. Hash table by daddr to find what SAs exist for given
+      destination/tunnel endpoint. (output)
+ */
+
+static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
+
+/* Hash table to find appropriate SA towards given target (endpoint
+ * of tunnel or destination of transport mode) allowed by selector.
+ *
+ * Main use is finding SA after policy selected tunnel or transport mode.
+ * Also, it can be used by ah/esp icmp error handler to find offending SA.
+ */
+static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+
+DECLARE_WAIT_QUEUE_HEAD(km_waitq);
+
+static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
+static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
+
+static void __xfrm_state_delete(struct xfrm_state *x);
+
+static inline unsigned long make_jiffies(long secs)
+{
+	/* Seconds to jiffies, clamped to MAX_SCHEDULE_TIMEOUT-1. */
+	const long limit = MAX_SCHEDULE_TIMEOUT-1;
+
+	return secs >= limit/HZ ? limit : secs*HZ;
+}
+
+static void xfrm_timer_handler(unsigned long data)
+{
+	struct xfrm_state *x = (struct xfrm_state*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+	/* Per-state timer: enforce hard lifetimes, signal soft ones, then re-arm. */
+	spin_lock(&x->lock);
+	if (x->km.state == XFRM_STATE_DEAD)
+		goto out;
+	if (x->km.state == XFRM_STATE_EXPIRED)
+		goto expired;
+	if (x->lft.hard_add_expires_seconds) {
+		long tmo = x->lft.hard_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.hard_use_expires_seconds && x->curlft.use_time) {
+		long tmo = x->lft.hard_use_expires_seconds +
+			x->curlft.use_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->km.dying)	/* soft expiry was already signalled: skip the soft checks */
+		goto resched;
+	if (x->lft.soft_add_expires_seconds) {
+		long tmo = x->lft.soft_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.soft_use_expires_seconds && x->curlft.use_time) {
+		long tmo = x->lft.soft_use_expires_seconds +
+			x->curlft.use_time - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+	/* A soft limit has passed: tell the key managers (this sets km.dying). */
+	if (warn)
+		km_warn_expired(x);
+resched:
+	if (next != LONG_MAX &&
+	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
+		atomic_inc(&x->refcnt);
+	goto out;
+	/* Hard expiry.  An ACQ state without SPI lingers 2 more seconds as EXPIRED. */
+expired:
+	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
+		x->km.state = XFRM_STATE_EXPIRED;
+		wake_up(&km_waitq);
+		next = 2;
+		goto resched;
+	}
+	if (x->id.spi != 0)
+		km_expired(x);
+	__xfrm_state_delete(x);
+	/* Every exit path drops one reference on x. */
+out:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+}
+
+struct xfrm_state *xfrm_state_alloc(void)
+{
+	struct xfrm_state *x;
+
+	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+	/* On success: zeroed, refcnt 1, timer prepared, byte/packet limits infinite. */
+	if (x) {
+		memset(x, 0, sizeof(struct xfrm_state));
+		atomic_set(&x->refcnt, 1);
+		INIT_LIST_HEAD(&x->bydst);
+		INIT_LIST_HEAD(&x->byspi);
+		init_timer(&x->timer);
+		x->timer.function = xfrm_timer_handler;
+		x->timer.data	  = (unsigned long)x;
+		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->lft.soft_byte_limit = XFRM_INF;
+		x->lft.soft_packet_limit = XFRM_INF;
+		x->lft.hard_byte_limit = XFRM_INF;
+		x->lft.hard_packet_limit = XFRM_INF;
+		x->lock = SPIN_LOCK_UNLOCKED;
+	}
+	return x;	/* NULL if kmalloc failed */
+}
+
+void __xfrm_state_destroy(struct xfrm_state *x)
+{
+	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);	/* only DEAD states are expected here */
+	if (del_timer(&x->timer))
+		BUG();	/* a still-pending timer at this point is treated as fatal */
+	if (x->aalg)
+		kfree(x->aalg);
+	if (x->ealg)
+		kfree(x->ealg);
+	if (x->calg)
+		kfree(x->calg);
+	if (x->type)
+		xfrm_put_type(x->type);
+	kfree(x);
+}
+
+static void __xfrm_state_delete(struct xfrm_state *x)
+{
+	int kill = 0;
+	/* Both callers in this file hold x->lock.  Only the first call on a live state unlinks it. */
+	if (x->km.state != XFRM_STATE_DEAD) {
+		x->km.state = XFRM_STATE_DEAD;
+		kill = 1;
+		spin_lock(&xfrm_state_lock);
+		list_del(&x->bydst);
+		atomic_dec(&x->refcnt);
+		if (x->id.spi) {
+			list_del(&x->byspi);
+			atomic_dec(&x->refcnt);
+		}
+		spin_unlock(&xfrm_state_lock);
+		if (del_timer(&x->timer))
+			atomic_dec(&x->refcnt);
+		if (atomic_read(&x->refcnt) != 1)
+			xfrm_flush_bundles(x);
+	}
+	/* The type destructor runs once, on the call that performed the unlink. */
+	if (kill && x->type)
+		x->type->destructor(x);
+	wake_up(&km_waitq);
+}
+
+void xfrm_state_delete(struct xfrm_state *x)
+{
+	spin_lock_bh(&x->lock);	/* locked wrapper around __xfrm_state_delete() */
+	__xfrm_state_delete(x);
+	spin_unlock_bh(&x->lock);
+}
+
+void xfrm_state_flush(u8 proto)
+{
+	int i;
+	struct xfrm_state *x;
+	/* Delete every state whose proto matches (IPSEC_PROTO_ANY matches all). */
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+restart:
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) {
+				atomic_inc(&x->refcnt);
+				spin_unlock_bh(&xfrm_state_lock);
+				/* Deletion runs with the table lock dropped; x is pinned meanwhile. */
+				xfrm_state_delete(x);
+				xfrm_state_put(x);
+				/* The chain may have changed: rescan bucket i from its head. */
+				spin_lock_bh(&xfrm_state_lock);
+				goto restart;
+			}
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	wake_up(&km_waitq);
+}
+
+/* Run the per-family init_tempsel hook on x; -1 if the family has no afinfo. */
+static int
+xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl,
+		  xfrm_address_t *daddr, xfrm_address_t *saddr,
+		  unsigned short family)
+{
+	struct xfrm_state_afinfo *ops = xfrm_state_get_afinfo(family);
+	if (ops == NULL)
+		return -1;
+	ops->init_tempsel(x, fl, tmpl, daddr, saddr);
+	xfrm_state_put_afinfo(ops);
+	return 0;
+}
+
+/* Find the best VALID state for (daddr, saddr, tmpl), else start an acquire.
+ * Returns a held state, or NULL with *err = -EAGAIN, -ESRCH or -ENOMEM. */
+struct xfrm_state *
+xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl,
+		struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family)
+{
+	unsigned h = xfrm_dst_hash(daddr, family);
+	struct xfrm_state *x;
+	int acquire_in_progress = 0;
+	int error = 0;
+	struct xfrm_state *best = NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+		if (x->props.family == family &&
+		    x->props.reqid == tmpl->reqid &&
+		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto) {
+			/* Resolution logic:
+			   1. There is a valid state with matching selector.
+			      Done.
+			   2. Valid state with inappropriate selector. Skip.
+
+			   Entering area of "sysdeps".
+
+			   3. If state is not valid, selector is temporary,
+			      it selects only session which triggered
+			      previous resolution. Key manager will do
+			      something to install a state with proper
+			      selector.
+			 */
+			if (x->km.state == XFRM_STATE_VALID) {
+				if (!xfrm_selector_match(&x->sel, fl, family))
+					continue;
+				if (!best ||
+				    best->km.dying > x->km.dying ||
+				    (best->km.dying == x->km.dying &&
+				     best->curlft.add_time < x->curlft.add_time))
+					best = x;
+			} else if (x->km.state == XFRM_STATE_ACQ) {
+				acquire_in_progress = 1;
+			} else if (x->km.state == XFRM_STATE_ERROR ||
+				   x->km.state == XFRM_STATE_EXPIRED) {
+				if (xfrm_selector_match(&x->sel, fl, family))
+					error = 1;
+			}
+		}
+	}
+
+	if (best) {
+		atomic_inc(&best->refcnt);
+		spin_unlock_bh(&xfrm_state_lock);
+		return best;
+	}
+
+	x = NULL;
+	if (!error && !acquire_in_progress &&
+	    ((x = xfrm_state_alloc()) != NULL)) {
+		/* Initialize temporary selector matching only
+		 * to current session. */
+		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+
+		if (km_query(x, tmpl, pol) == 0) {
+			x->km.state = XFRM_STATE_ACQ;
+			list_add_tail(&x->bydst, xfrm_state_bydst+h);
+			atomic_inc(&x->refcnt);
+			if (x->id.spi) {
+				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+				list_add(&x->byspi, xfrm_state_byspi+h);
+				atomic_inc(&x->refcnt);
+			}
+			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+			atomic_inc(&x->refcnt);
+			mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);	/* expiry is absolute jiffies */
+		} else {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			error = 1;
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	if (!x)
+		*err = acquire_in_progress ? -EAGAIN :
+			(error ? -ESRCH : -ENOMEM);
+	return x;
+}
+
+void xfrm_state_insert(struct xfrm_state *x)
+{
+	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+	/* Hash x into both tables (one reference each) and start its timer. */
+	spin_lock_bh(&xfrm_state_lock);
+	list_add(&x->bydst, xfrm_state_bydst+h);
+	atomic_inc(&x->refcnt);
+
+	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+
+	list_add(&x->byspi, xfrm_state_byspi+h);
+	atomic_inc(&x->refcnt);
+
+	if (!mod_timer(&x->timer, jiffies + HZ))
+		atomic_inc(&x->refcnt);	/* extra reference only when mod_timer() returns 0 */
+
+	spin_unlock_bh(&xfrm_state_lock);
+	wake_up(&km_waitq);
+}
+
+int xfrm_state_check_expire(struct xfrm_state *x)
+{
+	if (!x->curlft.use_time)
+		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+	/* First use is stamped above; a state that is not VALID is refused. */
+	if (x->km.state != XFRM_STATE_VALID)
+		return -EINVAL;
+	/* Hard byte/packet limit reached: notify the key managers and refuse. */
+	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
+	    x->curlft.packets >= x->lft.hard_packet_limit) {
+		km_expired(x);
+		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
+			atomic_inc(&x->refcnt);
+		return -EINVAL;
+	}
+	/* Soft limit reached and not yet signalled: warn only, still usable. */
+	if (!x->km.dying &&
+	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
+	     x->curlft.packets >= x->lft.soft_packet_limit))
+		km_warn_expired(x);
+	return 0;
+}
+
+int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+{
+	/* Headroom still missing for x's header plus the link-layer reserve. */
+	int shortfall = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
+		- skb_headroom(skb);
+
+	if (shortfall <= 0)
+		return 0;	/* Check tail too... */
+
+	return pskb_expand_head(skb, shortfall, 0, GFP_ATOMIC);
+}
+
+struct xfrm_state *
+xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+		  unsigned short family)
+{
+	struct xfrm_state_afinfo *ops = xfrm_state_get_afinfo(family);
+	struct xfrm_state *found;
+	if (ops == NULL)
+		return NULL;
+	/* The per-family state_lookup hook runs under xfrm_state_lock. */
+	spin_lock_bh(&xfrm_state_lock);
+	found = ops->state_lookup(daddr, spi, proto);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(ops);
+	return found;
+}
+
+struct xfrm_state *
+xfrm_find_acq(u8 mode, u16 reqid, u8 proto, 
+	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
+	      int create, unsigned short family)
+{
+	struct xfrm_state_afinfo *ops = xfrm_state_get_afinfo(family);
+	struct xfrm_state *found;
+	if (ops == NULL)
+		return NULL;
+	/* The per-family find_acq hook runs under xfrm_state_lock. */
+	spin_lock_bh(&xfrm_state_lock);
+	found = ops->find_acq(mode, reqid, proto, daddr, saddr, create);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(ops);
+	return found;
+}
+
+/* Silly enough, but I'm lazy to build resolution list: scan every bydst
+ * chain for a state whose km.seq equals seq; returns it held, or NULL. */
+struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
+{
+	struct xfrm_state *x, *found = NULL;
+	int i;
+
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE && !found; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq != seq)
+				continue;
+			atomic_inc(&x->refcnt);
+			found = x;
+			break;
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	return found;
+}
+ 
+u32 xfrm_get_acqseq(void)
+{
+	u32 res;
+	static u32 acqseq;
+	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
+	/* Next value of the counter; 0 is skipped when it wraps. */
+	spin_lock_bh(&acqseq_lock);
+	res = (++acqseq ? : ++acqseq);
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+
+/* Give x a free SPI from [minspi, maxspi] (network byte order) and hash it by
+ * SPI.  x->id.spi stays 0 when the value, or every value drawn, is in use. */
+void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+{
+	u32 h;
+	struct xfrm_state *x0;
+	if (x->id.spi)
+		return;
+	if (minspi == maxspi) {
+		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
+		if (x0) {
+			xfrm_state_put(x0);
+			return;
+		}
+		x->id.spi = minspi;
+	} else {
+		u32 spi;
+		minspi = ntohl(minspi);
+		maxspi = ntohl(maxspi);
+		for (h=0; h<maxspi-minspi+1; h++) {
+			spi = minspi + net_random()%(maxspi-minspi+1);
+			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+			if (x0 == NULL) {
+				x->id.spi = htonl(spi);	/* only a candidate nobody uses is installed */
+				break;
+			}
+			xfrm_state_put(x0);
+		}
+	}
+	if (x->id.spi) {
+		spin_lock_bh(&xfrm_state_lock);
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+		list_add(&x->byspi, xfrm_state_byspi+h);
+		atomic_inc(&x->refcnt);
+		spin_unlock_bh(&xfrm_state_lock);
+		wake_up(&km_waitq);
+	}
+}
+
+int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
+		    void *data)
+{
+	int i;
+	struct xfrm_state *x;
+	int count = 0;
+	int err = 0;
+	/* First pass counts matches so func can be told how many entries follow. */
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+				count++;
+		}
+	}
+	if (count == 0) {
+		err = -ENOENT;	/* nothing matched proto */
+		goto out;
+	}
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+				continue;
+			err = func(x, --count, data);	/* second argument reaches 0 on the last entry */
+			if (err)
+				goto out;	/* a non-zero return from func stops the walk */
+		}
+	}
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	return err;
+}
+
+
+int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+	/* Returns 0 if seq (network order) may be accepted, -EINVAL otherwise. */
+	seq = ntohl(seq);
+
+	if (unlikely(seq == 0))
+		return -EINVAL;
+	/* Anything newer than the last recorded sequence number is fine. */
+	if (likely(seq > x->replay.seq))
+		return 0;
+	/* Older: must fall inside the window and not be marked in the bitmap. */
+	diff = x->replay.seq - seq;
+	if (diff >= x->props.replay_window) {
+		x->stats.replay_window++;
+		return -EINVAL;
+	}
+
+	if (x->replay.bitmap & (1U << diff)) {
+		x->stats.replay++;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+	/* Record seq (network order) as seen in x's replay state. */
+	seq = ntohl(seq);
+
+	if (seq > x->replay.seq) {
+		diff = seq - x->replay.seq;
+		if (diff < x->props.replay_window)
+			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;	/* shift history, bit 0 = seq */
+		else
+			x->replay.bitmap = 1;	/* jump of a full window or more: history is dropped */
+		x->replay.seq = seq;
+	} else {
+		diff = x->replay.seq - seq;
+		x->replay.bitmap |= (1U << diff);	/* older packet: set its bit in the bitmap */
+	}
+}
+
+int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
+{
+	int idx = 0;
+
+	/* Each state's own selector must accept the flow; -EINVAL on the first that does not. */
+	while (idx < n) {
+		struct xfrm_state *st = x[idx++];
+		if (!xfrm_selector_match(&st->sel, fl, st->props.family))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
+static rwlock_t		xfrm_km_lock = RW_LOCK_UNLOCKED;
+
+void km_warn_expired(struct xfrm_state *x)
+{
+	struct xfrm_mgr *km;
+	/* Mark x as dying and send notify(x, 0) to every registered key manager. */
+	x->km.dying = 1;
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		km->notify(x, 0);
+	read_unlock(&xfrm_km_lock);
+}
+
+void km_expired(struct xfrm_state *x)
+{
+	struct xfrm_mgr *km;
+	/* Move x to EXPIRED, send notify(x, 1) to every key manager, wake waiters. */
+	x->km.state = XFRM_STATE_EXPIRED;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		km->notify(x, 1);
+	read_unlock(&xfrm_km_lock);
+	wake_up(&km_waitq);
+}
+
+int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
+{
+	int err = -EINVAL;	/* result when no key manager is registered */
+	struct xfrm_mgr *km;
+	/* Offer the acquire to each key manager in turn; stop at the first that returns 0. */
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;	/* 0, or the error from the last manager tried */
+}
+
+int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
+{
+	int err;
+	u8 *data;
+	struct xfrm_mgr *km;
+	struct xfrm_policy *pol = NULL;
+	/* Compile a user-supplied policy blob via the key managers and attach it to sk. */
+	if (optlen <= 0 || optlen > PAGE_SIZE)
+		return -EMSGSIZE;
+
+	data = kmalloc(optlen, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	err = -EFAULT;
+	if (copy_from_user(data, optval, optlen))
+		goto out;
+	/* The first manager reporting err >= 0 wins; err is then used as the direction. */
+	err = -EINVAL;
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		pol = km->compile_policy(sk->family, optname, data, optlen, &err);
+		if (err >= 0)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+
+	if (err >= 0) {
+		xfrm_sk_policy_insert(sk, err, pol);
+		err = 0;
+	}
+
+out:
+	kfree(data);	/* the kernel copy of the blob is always freed */
+	return err;
+}
+
+int xfrm_register_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_add_tail(&km->list, &xfrm_km_list);	/* appended, so managers are tried in registration order */
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;	/* cannot fail */
+}
+
+int xfrm_unregister_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_del(&km->list);	/* no check that km is actually on the list */
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;	/* cannot fail */
+}
+
+int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;	/* the slot for this family is already taken */
+	else {
+		afinfo->state_bydst = xfrm_state_bydst;	/* give the family code access to both hash tables */
+		afinfo->state_byspi = xfrm_state_byspi;
+		xfrm_state_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+
+int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {	/* an empty slot is not an error: err stays 0 */
+		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;	/* someone else owns this family's slot */
+		else {
+			xfrm_state_afinfo[afinfo->family] = NULL;
+			afinfo->state_byspi = NULL;
+			afinfo->state_bydst = NULL;
+		}
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+
+struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;	/* family is outside the table */
+	read_lock(&xfrm_state_afinfo_lock);
+	afinfo = xfrm_state_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);	/* stays held until xfrm_state_put_afinfo() */
+	read_unlock(&xfrm_state_afinfo_lock);
+	return afinfo;	/* NULL when nothing is registered for family */
+}
+
+void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;	/* tolerate a failed xfrm_state_get_afinfo() */
+	read_unlock(&afinfo->lock);	/* pairs with the read_lock taken in xfrm_state_get_afinfo() */
+}
+
+void __init xfrm_state_init(void)
+{
+	int i;
+	/* Start with every bucket of both state hash tables empty. */
+	for (i=0; i<XFRM_DST_HSIZE; i++) {
+		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
+	}
+}
+
diff -Nru a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/net/xfrm/xfrm_user.c	Mon Mar 31 13:41:07 2003
@@ -0,0 +1,1082 @@
+/* xfrm_user.c: User interface to configure xfrm engine.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro
+ * 		IPv6 support
+ * 	
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+
+static struct sock *xfrm_nl;	/* NETLINK_XFRM kernel socket, created in xfrm_user_init() */
+
+static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
+{	/* Check one optional algorithm attribute; 0 when absent or valid. */
+	struct rtattr *rt = xfrma[type - 1];
+	struct xfrm_algo *algp;
+	int len;
+
+	if (!rt)
+		return 0;
+
+	/* Room for the header, then for the key (alg_key_len is in bits). */
+	len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp);
+	algp = RTA_DATA(rt);
+	if (len < 0 || len < (int) ((algp->alg_key_len + 7U) / 8))
+		return -EINVAL;
+
+	switch (type) {
+	case XFRMA_ALG_AUTH:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "digest_null") != 0)
+			return -EINVAL;
+		break;
+	case XFRMA_ALG_CRYPT:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "cipher_null") != 0)
+			return -EINVAL;
+		break;
+	case XFRMA_ALG_COMP:
+		/* Zero length keys are legal.  */
+		break;
+	default:
+		return -EINVAL;
+	};
+
+	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+	return 0;
+}
+
+/* Sanity-check an XFRM_MSG_NEWSA request before any state is built.
+ *
+ * The checks run in this order and the first failure wins:
+ *   - the address family (AF_INET6 only with IPv6 configured),
+ *   - the combination of algorithm attributes for the protocol,
+ *   - each algorithm attribute on its own (verify_one_alg),
+ *   - the mode, which must be 0 or 1.
+ *
+ * Returns 0 when the request is acceptable, a negative errno otherwise.
+ */
+static int verify_newsa_info(struct xfrm_usersa_info *p,
+			     struct rtattr **xfrma)
+{
+	struct rtattr *auth = xfrma[XFRMA_ALG_AUTH-1];
+	struct rtattr *crypt = xfrma[XFRMA_ALG_CRYPT-1];
+	struct rtattr *comp = xfrma[XFRMA_ALG_COMP-1];
+	int err;
+
+	switch (p->family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		return -EAFNOSUPPORT;
+#endif
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->id.proto) {
+	case IPPROTO_AH:
+		/* Authentication only. */
+		if (!auth || crypt || comp)
+			return -EINVAL;
+		break;
+
+	case IPPROTO_ESP:
+		/* Auth and/or encryption, never compression. */
+		if ((!auth && !crypt) || comp)
+			return -EINVAL;
+		break;
+
+	case IPPROTO_COMP:
+		/* Compression only. */
+		if (!comp || auth || crypt)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	/* Per-attribute checks; stop at the first one that fails. */
+	err = verify_one_alg(xfrma, XFRMA_ALG_AUTH);
+	if (!err)
+		err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT);
+	if (!err)
+		err = verify_one_alg(xfrma, XFRMA_ALG_COMP);
+	if (err)
+		return err;
+
+	/* Only mode values 0 and 1 are accepted. */
+	if (p->mode != 0 && p->mode != 1)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg)
+{	/* Duplicate the algorithm carried by u_arg (if any) into *algpp. */
+	struct xfrm_algo *p, *ualg;
+	int len;
+
+	if (!u_arg)
+		return 0;
+
+	ualg = RTA_DATA(u_arg);
+	len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; /* bits -> bytes */
+	p = kmalloc(len, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	memcpy(p, ualg, len);
+	*algpp = p;
+	return 0;
+}
+
+static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{	/* Fill a new kernel state from the user's SA description. */
+	x->props.family = p->family;
+	x->props.reqid = p->reqid;
+	x->props.replay_window = p->replay_window;
+	x->props.mode = p->mode;
+	memcpy(&x->lft, &p->lft, sizeof(x->lft));
+	memcpy(&x->sel, &p->sel, sizeof(x->sel));
+	memcpy(&x->id, &p->id, sizeof(x->id));
+	x->props.saddr = x->sel.saddr;	/* mirrors the selector copied above */
+}
+
+static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
+					       struct rtattr **xfrma,
+					       int *errp)
+{	/* Build a state from a NEWSA request; NULL with *errp set on error. */
+	struct xfrm_state *x = xfrm_state_alloc();
+	int err = -ENOMEM;	/* reported if the allocation above failed */
+
+	if (!x)
+		goto error_no_put;
+
+	copy_from_user_state(x, p);
+
+	if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1])))
+		goto error;
+
+	err = -ENOENT;	/* reported when no type is found for proto/family */
+	x->type = xfrm_get_type(x->id.proto, x->props.family);
+	if (x->type == NULL)
+		goto error;
+
+	err = x->type->init_state(x, NULL);
+	if (err)
+		goto error;
+
+	x->curlft.add_time = (unsigned long) xtime.tv_sec;
+	x->km.state = XFRM_STATE_VALID;
+	x->km.seq = p->seq;
+
+	return x;
+
+error:
+	xfrm_state_put(x);	/* give up the partly built state */
+error_no_put:
+	*errp = err;
+	return NULL;
+}
+
+static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* NEW SA handler: validate, build and insert one state. */
+	struct rtattr **rta = (struct rtattr **) xfrma;
+	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x, *dup;
+	int err = verify_newsa_info(p, rta);
+
+	if (err)
+		return err;
+
+	x = xfrm_state_construct(p, rta, &err);
+	if (!x)
+		return err;
+
+	/* An SA with this address, SPI, protocol and family must not exist. */
+	dup = xfrm_state_lookup(&x->props.saddr, x->id.spi, x->id.proto,
+				x->props.family);
+	if (dup) {
+		xfrm_state_put(x);
+		xfrm_state_put(dup);
+		return -EEXIST;
+	}
+	xfrm_state_insert(x);
+	return 0;
+}
+
+static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* DEL SA handler: delete the state named by the request, if found. */
+	struct xfrm_usersa_id *id = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+
+	x = xfrm_state_lookup(&id->saddr, id->spi, id->proto, id->family);
+	if (!x)
+		return -ESRCH;
+
+	xfrm_state_delete(x);
+	xfrm_state_put(x);	/* presumably the reference taken by the lookup */
+
+	return 0;
+}
+
+static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{	/* Export a kernel state into the user-visible SA layout. */
+	p->seq = x->km.seq;
+	p->family = x->props.family;
+	p->reqid = x->props.reqid;
+	p->replay_window = x->props.replay_window;
+	p->mode = x->props.mode;
+	memcpy(&p->stats, &x->stats, sizeof(p->stats));
+	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
+	memcpy(&p->lft, &x->lft, sizeof(p->lft));
+	memcpy(&p->sel, &x->sel, sizeof(p->sel));
+	memcpy(&p->id, &x->id, sizeof(p->id));
+}
+
+struct xfrm_dump_info {
+	struct sk_buff *in_skb;		/* request being answered */
+	struct sk_buff *out_skb;	/* reply under construction */
+	u32 nlmsg_seq;			/* sequence number put in reply headers */
+	int start_idx;			/* entries below this index are skipped */
+	int this_idx;			/* running count of entries visited */
+};
+
+static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
+{	/* Append one XFRM_MSG_NEWSA describing x to sp->out_skb; 0 or -1. */
+	struct xfrm_dump_info *sp = ptr;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct xfrm_usersa_info *p;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message, used to size or undo it */
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;	/* skipped, but still counted below */
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWSA, sizeof(*p));
+	nlh->nlmsg_flags = 0;
+
+	p = NLMSG_DATA(nlh);
+	copy_to_user_state(x, p);
+
+	if (x->aalg)	/* alg_key_len is rounded up from bits to bytes */
+		RTA_PUT(skb, XFRMA_ALG_AUTH,
+			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
+	if (x->ealg)
+		RTA_PUT(skb, XFRMA_ALG_CRYPT,
+			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
+	if (x->calg)	/* only the fixed header is exported, no key bytes */
+		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:	/* presumably reached from inside NLMSG_PUT/RTA_PUT - confirm */
+rtattr_failure:
+	skb_trim(skb, b - skb->data);	/* cut the skb back to where this message began */
+	return -1;
+}
+
+static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info = {
+		.in_skb		= cb->skb,
+		.out_skb	= skb,
+		.nlmsg_seq	= cb->nlh->nlmsg_seq,
+		.start_idx	= cb->args[0],	/* index saved by the previous pass */
+		.this_idx	= 0,
+	};
+
+	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	cb->args[0] = info.this_idx;
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
+					  struct xfrm_state *x, u32 seq)
+{	/* Build a one-message reply describing x; ERR_PTR() on any failure. */
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_state(x, 0, &info)) {
+		kfree_skb(skb);
+		/* Callers test IS_ERR() only, so never hand back NULL. */
+		return ERR_PTR(-ENOBUFS);
+	}
+	return skb;
+}
+
+static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* GET SA handler: unicast the matching state back to the sender. */
+	struct xfrm_usersa_id *id = NLMSG_DATA(nlh);
+	struct sk_buff *resp_skb;
+	struct xfrm_state *x;
+	int err;
+
+	/* The state is identified by address, SPI, protocol and family. */
+	x = xfrm_state_lookup(&id->saddr, id->spi, id->proto, id->family);
+	if (x == NULL)
+		return -ESRCH;
+
+	resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	if (!IS_ERR(resp_skb))
+		err = netlink_unicast(xfrm_nl, resp_skb,
+				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	else
+		err = PTR_ERR(resp_skb);
+
+	/* The state is put whether or not the reply went out. */
+	xfrm_state_put(x);
+	return err;
+}
+
+static int verify_userspi_info(struct xfrm_userspi_info *p)
+{	/* Validate the protocol and SPI range of an ALLOC SPI request. */
+	switch (p->info.id.proto) {
+	case IPPROTO_AH:
+	case IPPROTO_ESP:
+		break;
+
+	case IPPROTO_COMP:
+		/* IPCOMP spi is 16-bits. */
+		if (p->min >= 0x10000 || p->max >= 0x10000)
+			return -EINVAL;
+		break;	/* must not fall into default, which rejects */
+
+	default:
+		return -EINVAL;
+	};
+
+	if (p->min > p->max)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* ALLOC SPI handler: have an SPI assigned to a state and report it. */
+	struct xfrm_state *x;
+	struct xfrm_userspi_info *p;
+	struct sk_buff *resp_skb;
+	int err;
+
+	p = NLMSG_DATA(nlh);
+	err = verify_userspi_info(p);
+	if (err)
+		goto out_noput;
+	x = xfrm_find_acq(p->info.mode, p->info.reqid, p->info.id.proto,
+			  &p->info.sel.daddr,
+			  &p->info.sel.saddr, 1,
+			  p->info.family);
+	err = -ENOENT;
+	if (x == NULL)
+		goto out_noput;
+
+	resp_skb = ERR_PTR(-ENOENT);	/* kept if the state is dead or got no SPI */
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		xfrm_alloc_spi(x, p->min, p->max);
+		if (x->id.spi)	/* nonzero SPI: build the reply while still locked */
+			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+		goto out;
+	}
+
+	err = netlink_unicast(xfrm_nl, resp_skb,
+			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+out:
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+
+/* Accept only the three policy directions this interface knows:
+ * XFRM_POLICY_IN, XFRM_POLICY_OUT and XFRM_POLICY_FWD.
+ *
+ * Returns 0 for a valid direction and -EINVAL for anything else.
+ */
+static int verify_policy_dir(__u8 dir)
+{
+	if (dir == XFRM_POLICY_IN ||
+	    dir == XFRM_POLICY_OUT ||
+	    dir == XFRM_POLICY_FWD)
+		return 0;
+
+	return -EINVAL;
+}
+
+/* Validate the fixed part of a user-supplied policy description.
+ *
+ * The share mode, action, address family and direction are checked in
+ * that order; the first check that fails decides the error code.
+ * AF_INET6 is accepted only when IPv6 is configured (built in or as a
+ * module), otherwise it yields -EAFNOSUPPORT.
+ *
+ * Returns 0 when acceptable, a negative errno otherwise (-EINVAL for
+ * everything except the IPv6 case above).
+ */
+static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
+{
+	if (p->share != XFRM_SHARE_ANY &&
+	    p->share != XFRM_SHARE_SESSION &&
+	    p->share != XFRM_SHARE_USER &&
+	    p->share != XFRM_SHARE_UNIQUE)
+		return -EINVAL;
+
+	if (p->action != XFRM_POLICY_ALLOW &&
+	    p->action != XFRM_POLICY_BLOCK)
+		return -EINVAL;
+
+	switch (p->family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		return  -EAFNOSUPPORT;
+#endif
+
+	default:
+		return -EINVAL;
+	};
+
+	return verify_policy_dir(p->dir);
+}
+
+static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
+			   int nr)
+{	/* Copy nr user templates into xp->xfrm_vec and record the count. */
+	int idx;
+
+	xp->xfrm_nr = nr;
+	for (idx = 0; idx < nr; idx++) {
+		struct xfrm_user_tmpl *src = &ut[idx];
+		struct xfrm_tmpl *dst = &xp->xfrm_vec[idx];
+
+		dst->calgos = src->calgos;
+		dst->ealgos = src->ealgos;
+		dst->aalgos = src->aalgos;
+		dst->optional = src->optional;
+		dst->share = src->share;
+		dst->mode = src->mode;
+		dst->reqid = src->reqid;
+		memcpy(&dst->saddr, &src->saddr, sizeof(xfrm_address_t));
+		memcpy(&dst->id, &src->id, sizeof(struct xfrm_id));
+	}
+}
+
+static int copy_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
+	int nr;
+
+	/* No template attribute: the policy gets a template count of 0. */
+	if (rt == NULL) {
+		pol->xfrm_nr = 0;
+		return 0;
+	}
+
+	/* The payload is an array of xfrm_user_tmpl; count whole entries. */
+	nr = (rt->rta_len - sizeof(*rt)) / sizeof(struct xfrm_user_tmpl);
+	if (nr > XFRM_MAX_DEPTH)
+		return -EINVAL;
+	copy_templates(pol, RTA_DATA(rt), nr);
+	return 0;
+}
+
+static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
+{	/* Import the fixed policy fields supplied by user space. */
+	xp->family = p->family;
+	xp->flags = p->flags;
+	xp->action = p->action;
+	xp->index = p->index;
+	xp->priority = p->priority;
+	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
+	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
+	/* XXX xp->share = p->share; */
+}
+
+static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
+{	/* Export a kernel policy, tagged with dir, to the user layout. */
+	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
+	p->flags = xp->flags;
+	p->action = xp->action;
+	p->dir = dir;
+	p->family = xp->family;
+	p->index = xp->index;
+	p->priority = xp->priority;
+	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
+	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
+	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
+}
+
+/* Allocate a policy and fill it from the request header and its
+ * optional template attribute.  Returns the policy, or NULL with the
+ * reason stored in *errp (which is left untouched on success).
+ */
+static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
+{
+	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
+	int err = -ENOMEM;
+
+	if (xp) {
+		copy_from_user_policy(xp, p);
+		err = copy_user_tmpl(xp, xfrma);
+		if (!err)
+			return xp;
+		kfree(xp);
+	}
+
+	*errp = err;
+	return NULL;
+}
+
+static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* NEW POLICY handler: validate, build and insert one policy. */
+	struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
+	struct xfrm_policy *xp;
+	int err = verify_newpolicy_info(p);
+
+	if (err)
+		return err;
+
+	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
+	if (xp == NULL)
+		return err;
+
+	/*
+	 * After a successful insert the policy is put; one that could
+	 * not be inserted is freed with kfree().
+	 */
+	err = xfrm_policy_insert(p->dir, xp, 1);
+	if (err == 0)
+		xfrm_pol_put(xp);
+	else
+		kfree(xp);
+	return err;
+}
+
+static int xfrm_del_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* DEL POLICY handler: remove the policy matching dir + selector. */
+	struct xfrm_userpolicy_id *p = NLMSG_DATA(nlh);
+	struct xfrm_policy *xp;
+	int err = verify_policy_dir(p->dir);
+
+	if (err)
+		return err;
+
+	xp = xfrm_policy_delete(p->dir, &p->sel);
+	if (!xp)
+		return -ENOENT;
+
+	/* A policy was returned: kill it, then put it. */
+	xfrm_policy_kill(xp);
+	xfrm_pol_put(xp);
+
+	return 0;
+}
+
+static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{	/* Append one XFRM_MSG_NEWPOLICY describing xp to sp->out_skb; 0 or -1. */
+	struct xfrm_dump_info *sp = ptr;
+	struct xfrm_userpolicy_info *p;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message, used to size or undo it */
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;	/* skipped, but still counted below */
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWPOLICY, sizeof(*p));
+	p = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_policy(xp, p, dir);
+
+	if (xp->xfrm_nr) {
+		struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
+		int i;
+
+		for (i = 0; i < xp->xfrm_nr; i++) {
+			struct xfrm_user_tmpl *up = &vec[i];
+			struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
+
+			memset(up, 0, sizeof(*up)); /* no stale stack bytes to user */
+			memcpy(&up->id, &kp->id, sizeof(up->id));
+			memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
+			up->reqid = kp->reqid;
+			up->mode = kp->mode;
+			up->share = kp->share;
+			up->optional = kp->optional;
+			up->aalgos = kp->aalgos;
+			up->ealgos = kp->ealgos;
+			up->calgos = kp->calgos;
+		}
+		RTA_PUT(skb, XFRMA_TMPL,
+			(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), vec);
+	}
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:	/* presumably reached from inside NLMSG_PUT/RTA_PUT - confirm */
+rtattr_failure:
+	skb_trim(skb, b - skb->data);	/* cut the skb back to where this message began */
+	return -1;
+}
+
+static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info = {
+		.in_skb		= cb->skb,
+		.out_skb	= skb,
+		.nlmsg_seq	= cb->nlh->nlmsg_seq,
+		.start_idx	= cb->args[0],	/* index saved by the previous pass */
+		.this_idx	= 0,
+	};
+
+	(void) xfrm_policy_walk(dump_one_policy, &info);
+	cb->args[0] = info.this_idx;
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
+					  struct xfrm_policy *xp,
+					  int dir, u32 seq)
+{	/* Build a one-message reply describing xp; ERR_PTR() on any failure. */
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_policy(xp, dir, 0, &info) < 0) {
+		kfree_skb(skb);
+		/* The caller tests IS_ERR() only, so never hand back NULL. */
+		return ERR_PTR(-ENOBUFS);
+	}
+	return skb;
+}
+
+static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{	/* GET POLICY handler: unicast the policy with this dir/index back. */
+	struct xfrm_policy *xp;
+	struct xfrm_userpolicy_id *p;
+	struct sk_buff *resp_skb;
+	int err;
+
+	p = NLMSG_DATA(nlh);
+	err = verify_policy_dir(p->dir);	/* as add and delete already do */
+	if (err)
+		return err;
+	xp = xfrm_policy_byid(p->dir, p->index, 0);
+	if (xp == NULL)
+		return -ENOENT;
+
+	resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq);
+	if (IS_ERR(resp_skb))
+		err = PTR_ERR(resp_skb);
+	else
+		err = netlink_unicast(xfrm_nl, resp_skb,
+				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	xfrm_pol_put(xp);
+	return err;
+}
+
+static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
+};	/* smallest valid nlmsg_len per message, indexed by type - XFRM_MSG_BASE */
+
+static struct xfrm_link {
+	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);	/* one request */
+	int (*dump)(struct sk_buff *, struct netlink_callback *);	/* NLM_F_DUMP walk */
+} xfrm_dispatch[XFRM_MSG_MAX + 1 - XFRM_MSG_BASE] = {
+	{	.doit	=	xfrm_add_sa,		},	/* NEW SA */
+	{	.doit	=	xfrm_del_sa,		},	/* DEL SA */
+	{							/* GET SA */
+		.doit	=	xfrm_get_sa,
+		.dump	=	xfrm_dump_sa,
+	},
+	{	.doit	=	xfrm_add_policy		},	/* NEW POLICY */
+	{	.doit	=	xfrm_del_policy		},	/* DEL POLICY */
+	{							/* GET POLICY */
+		.doit	=	xfrm_get_policy,
+		.dump	=	xfrm_dump_policy,
+	},
+	{	.doit	=	xfrm_alloc_userspi	},	/* ALLOC SPI */
+};	/* sized for every type up to XFRM_MSG_MAX; unlisted slots have NULL handlers */
+
+static int xfrm_done(struct netlink_callback *cb)
+{	/* Passed to netlink_dump_start() as its last argument; does nothing. */
+	return 0;
+}
+
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{	/* Dispatch one netlink message; a nonzero return means *errp was set. */
+	struct rtattr *xfrma[XFRMA_MAX];	/* slot n-1 holds the attribute of type n */
+	struct xfrm_link *link;
+	int type, min_len;
+
+	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < XFRM_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > XFRM_MSG_MAX)
+		goto err_einval;
+
+	type -= XFRM_MSG_BASE;	/* from here on, type is a table index */
+	link = &xfrm_dispatch[type];
+
+	/* All operations require privileges, even GET */
+	if (security_netlink_recv(skb)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	/* 2 and 5 are the GET SA / GET POLICY slots, the only ones with .dump */
+	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+		u32 rlen;
+
+		if (link->dump == NULL)
+			goto err_einval;
+
+		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
+						link->dump,
+						xfrm_done)) != 0) {
+			return -1;
+		}
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;	/* *errp is 0 here: the dump was started */
+	}
+
+	memset(xfrma, 0, sizeof(xfrma));
+
+	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
+		goto err_einval;
+
+	if (nlh->nlmsg_len > min_len) {	/* attributes follow the fixed part */
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned short flavor = attr->rta_type;
+			if (flavor) {	/* type 0 attributes are skipped */
+				if (flavor > XFRMA_MAX)
+					goto err_einval;
+				xfrma[flavor - 1] = attr;	/* a later duplicate overwrites */
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		goto err_einval;
+	*errp = link->doit(skb, nlh, (void **) &xfrma);
+
+	return *errp;
+
+err_einval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+static int xfrm_user_rcv_skb(struct sk_buff *skb)
+{	/* Handle each netlink message in skb in turn, acking where needed. */
+	int err;
+	struct nlmsghdr *nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
+			return 0;	/* malformed or truncated header: stop quietly */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (xfrm_user_rcv_msg(skb, nlh, &err)) {
+			if (err == 0)	/* nonzero return with err 0: a dump was started */
+				return -1;
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags & NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);	/* step over the message just handled */
+	}
+
+	return 0;
+}
+
+static void xfrm_netlink_rcv(struct sock *sk, int len)
+{	/* Callback given to netlink_kernel_create(): drain sk's receive queue. */
+	do {
+		struct sk_buff *skb;
+
+		down(&xfrm_cfg_sem);	/* held while queued messages are processed */
+
+		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+			if (xfrm_user_rcv_skb(skb)) {
+				if (skb->len)	/* data left in the skb: put it back first in line */
+					skb_queue_head(&sk->receive_queue, skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&xfrm_cfg_sem);
+
+	} while (xfrm_nl && xfrm_nl->receive_queue.qlen);	/* repeat while messages remain */
+}
+
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
+{	/* Write an XFRM_MSG_EXPIRE for x into skb; returns skb->len or -1. */
+	struct xfrm_user_expire *ue;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message, used to size or undo it */
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
+			sizeof(*ue));
+	ue = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_state(x, &ue->state);
+	ue->hard = (hard != 0) ? 1 : 0;	/* normalised to exactly 0 or 1 */
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:	/* presumably reached from inside NLMSG_PUT - confirm */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_notify(struct xfrm_state *x, int hard)
+{	/* Broadcast an expire message for x to the XFRMGRP_EXPIRE group. */
+	struct sk_buff *skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16,
+					GFP_ATOMIC);
+
+	if (!skb)
+		return -ENOMEM;
+
+	/* The skb was sized above for this one message; failure is a bug. */
+	if (build_expire(skb, x, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+
+static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
+			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
+			 int dir)
+{	/* Write an XFRM_MSG_ACQUIRE for x into skb; returns skb->len or -1. */
+	struct xfrm_user_acquire *ua;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message, used to size or undo it */
+	__u32 seq = xfrm_get_acqseq();
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
+			sizeof(*ua));
+	ua = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	memcpy(&ua->id, &x->id, sizeof(ua->id));
+	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
+	copy_to_user_policy(xp, &ua->policy, dir);
+	ua->aalgos = xt->aalgos;
+	ua->ealgos = xt->ealgos;
+	ua->calgos = xt->calgos;
+	ua->seq = x->km.seq = seq;	/* the same number is stored in the state */
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:	/* presumably reached from inside NLMSG_PUT - confirm */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
+			     struct xfrm_policy *xp, int dir)
+{	/* Broadcast an acquire message for x to the XFRMGRP_ACQUIRE group. */
+	struct sk_buff *skb = alloc_skb(sizeof(struct xfrm_user_acquire) + 16,
+					GFP_ATOMIC);
+
+	if (!skb)
+		return -ENOMEM;
+
+	/* The skb was sized above for this one message; failure is a bug. */
+	if (build_acquire(skb, x, xt, xp, dir) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+}
+
+/* User gives us xfrm_userpolicy_info followed by an array of 0
+ * or more templates.  On failure NULL is returned and *dir holds a
+ * negative errno; on success *dir is the policy direction.
+ */
+struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+                                        u8 *data, int len, int *dir)
+{
+	struct xfrm_userpolicy_info *uinfo = (struct xfrm_userpolicy_info *)data;
+	struct xfrm_user_tmpl *tmpls = (struct xfrm_user_tmpl *) (uinfo + 1);
+	struct xfrm_policy *xp;
+	int wanted_opt, nr;
+
+	/* Each supported family accepts exactly one value of opt. */
+	switch (family) {
+	case AF_INET:
+		wanted_opt = IP_XFRM_POLICY;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		wanted_opt = IPV6_XFRM_POLICY;
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+
+	if (opt != wanted_opt) {
+		*dir = -EOPNOTSUPP;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+
+	if (len < sizeof(*uinfo) || verify_newpolicy_info(uinfo))
+		return NULL;
+
+	nr = ((len - sizeof(*uinfo)) / sizeof(*tmpls));
+	if (nr > XFRM_MAX_DEPTH)
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, uinfo);
+	copy_templates(xp, tmpls, nr);
+
+	*dir = uinfo->dir;
+
+	return xp;
+}
+
+static struct xfrm_mgr netlink_mgr = {	/* registered via xfrm_register_km() at init */
+	.id		= "netlink",
+	.notify		= xfrm_send_notify,	/* broadcasts XFRM_MSG_EXPIRE */
+	.acquire	= xfrm_send_acquire,	/* broadcasts XFRM_MSG_ACQUIRE */
+	.compile_policy	= xfrm_compile_policy,
+};
+
+static int __init xfrm_user_init(void)
+{
+	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+
+	/* Failure to create the netlink socket is treated as fatal. */
+	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+	if (!xfrm_nl)
+		panic("xfrm_user_init: cannot initialize xfrm_nl\n");
+
+	/* Hook the notify/acquire/compile_policy callbacks into xfrm. */
+	xfrm_register_km(&netlink_mgr);
+	return 0;
+}
+
+static void __exit xfrm_user_exit(void)
+{	/* Undo xfrm_user_init() in reverse order: manager first, then socket. */
+	xfrm_unregister_km(&netlink_mgr);
+	sock_release(xfrm_nl->socket);
+}
+
+module_init(xfrm_user_init);	/* creates the socket and registers netlink_mgr */
+module_exit(xfrm_user_exit);	/* unregisters netlink_mgr and releases the socket */